From b9136651976ab1f1c85797c5abb10b1d65b39277 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle <47210458+raphaelthegreat@users.noreply.github.com> Date: Mon, 2 Sep 2024 20:45:11 +0300 Subject: [PATCH] video_core: Refactor and render target swizzles --- CMakeLists.txt | 3 +- .../frontend/translate/export.cpp | 23 +- .../frontend/translate/translate.cpp | 1 - .../frontend/translate/vector_alu.cpp | 9 +- src/shader_recompiler/recompiler.cpp | 1 + src/shader_recompiler/runtime_info.h | 9 + src/shader_recompiler/specialization.h | 87 +++++++ .../renderer_vulkan/vk_graphics_pipeline.h | 1 + .../renderer_vulkan/vk_pipeline_cache.cpp | 231 +++++++++++++++++- .../renderer_vulkan/vk_pipeline_cache.h | 46 +++- .../renderer_vulkan/vk_shader_cache.cpp | 192 --------------- .../renderer_vulkan/vk_shader_cache.h | 156 ------------ 12 files changed, 392 insertions(+), 367 deletions(-) create mode 100644 src/shader_recompiler/specialization.h delete mode 100644 src/video_core/renderer_vulkan/vk_shader_cache.cpp delete mode 100644 src/video_core/renderer_vulkan/vk_shader_cache.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 6c40f0a8f..c192dd7bc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -420,6 +420,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h src/shader_recompiler/recompiler.cpp src/shader_recompiler/recompiler.h src/shader_recompiler/runtime_info.h + src/shader_recompiler/specialization.h src/shader_recompiler/backend/spirv/emit_spirv.cpp src/shader_recompiler/backend/spirv/emit_spirv.h src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp @@ -533,8 +534,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp src/video_core/renderer_vulkan/vk_resource_pool.h src/video_core/renderer_vulkan/vk_scheduler.cpp src/video_core/renderer_vulkan/vk_scheduler.h - src/video_core/renderer_vulkan/vk_shader_cache.cpp - src/video_core/renderer_vulkan/vk_shader_cache.h src/video_core/renderer_vulkan/vk_shader_util.cpp src/video_core/renderer_vulkan/vk_shader_util.h src/video_core/renderer_vulkan/vk_swapchain.cpp diff --git a/src/shader_recompiler/frontend/translate/export.cpp b/src/shader_recompiler/frontend/translate/export.cpp index d80de002c..821923e90 100644 --- a/src/shader_recompiler/frontend/translate/export.cpp +++ b/src/shader_recompiler/frontend/translate/export.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "shader_recompiler/frontend/translate/translate.h" +#include "shader_recompiler/specialization.h" namespace Shader::Gcn { @@ -19,12 +20,28 @@ void Translator::EmitExport(const GcnInst& inst) { IR::VectorReg(inst.src[3].code), }; + const auto swizzle = [&](u32 comp) { + if (!IR::IsMrt(attrib)) { + return comp; + } + const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0); + switch (info.mrt_swizzles[index]) { + case MrtSwizzle::Identity: + return comp; + case MrtSwizzle::Alt: + static constexpr std::array AltSwizzle = {2, 1, 0, 3}; + return AltSwizzle[comp]; + default: + UNREACHABLE(); + } + }; + const auto unpack = [&](u32 idx) { const IR::Value value = ir.UnpackHalf2x16(ir.GetVectorReg(vsrc[idx])); const IR::F32 r = IR::F32{ir.CompositeExtract(value, 0)}; const IR::F32 g = IR::F32{ir.CompositeExtract(value, 1)}; - ir.SetAttribute(attrib, r, idx * 2); - ir.SetAttribute(attrib, g, idx * 2 + 1); + ir.SetAttribute(attrib, r, swizzle(idx * 2)); + ir.SetAttribute(attrib, g, swizzle(idx * 2 + 1)); }; // Components are float16 packed into a VGPR @@ -45,7 +62,7 @@ void Translator::EmitExport(const GcnInst& inst) { continue; } const IR::F32 comp = ir.GetVectorReg(vsrc[i]); - ir.SetAttribute(attrib, comp, i); + ir.SetAttribute(attrib, comp, swizzle(i)); } } } diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index eb86310b8..8600dbd39 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -445,7 +445,6 @@ void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) { } void Translator::LogMissingOpcode(const GcnInst& inst) { - const u32 opcode = u32(inst.opcode); LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({}, category = {})", magic_enum::enum_name(inst.opcode), u32(inst.opcode), magic_enum::enum_name(inst.category)); diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 7fef91377..5d306b295 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -479,10 +479,11 @@ void Translator::V_ADD_F32(const GcnInst& inst) { void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) { const IR::U32 src0{GetSrc(inst.src[0])}; const IR::VectorReg dst_reg{inst.dst[0].code}; - ir.SetVectorReg( - dst_reg, - ir.FPMul(ir.ConvertUToF(32, 32, ir.ISub(ir.BitwiseAnd(src0, ir.Imm32(0xF)), ir.Imm32(8))), - ir.Imm32(1.f / 16.f))); + ASSERT(src0.IsImmediate()); + static constexpr std::array IntToFloat = { + 0.0f, 0.0625f, 0.1250f, 0.1875f, 0.2500f, 0.3125f, 0.3750f, 0.4375f, + -0.5000f, -0.4375f, -0.3750f, -0.3125f, -0.2500f, -0.1875f, -0.1250f, -0.0625f}; + ir.SetVectorReg(dst_reg, ir.Imm32(IntToFloat[src0.U32()])); } void Translator::V_MED3_F32(const GcnInst& inst) { diff --git a/src/shader_recompiler/recompiler.cpp b/src/shader_recompiler/recompiler.cpp index dfcf9ed1b..bde93ec1b 100644 --- a/src/shader_recompiler/recompiler.cpp +++ b/src/shader_recompiler/recompiler.cpp @@ -6,6 +6,7 @@ #include "shader_recompiler/frontend/structured_control_flow.h" #include "shader_recompiler/ir/passes/ir_passes.h" #include "shader_recompiler/ir/post_order.h" +#include "shader_recompiler/recompiler.h" namespace Shader { diff --git a/src/shader_recompiler/runtime_info.h b/src/shader_recompiler/runtime_info.h index 77c57e947..0c0985aa9 100644 --- a/src/shader_recompiler/runtime_info.h +++ b/src/shader_recompiler/runtime_info.h @@ -16,6 +16,14 @@ namespace Shader { static constexpr size_t NumUserDataRegs = 16; +static constexpr size_t MaxColorBuffers = 8; + +enum class MrtSwizzle : u32 { + Identity = 0, + Alt = 1, + Reverse = 2, + ReverseAlt = 3, +}; enum class Stage : u32 { Fragment, @@ -162,6 +170,7 @@ struct Info { u32 default_value; }; boost::container::static_vector ps_inputs{}; + std::array mrt_swizzles; struct AttributeFlags { bool Get(IR::Attribute attrib, u32 comp = 0) const { diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h new file mode 100644 index 000000000..b29892afd --- /dev/null +++ b/src/shader_recompiler/specialization.h @@ -0,0 +1,87 @@ +// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#include + +#include "common/types.h" +#include "shader_recompiler/runtime_info.h" + +namespace Shader { + +struct BufferSpecialization { + u16 stride : 14; + u16 is_storage : 1; + + auto operator<=>(const BufferSpecialization&) const = default; +}; + +struct TextureBufferSpecialization { + bool is_integer = false; + + auto operator<=>(const TextureBufferSpecialization&) const = default; +}; + +struct ImageSpecialization { + AmdGpu::ImageType type = AmdGpu::ImageType::Color2D; + bool is_integer = false; + + auto operator<=>(const ImageSpecialization&) const = default; +}; + +struct StageSpecialization { + static constexpr size_t MaxStageResources = 32; + + const Shader::Info* info; + std::bitset bitset{}; + boost::container::small_vector buffers; + boost::container::small_vector tex_buffers; + boost::container::small_vector images; + std::array mrt_swizzles; + u32 start_binding{}; + + explicit StageSpecialization(const Shader::Info& info_) : info{&info_} {} + + void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) { + for (const auto& desc : desc_list) { + auto& spec = spec_list.emplace_back(); + const auto sharp = desc.GetSharp(*info); + if (!sharp) { + binding++; + continue; + } + bitset.set(binding++); + func(spec, desc, sharp); + } + } + + bool operator==(const StageSpecialization& other) const { + if (start_binding != other.start_binding) { + return false; + } + if (info->stage == Shader::Stage::Fragment && + !std::ranges::equal(mrt_swizzles, other.mrt_swizzles)) { + return false; + } + u32 binding{}; + for (u32 i = 0; i < buffers.size(); i++) { + if (other.bitset[binding++] && buffers[i] != other.buffers[i]) { + return false; + } + } + for (u32 i = 0; i < tex_buffers.size(); i++) { + if (other.bitset[binding++] && tex_buffers[i] != other.tex_buffers[i]) { + return false; + } + } + for (u32 i = 0; i < images.size(); i++) { + if (other.bitset[binding++] && images[i] != other.images[i]) { + return false; + } + } + return true; + } +}; + +} // namespace Shader diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index 3e51e6529..c06ddd204 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -25,6 +25,7 @@ using Liverpool = AmdGpu::Liverpool; struct GraphicsPipelineKey { std::array stage_hashes; std::array color_formats; + std::array mrt_swizzles; vk::Format depth_format; vk::Format stencil_format; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index f8de5ffeb..183961ac3 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -1,21 +1,83 @@ // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include + +#include "common/config.h" +#include "common/io_file.h" +#include "common/path_util.h" +#include "shader_recompiler/backend/spirv/emit_spirv.h" +#include "shader_recompiler/recompiler.h" #include "shader_recompiler/runtime_info.h" #include "video_core/renderer_vulkan/renderer_vulkan.h" #include "video_core/renderer_vulkan/vk_instance.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" -#include "video_core/renderer_vulkan/vk_shader_cache.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" extern std::unique_ptr renderer; namespace Vulkan { +using Shader::VsOutput; + +void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) { + const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) { + if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None || + w != VsOutput::None) { + info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w}); + } + }; + // VS_OUT_MISC_VEC + add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None, + ctl.use_vtx_edge_flag + ? VsOutput::EdgeFlag + : (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None), + ctl.use_vtx_kill_flag + ? VsOutput::KillFlag + : (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None), + ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None); + // VS_OUT_CCDIST0 + add_output(ctl.IsClipDistEnabled(0) + ? VsOutput::ClipDist0 + : (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None), + ctl.IsClipDistEnabled(1) + ? VsOutput::ClipDist1 + : (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None), + ctl.IsClipDistEnabled(2) + ? VsOutput::ClipDist2 + : (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None), + ctl.IsClipDistEnabled(3) + ? VsOutput::ClipDist3 + : (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None)); + // VS_OUT_CCDIST1 + add_output(ctl.IsClipDistEnabled(4) + ? VsOutput::ClipDist4 + : (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None), + ctl.IsClipDistEnabled(5) + ? VsOutput::ClipDist5 + : (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None), + ctl.IsClipDistEnabled(6) + ? VsOutput::ClipDist6 + : (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None), + ctl.IsClipDistEnabled(7) + ? VsOutput::ClipDist7 + : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None)); +} + +[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) { + return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); +} + PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, AmdGpu::Liverpool* liverpool_) - : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, - shader_cache{std::make_unique(instance, liverpool)} { + : instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192}, + block_pool{512} { + profile = Shader::Profile{ + .supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U, + .subgroup_size = instance.SubgroupSize(), + .support_explicit_workgroup_layout = true, + }; pipeline_cache = instance.GetDevice().createPipelineCacheUnique({}); } @@ -134,6 +196,7 @@ bool PipelineCache::RefreshGraphicsKey() { key.color_formats.fill(vk::Format::eUndefined); key.blend_controls.fill({}); key.write_masks.fill({}); + key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard); int remapped_cb{}; for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) { auto const& col_buf = regs.color_buffers[cb]; @@ -142,9 +205,12 @@ bool PipelineCache::RefreshGraphicsKey() { } const auto base_format = LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat()); - const auto is_vo_surface = renderer->IsVideoOutSurface(col_buf); + const bool is_vo_surface = renderer->IsVideoOutSurface(col_buf); key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat( base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/); + if (base_format == key.color_formats[remapped_cb]) { + key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value(); + } key.blend_controls[remapped_cb] = regs.blend_control[cb]; key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable && !col_buf.info.blend_bypass); @@ -169,6 +235,7 @@ bool PipelineCache::RefreshGraphicsKey() { } const auto* bininfo = Liverpool::GetBinaryInfo(*pgm); if (!bininfo->Valid()) { + LOG_WARNING(Render_Vulkan, "Invalid binary info structure!"); key.stage_hashes[i] = 0; infos[i] = nullptr; continue; @@ -177,9 +244,8 @@ bool PipelineCache::RefreshGraphicsKey() { return false; } const auto stage = Shader::Stage{i}; - const GuestProgram guest_pgm{pgm, stage}; - std::tie(infos[i], modules[i], key.stage_hashes[i]) = - shader_cache->GetProgram(guest_pgm, binding); + const auto guest_pgm = GuestProgram{pgm, stage}; + std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(guest_pgm, binding); } return true; } @@ -191,8 +257,157 @@ bool PipelineCache::RefreshComputeKey() { if (ShouldSkipShader(guest_pgm.hash, "compute")) { return false; } - std::tie(infos[0], modules[0], compute_key) = shader_cache->GetProgram(guest_pgm, binding); + std::tie(infos[0], modules[0], compute_key) = GetProgram(guest_pgm, binding); return true; } +vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, std::span code, + size_t perm_idx, u32& binding) { + LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, + perm_idx != 0 ? "(permutation)" : ""); + + if (Config::dumpShaders()) { + DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin"); + } + + block_pool.ReleaseContents(); + inst_pool.ReleaseContents(); + const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile); + + // Compile IR to SPIR-V + const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding); + if (Config::dumpShaders()) { + DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv"); + } + + // Create module and set name to hash in renderdoc + const auto module = CompileSPV(spv, instance.GetDevice()); + ASSERT(module != VK_NULL_HANDLE); + const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx); + Vulkan::SetObjectName(instance.GetDevice(), module, name); + return module; +} + +std::tuple PipelineCache::GetProgram( + const GuestProgram& pgm, u32& binding) { + auto [it_pgm, new_program] = program_cache.try_emplace(pgm.hash); + if (new_program) { + Program* program = program_pool.Create(BuildShaderInfo(pgm, liverpool->regs)); + u32 start_binding = binding; + const auto module = CompileModule(program->info, pgm.code, 0, binding); + const auto spec = BuildStageSpec(program->info, start_binding); + program->modules.emplace_back(module, spec); + it_pgm.value() = program; + return std::make_tuple(&program->info, module, HashCombine(pgm.hash, 0)); + } + + Program* program = it_pgm->second; + const auto& info = program->info; + const auto spec = BuildStageSpec(info, binding); + size_t perm_idx = program->modules.size(); + vk::ShaderModule module{}; + + const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); + if (it == program->modules.end()) { + auto new_info = BuildShaderInfo(pgm, liverpool->regs); + module = CompileModule(new_info, pgm.code, perm_idx, binding); + program->modules.emplace_back(module, std::move(spec)); + } else { + binding += info.NumBindings(); + module = it->module; + perm_idx = std::distance(program->modules.begin(), it); + } + return std::make_tuple(&info, module, HashCombine(pgm.hash, perm_idx)); +} + +Shader::Info PipelineCache::BuildShaderInfo(const GuestProgram& pgm, + const AmdGpu::Liverpool::Regs& regs) { + Shader::Info info{}; + info.user_data = pgm.user_data; + info.pgm_base = VAddr(pgm.code.data()); + info.pgm_hash = pgm.hash; + info.stage = pgm.stage; + switch (pgm.stage) { + case Shader::Stage::Vertex: { + info.num_user_data = regs.vs_program.settings.num_user_regs; + info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt; + BuildVsOutputs(info, regs.vs_output_control); + break; + } + case Shader::Stage::Fragment: { + info.num_user_data = regs.ps_program.settings.num_user_regs; + std::ranges::transform(graphics_key.mrt_swizzles, info.mrt_swizzles.begin(), + [](Liverpool::ColorBuffer::SwapMode mode) { + return static_cast(mode); + }); + for (u32 i = 0; i < regs.num_interp; i++) { + info.ps_inputs.push_back({ + .param_index = regs.ps_inputs[i].input_offset.Value(), + .is_default = bool(regs.ps_inputs[i].use_default), + .is_flat = bool(regs.ps_inputs[i].flat_shade), + .default_value = regs.ps_inputs[i].default_value, + }); + } + break; + } + case Shader::Stage::Compute: { + const auto& cs_pgm = regs.cs_program; + info.num_user_data = cs_pgm.settings.num_user_regs; + info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, + cs_pgm.num_thread_z.full}; + info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), + cs_pgm.IsTgidEnabled(2)}; + info.shared_memory_size = cs_pgm.SharedMemSize(); + break; + } + default: + break; + } + return info; +} + +Shader::StageSpecialization PipelineCache::BuildStageSpec(const Shader::Info& info, + u32 start_binding) { + u32 binding{}; + auto spec = Shader::StageSpecialization{info}; + spec.start_binding = start_binding; + spec.ForEachSharp(binding, spec.buffers, info.buffers, + [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { + spec.stride = sharp.GetStride(); + spec.is_storage = desc.IsStorage(sharp); + }); + spec.ForEachSharp(binding, spec.tex_buffers, info.texture_buffers, + [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { + spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); + }); + spec.ForEachSharp(binding, spec.images, info.images, + [](auto& spec, const auto& desc, AmdGpu::Image sharp) { + spec.type = sharp.GetType(); + spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); + }); + switch (info.stage) { + case Shader::Stage::Fragment: + std::ranges::transform(graphics_key.mrt_swizzles, spec.mrt_swizzles.begin(), + [](Liverpool::ColorBuffer::SwapMode mode) { + return static_cast(mode); + }); + default: + break; + } + + return spec; +} + +void PipelineCache::DumpShader(std::span code, u64 hash, Shader::Stage stage, + size_t perm_idx, std::string_view ext) { + using namespace Common::FS; + const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps"; + if (!std::filesystem::exists(dump_dir)) { + std::filesystem::create_directories(dump_dir); + } + const auto filename = fmt::format("{}_{:#018x}_{}.{}", stage, hash, perm_idx, ext); + const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; + file.WriteSpan(code); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 40853b746..545ddaf27 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -4,6 +4,10 @@ #pragma once #include +#include "common/object_pool.h" +#include "shader_recompiler/ir/basic_block.h" +#include "shader_recompiler/profile.h" +#include "shader_recompiler/specialization.h" #include "video_core/renderer_vulkan/vk_compute_pipeline.h" #include "video_core/renderer_vulkan/vk_graphics_pipeline.h" @@ -17,6 +21,31 @@ class Instance; class Scheduler; class ShaderCache; +struct Program { + struct Module { + vk::ShaderModule module; + Shader::StageSpecialization spec; + }; + + Shader::Info info; + boost::container::small_vector modules; + + explicit Program(const Shader::Info& info_) : info{info_} {} +}; + +struct GuestProgram { + Shader::Stage stage; + std::span user_data; + std::span code; + u64 hash; + + explicit GuestProgram(const auto* pgm, Shader::Stage stage_) + : stage{stage_}, user_data{pgm->user_data}, code{pgm->Code()} { + const auto* bininfo = AmdGpu::Liverpool::GetBinaryInfo(*pgm); + hash = bininfo->shader_hash; + } +}; + class PipelineCache { static constexpr size_t MaxShaderStages = 5; @@ -29,17 +58,32 @@ public: const ComputePipeline* GetComputePipeline(); + std::tuple GetProgram(const GuestProgram& pgm, + u32& binding); + private: bool RefreshGraphicsKey(); bool RefreshComputeKey(); + void DumpShader(std::span code, u64 hash, Shader::Stage stage, size_t perm_idx, + std::string_view ext); + vk::ShaderModule CompileModule(Shader::Info& info, std::span code, size_t perm_idx, + u32& binding); + + Shader::Info BuildShaderInfo(const GuestProgram& pgm, const AmdGpu::Liverpool::Regs& regs); + Shader::StageSpecialization BuildStageSpec(const Shader::Info& info, u32 binding); + private: const Instance& instance; Scheduler& scheduler; AmdGpu::Liverpool* liverpool; vk::UniquePipelineCache pipeline_cache; vk::UniquePipelineLayout pipeline_layout; - std::unique_ptr shader_cache; + Shader::Profile profile{}; + tsl::robin_map program_cache; + Common::ObjectPool inst_pool; + Common::ObjectPool block_pool; + Common::ObjectPool program_pool; tsl::robin_map> compute_pipelines; tsl::robin_map> graphics_pipelines; std::array infos{}; diff --git a/src/video_core/renderer_vulkan/vk_shader_cache.cpp b/src/video_core/renderer_vulkan/vk_shader_cache.cpp deleted file mode 100644 index 9250f84ce..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_cache.cpp +++ /dev/null @@ -1,192 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#include "common/config.h" -#include "common/io_file.h" -#include "common/path_util.h" -#include "shader_recompiler/backend/spirv/emit_spirv.h" -#include "shader_recompiler/recompiler.h" -#include "video_core/renderer_vulkan/vk_instance.h" -#include "video_core/renderer_vulkan/vk_platform.h" -#include "video_core/renderer_vulkan/vk_shader_cache.h" -#include "video_core/renderer_vulkan/vk_shader_util.h" - -namespace Vulkan { - -using Shader::VsOutput; - -void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) { - const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) { - if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None || - w != VsOutput::None) { - info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w}); - } - }; - // VS_OUT_MISC_VEC - add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None, - ctl.use_vtx_edge_flag - ? VsOutput::EdgeFlag - : (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None), - ctl.use_vtx_kill_flag - ? VsOutput::KillFlag - : (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None), - ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None); - // VS_OUT_CCDIST0 - add_output(ctl.IsClipDistEnabled(0) - ? VsOutput::ClipDist0 - : (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None), - ctl.IsClipDistEnabled(1) - ? VsOutput::ClipDist1 - : (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None), - ctl.IsClipDistEnabled(2) - ? VsOutput::ClipDist2 - : (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None), - ctl.IsClipDistEnabled(3) - ? VsOutput::ClipDist3 - : (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None)); - // VS_OUT_CCDIST1 - add_output(ctl.IsClipDistEnabled(4) - ? VsOutput::ClipDist4 - : (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None), - ctl.IsClipDistEnabled(5) - ? VsOutput::ClipDist5 - : (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None), - ctl.IsClipDistEnabled(6) - ? VsOutput::ClipDist6 - : (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None), - ctl.IsClipDistEnabled(7) - ? VsOutput::ClipDist7 - : (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None)); -} - -Shader::Info MakeShaderInfo(const GuestProgram& pgm, const AmdGpu::Liverpool::Regs& regs) { - Shader::Info info{}; - info.user_data = pgm.user_data; - info.pgm_base = VAddr(pgm.code.data()); - info.pgm_hash = pgm.hash; - info.stage = pgm.stage; - switch (pgm.stage) { - case Shader::Stage::Vertex: { - info.num_user_data = regs.vs_program.settings.num_user_regs; - info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt; - BuildVsOutputs(info, regs.vs_output_control); - break; - } - case Shader::Stage::Fragment: { - info.num_user_data = regs.ps_program.settings.num_user_regs; - for (u32 i = 0; i < regs.num_interp; i++) { - info.ps_inputs.push_back({ - .param_index = regs.ps_inputs[i].input_offset.Value(), - .is_default = bool(regs.ps_inputs[i].use_default), - .is_flat = bool(regs.ps_inputs[i].flat_shade), - .default_value = regs.ps_inputs[i].default_value, - }); - } - break; - } - case Shader::Stage::Compute: { - const auto& cs_pgm = regs.cs_program; - info.num_user_data = cs_pgm.settings.num_user_regs; - info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full, - cs_pgm.num_thread_z.full}; - info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1), - cs_pgm.IsTgidEnabled(2)}; - info.shared_memory_size = cs_pgm.SharedMemSize(); - break; - } - default: - break; - } - return info; -} - -[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) { - return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2)); -} - -ShaderCache::ShaderCache(const Instance& instance_, AmdGpu::Liverpool* liverpool_) - : instance{instance_}, liverpool{liverpool_}, inst_pool{8192}, block_pool{512} { - profile = Shader::Profile{ - .supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U, - .subgroup_size = instance.SubgroupSize(), - .support_explicit_workgroup_layout = true, - }; -} - -vk::ShaderModule ShaderCache::CompileModule(Shader::Info& info, std::span code, - size_t perm_idx, u32& binding) { - LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash, - perm_idx != 0 ? "(permutation)" : ""); - - if (Config::dumpShaders()) { - DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin"); - } - - block_pool.ReleaseContents(); - inst_pool.ReleaseContents(); - const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile); - - // Compile IR to SPIR-V - const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding); - if (Config::dumpShaders()) { - DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv"); - } - - // Create module and set name to hash in renderdoc - const auto module = CompileSPV(spv, instance.GetDevice()); - ASSERT(module != VK_NULL_HANDLE); - const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx); - Vulkan::SetObjectName(instance.GetDevice(), module, name); - return module; -} - -Program* ShaderCache::CreateProgram(const GuestProgram& pgm, u32& binding) { - Program* program = program_pool.Create(MakeShaderInfo(pgm, liverpool->regs)); - u32 start_binding = binding; - const auto module = CompileModule(program->info, pgm.code, 0, binding); - program->modules.emplace_back(module, StageSpecialization{program->info, start_binding}); - return program; -} - -std::tuple ShaderCache::GetProgram( - const GuestProgram& pgm, u32& binding) { - auto [it_pgm, new_program] = program_cache.try_emplace(pgm.hash); - if (new_program) { - auto program = CreateProgram(pgm, binding); - const auto module = program->modules.back().module; - it_pgm.value() = program; - return std::make_tuple(&program->info, module, HashCombine(pgm.hash, 0)); - } - - Program* program = it_pgm->second; - const auto& info = program->info; - size_t perm_idx = program->modules.size(); - StageSpecialization spec{info, binding}; - vk::ShaderModule module{}; - - const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec); - if (it == program->modules.end()) { - auto new_info = MakeShaderInfo(pgm, liverpool->regs); - module = CompileModule(new_info, pgm.code, perm_idx, binding); - program->modules.emplace_back(module, std::move(spec)); - } else { - binding += info.NumBindings(); - module = it->module; - perm_idx = std::distance(program->modules.begin(), it); - } - return std::make_tuple(&info, module, HashCombine(pgm.hash, perm_idx)); -} - -void ShaderCache::DumpShader(std::span code, u64 hash, Shader::Stage stage, - size_t perm_idx, std::string_view ext) { - using namespace Common::FS; - const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps"; - if (!std::filesystem::exists(dump_dir)) { - std::filesystem::create_directories(dump_dir); - } - const auto filename = fmt::format("{}_{:#018x}_{}.{}", stage, hash, perm_idx, ext); - const auto file = IOFile{dump_dir / filename, FileAccessMode::Write}; - file.WriteSpan(code); -} - -} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_shader_cache.h b/src/video_core/renderer_vulkan/vk_shader_cache.h deleted file mode 100644 index 02532373f..000000000 --- a/src/video_core/renderer_vulkan/vk_shader_cache.h +++ /dev/null @@ -1,156 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include -#include -#include -#include "common/object_pool.h" -#include "shader_recompiler/ir/basic_block.h" -#include "shader_recompiler/profile.h" -#include "shader_recompiler/runtime_info.h" -#include "video_core/amdgpu/liverpool.h" -#include "video_core/renderer_vulkan/vk_common.h" - -namespace Vulkan { - -class Instance; - -struct BufferSpecialization { - u16 stride : 14; - u16 is_storage : 1; - - auto operator<=>(const BufferSpecialization&) const = default; -}; - -struct TextureBufferSpecialization { - bool is_integer = false; - - auto operator<=>(const TextureBufferSpecialization&) const = default; -}; - -struct ImageSpecialization { - AmdGpu::ImageType type = AmdGpu::ImageType::Color2D; - bool is_integer = false; - - auto operator<=>(const ImageSpecialization&) const = default; -}; - -struct StageSpecialization { - static constexpr size_t MaxStageResources = 32; - - const Shader::Info* info; - std::bitset bitset{}; - boost::container::small_vector buffers; - boost::container::small_vector tex_buffers; - boost::container::small_vector images; - u32 start_binding{}; - - void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) { - for (const auto& desc : desc_list) { - auto& spec = spec_list.emplace_back(); - const auto sharp = desc.GetSharp(*info); - if (!sharp) { - binding++; - continue; - } - bitset.set(binding++); - func(spec, desc, sharp); - } - } - - StageSpecialization(const Shader::Info& info_, u32 start_binding_) - : info{&info_}, start_binding{start_binding_} { - u32 binding{}; - ForEachSharp(binding, buffers, info->buffers, - [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { - spec.stride = sharp.GetStride(); - spec.is_storage = desc.IsStorage(sharp); - }); - ForEachSharp(binding, tex_buffers, info->texture_buffers, - [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) { - spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); - }); - ForEachSharp(binding, images, info->images, - [](auto& spec, const auto& desc, AmdGpu::Image sharp) { - spec.type = sharp.GetType(); - spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt()); - }); - } - - bool operator==(const StageSpecialization& other) const { - if (start_binding != other.start_binding) { - return false; - } - u32 binding{}; - for (u32 i = 0; i < buffers.size(); i++) { - if (other.bitset[binding++] && buffers[i] != other.buffers[i]) { - return false; - } - } - for (u32 i = 0; i < tex_buffers.size(); i++) { - if (other.bitset[binding++] && tex_buffers[i] != other.tex_buffers[i]) { - return false; - } - } - for (u32 i = 0; i < images.size(); i++) { - if (other.bitset[binding++] && images[i] != other.images[i]) { - return false; - } - } - return true; - } -}; - -struct Program { - struct Module { - vk::ShaderModule module; - StageSpecialization spec; - }; - - Shader::Info info; - boost::container::small_vector modules; - - explicit Program(const Shader::Info& info_) : info{info_} {} -}; - -struct GuestProgram { - Shader::Stage stage; - std::span user_data; - std::span code; - u64 hash; - - explicit GuestProgram(const auto* pgm, Shader::Stage stage_) - : stage{stage_}, user_data{pgm->user_data}, code{pgm->Code()} { - const auto* bininfo = AmdGpu::Liverpool::GetBinaryInfo(*pgm); - hash = bininfo->shader_hash; - } -}; - -class ShaderCache { -public: - explicit ShaderCache(const Instance& instance, AmdGpu::Liverpool* liverpool); - ~ShaderCache() = default; - - std::tuple GetProgram(const GuestProgram& pgm, - u32& binding); - -private: - void DumpShader(std::span code, u64 hash, Shader::Stage stage, size_t perm_idx, - std::string_view ext); - vk::ShaderModule CompileModule(Shader::Info& info, std::span code, size_t perm_idx, - u32& binding); - Program* CreateProgram(const GuestProgram& pgm, u32& binding); - -private: - const Instance& instance; - AmdGpu::Liverpool* liverpool; - Shader::Profile profile{}; - tsl::robin_map program_cache; - Common::ObjectPool inst_pool; - Common::ObjectPool block_pool; - Common::ObjectPool program_pool; -}; - -} // namespace Vulkan