video_core: Refactor and render target swizzles

This commit is contained in:
IndecisiveTurtle 2024-09-02 20:45:11 +03:00
commit b913665197
12 changed files with 392 additions and 367 deletions

View file

@ -420,6 +420,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/recompiler.cpp
src/shader_recompiler/recompiler.h
src/shader_recompiler/runtime_info.h
src/shader_recompiler/specialization.h
src/shader_recompiler/backend/spirv/emit_spirv.cpp
src/shader_recompiler/backend/spirv/emit_spirv.h
src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@ -533,8 +534,6 @@ set(VIDEO_CORE src/video_core/amdgpu/liverpool.cpp
src/video_core/renderer_vulkan/vk_resource_pool.h
src/video_core/renderer_vulkan/vk_scheduler.cpp
src/video_core/renderer_vulkan/vk_scheduler.h
src/video_core/renderer_vulkan/vk_shader_cache.cpp
src/video_core/renderer_vulkan/vk_shader_cache.h
src/video_core/renderer_vulkan/vk_shader_util.cpp
src/video_core/renderer_vulkan/vk_shader_util.h
src/video_core/renderer_vulkan/vk_swapchain.cpp

View file

@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/specialization.h"
namespace Shader::Gcn {
@ -19,12 +20,28 @@ void Translator::EmitExport(const GcnInst& inst) {
IR::VectorReg(inst.src[3].code),
};
const auto swizzle = [&](u32 comp) {
if (!IR::IsMrt(attrib)) {
return comp;
}
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
switch (info.mrt_swizzles[index]) {
case MrtSwizzle::Identity:
return comp;
case MrtSwizzle::Alt:
static constexpr std::array<u32, 4> AltSwizzle = {2, 1, 0, 3};
return AltSwizzle[comp];
default:
UNREACHABLE();
}
};
const auto unpack = [&](u32 idx) {
const IR::Value value = ir.UnpackHalf2x16(ir.GetVectorReg(vsrc[idx]));
const IR::F32 r = IR::F32{ir.CompositeExtract(value, 0)};
const IR::F32 g = IR::F32{ir.CompositeExtract(value, 1)};
ir.SetAttribute(attrib, r, idx * 2);
ir.SetAttribute(attrib, g, idx * 2 + 1);
ir.SetAttribute(attrib, r, swizzle(idx * 2));
ir.SetAttribute(attrib, g, swizzle(idx * 2 + 1));
};
// Components are float16 packed into a VGPR
@ -45,7 +62,7 @@ void Translator::EmitExport(const GcnInst& inst) {
continue;
}
const IR::F32 comp = ir.GetVectorReg<IR::F32>(vsrc[i]);
ir.SetAttribute(attrib, comp, i);
ir.SetAttribute(attrib, comp, swizzle(i));
}
}
}

View file

@ -445,7 +445,6 @@ void Translator::EmitFlowControl(u32 pc, const GcnInst& inst) {
}
void Translator::LogMissingOpcode(const GcnInst& inst) {
const u32 opcode = u32(inst.opcode);
LOG_ERROR(Render_Recompiler, "Unknown opcode {} ({}, category = {})",
magic_enum::enum_name(inst.opcode), u32(inst.opcode),
magic_enum::enum_name(inst.category));

View file

@ -479,10 +479,11 @@ void Translator::V_ADD_F32(const GcnInst& inst) {
void Translator::V_CVT_OFF_F32_I4(const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::VectorReg dst_reg{inst.dst[0].code};
ir.SetVectorReg(
dst_reg,
ir.FPMul(ir.ConvertUToF(32, 32, ir.ISub(ir.BitwiseAnd(src0, ir.Imm32(0xF)), ir.Imm32(8))),
ir.Imm32(1.f / 16.f)));
ASSERT(src0.IsImmediate());
static constexpr std::array IntToFloat = {
0.0f, 0.0625f, 0.1250f, 0.1875f, 0.2500f, 0.3125f, 0.3750f, 0.4375f,
-0.5000f, -0.4375f, -0.3750f, -0.3125f, -0.2500f, -0.1875f, -0.1250f, -0.0625f};
ir.SetVectorReg(dst_reg, ir.Imm32(IntToFloat[src0.U32()]));
}
void Translator::V_MED3_F32(const GcnInst& inst) {

View file

@ -6,6 +6,7 @@
#include "shader_recompiler/frontend/structured_control_flow.h"
#include "shader_recompiler/ir/passes/ir_passes.h"
#include "shader_recompiler/ir/post_order.h"
#include "shader_recompiler/recompiler.h"
namespace Shader {

View file

@ -16,6 +16,14 @@
namespace Shader {
static constexpr size_t NumUserDataRegs = 16;
static constexpr size_t MaxColorBuffers = 8;
enum class MrtSwizzle : u32 {
Identity = 0,
Alt = 1,
Reverse = 2,
ReverseAlt = 3,
};
enum class Stage : u32 {
Fragment,
@ -162,6 +170,7 @@ struct Info {
u32 default_value;
};
boost::container::static_vector<PsInput, 32> ps_inputs{};
std::array<MrtSwizzle, MaxColorBuffers> mrt_swizzles;
struct AttributeFlags {
bool Get(IR::Attribute attrib, u32 comp = 0) const {

View file

@ -0,0 +1,87 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <bitset>
#include "common/types.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader {
struct BufferSpecialization {
u16 stride : 14;
u16 is_storage : 1;
auto operator<=>(const BufferSpecialization&) const = default;
};
struct TextureBufferSpecialization {
bool is_integer = false;
auto operator<=>(const TextureBufferSpecialization&) const = default;
};
struct ImageSpecialization {
AmdGpu::ImageType type = AmdGpu::ImageType::Color2D;
bool is_integer = false;
auto operator<=>(const ImageSpecialization&) const = default;
};
struct StageSpecialization {
static constexpr size_t MaxStageResources = 32;
const Shader::Info* info;
std::bitset<MaxStageResources> bitset{};
boost::container::small_vector<BufferSpecialization, 16> buffers;
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
boost::container::small_vector<ImageSpecialization, 8> images;
std::array<MrtSwizzle, MaxColorBuffers> mrt_swizzles;
u32 start_binding{};
explicit StageSpecialization(const Shader::Info& info_) : info{&info_} {}
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
for (const auto& desc : desc_list) {
auto& spec = spec_list.emplace_back();
const auto sharp = desc.GetSharp(*info);
if (!sharp) {
binding++;
continue;
}
bitset.set(binding++);
func(spec, desc, sharp);
}
}
bool operator==(const StageSpecialization& other) const {
if (start_binding != other.start_binding) {
return false;
}
if (info->stage == Shader::Stage::Fragment &&
!std::ranges::equal(mrt_swizzles, other.mrt_swizzles)) {
return false;
}
u32 binding{};
for (u32 i = 0; i < buffers.size(); i++) {
if (other.bitset[binding++] && buffers[i] != other.buffers[i]) {
return false;
}
}
for (u32 i = 0; i < tex_buffers.size(); i++) {
if (other.bitset[binding++] && tex_buffers[i] != other.tex_buffers[i]) {
return false;
}
}
for (u32 i = 0; i < images.size(); i++) {
if (other.bitset[binding++] && images[i] != other.images[i]) {
return false;
}
}
return true;
}
};
} // namespace Shader

View file

@ -25,6 +25,7 @@ using Liverpool = AmdGpu::Liverpool;
struct GraphicsPipelineKey {
std::array<size_t, MaxShaderStages> stage_hashes;
std::array<vk::Format, Liverpool::NumColorBuffers> color_formats;
std::array<Liverpool::ColorBuffer::SwapMode, Liverpool::NumColorBuffers> mrt_swizzles;
vk::Format depth_format;
vk::Format stencil_format;

View file

@ -1,21 +1,83 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <ranges>
#include "common/config.h"
#include "common/io_file.h"
#include "common/path_util.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_shader_cache.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
extern std::unique_ptr<Vulkan::RendererVulkan> renderer;
namespace Vulkan {
using Shader::VsOutput;
void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) {
const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
w != VsOutput::None) {
info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
}
};
// VS_OUT_MISC_VEC
add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None,
ctl.use_vtx_edge_flag
? VsOutput::EdgeFlag
: (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None),
ctl.use_vtx_kill_flag
? VsOutput::KillFlag
: (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None),
ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None);
// VS_OUT_CCDIST0
add_output(ctl.IsClipDistEnabled(0)
? VsOutput::ClipDist0
: (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None),
ctl.IsClipDistEnabled(1)
? VsOutput::ClipDist1
: (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None),
ctl.IsClipDistEnabled(2)
? VsOutput::ClipDist2
: (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None),
ctl.IsClipDistEnabled(3)
? VsOutput::ClipDist3
: (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None));
// VS_OUT_CCDIST1
add_output(ctl.IsClipDistEnabled(4)
? VsOutput::ClipDist4
: (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None),
ctl.IsClipDistEnabled(5)
? VsOutput::ClipDist5
: (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None),
ctl.IsClipDistEnabled(6)
? VsOutput::ClipDist6
: (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None),
ctl.IsClipDistEnabled(7)
? VsOutput::ClipDist7
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
}
[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}
PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_},
shader_cache{std::make_unique<ShaderCache>(instance, liverpool)} {
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192},
block_pool{512} {
profile = Shader::Profile{
.supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U,
.subgroup_size = instance.SubgroupSize(),
.support_explicit_workgroup_layout = true,
};
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
}
@ -134,6 +196,7 @@ bool PipelineCache::RefreshGraphicsKey() {
key.color_formats.fill(vk::Format::eUndefined);
key.blend_controls.fill({});
key.write_masks.fill({});
key.mrt_swizzles.fill(Liverpool::ColorBuffer::SwapMode::Standard);
int remapped_cb{};
for (auto cb = 0u; cb < Liverpool::NumColorBuffers; ++cb) {
auto const& col_buf = regs.color_buffers[cb];
@ -142,9 +205,12 @@ bool PipelineCache::RefreshGraphicsKey() {
}
const auto base_format =
LiverpoolToVK::SurfaceFormat(col_buf.info.format, col_buf.NumFormat());
const auto is_vo_surface = renderer->IsVideoOutSurface(col_buf);
const bool is_vo_surface = renderer->IsVideoOutSurface(col_buf);
key.color_formats[remapped_cb] = LiverpoolToVK::AdjustColorBufferFormat(
base_format, col_buf.info.comp_swap.Value(), false /*is_vo_surface*/);
if (base_format == key.color_formats[remapped_cb]) {
key.mrt_swizzles[remapped_cb] = col_buf.info.comp_swap.Value();
}
key.blend_controls[remapped_cb] = regs.blend_control[cb];
key.blend_controls[remapped_cb].enable.Assign(key.blend_controls[remapped_cb].enable &&
!col_buf.info.blend_bypass);
@ -169,6 +235,7 @@ bool PipelineCache::RefreshGraphicsKey() {
}
const auto* bininfo = Liverpool::GetBinaryInfo(*pgm);
if (!bininfo->Valid()) {
LOG_WARNING(Render_Vulkan, "Invalid binary info structure!");
key.stage_hashes[i] = 0;
infos[i] = nullptr;
continue;
@ -177,9 +244,8 @@ bool PipelineCache::RefreshGraphicsKey() {
return false;
}
const auto stage = Shader::Stage{i};
const GuestProgram guest_pgm{pgm, stage};
std::tie(infos[i], modules[i], key.stage_hashes[i]) =
shader_cache->GetProgram(guest_pgm, binding);
const auto guest_pgm = GuestProgram{pgm, stage};
std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(guest_pgm, binding);
}
return true;
}
@ -191,8 +257,157 @@ bool PipelineCache::RefreshComputeKey() {
if (ShouldSkipShader(guest_pgm.hash, "compute")) {
return false;
}
std::tie(infos[0], modules[0], compute_key) = shader_cache->GetProgram(guest_pgm, binding);
std::tie(infos[0], modules[0], compute_key) = GetProgram(guest_pgm, binding);
return true;
}
vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, std::span<const u32> code,
size_t perm_idx, u32& binding) {
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
perm_idx != 0 ? "(permutation)" : "");
if (Config::dumpShaders()) {
DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin");
}
block_pool.ReleaseContents();
inst_pool.ReleaseContents();
const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile);
// Compile IR to SPIR-V
const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding);
if (Config::dumpShaders()) {
DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv");
}
// Create module and set name to hash in renderdoc
const auto module = CompileSPV(spv, instance.GetDevice());
ASSERT(module != VK_NULL_HANDLE);
const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx);
Vulkan::SetObjectName(instance.GetDevice(), module, name);
return module;
}
std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram(
const GuestProgram& pgm, u32& binding) {
auto [it_pgm, new_program] = program_cache.try_emplace(pgm.hash);
if (new_program) {
Program* program = program_pool.Create(BuildShaderInfo(pgm, liverpool->regs));
u32 start_binding = binding;
const auto module = CompileModule(program->info, pgm.code, 0, binding);
const auto spec = BuildStageSpec(program->info, start_binding);
program->modules.emplace_back(module, spec);
it_pgm.value() = program;
return std::make_tuple(&program->info, module, HashCombine(pgm.hash, 0));
}
Program* program = it_pgm->second;
const auto& info = program->info;
const auto spec = BuildStageSpec(info, binding);
size_t perm_idx = program->modules.size();
vk::ShaderModule module{};
const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec);
if (it == program->modules.end()) {
auto new_info = BuildShaderInfo(pgm, liverpool->regs);
module = CompileModule(new_info, pgm.code, perm_idx, binding);
program->modules.emplace_back(module, std::move(spec));
} else {
binding += info.NumBindings();
module = it->module;
perm_idx = std::distance(program->modules.begin(), it);
}
return std::make_tuple(&info, module, HashCombine(pgm.hash, perm_idx));
}
Shader::Info PipelineCache::BuildShaderInfo(const GuestProgram& pgm,
const AmdGpu::Liverpool::Regs& regs) {
Shader::Info info{};
info.user_data = pgm.user_data;
info.pgm_base = VAddr(pgm.code.data());
info.pgm_hash = pgm.hash;
info.stage = pgm.stage;
switch (pgm.stage) {
case Shader::Stage::Vertex: {
info.num_user_data = regs.vs_program.settings.num_user_regs;
info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt;
BuildVsOutputs(info, regs.vs_output_control);
break;
}
case Shader::Stage::Fragment: {
info.num_user_data = regs.ps_program.settings.num_user_regs;
std::ranges::transform(graphics_key.mrt_swizzles, info.mrt_swizzles.begin(),
[](Liverpool::ColorBuffer::SwapMode mode) {
return static_cast<Shader::MrtSwizzle>(mode);
});
for (u32 i = 0; i < regs.num_interp; i++) {
info.ps_inputs.push_back({
.param_index = regs.ps_inputs[i].input_offset.Value(),
.is_default = bool(regs.ps_inputs[i].use_default),
.is_flat = bool(regs.ps_inputs[i].flat_shade),
.default_value = regs.ps_inputs[i].default_value,
});
}
break;
}
case Shader::Stage::Compute: {
const auto& cs_pgm = regs.cs_program;
info.num_user_data = cs_pgm.settings.num_user_regs;
info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
cs_pgm.num_thread_z.full};
info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
cs_pgm.IsTgidEnabled(2)};
info.shared_memory_size = cs_pgm.SharedMemSize();
break;
}
default:
break;
}
return info;
}
Shader::StageSpecialization PipelineCache::BuildStageSpec(const Shader::Info& info,
u32 start_binding) {
u32 binding{};
auto spec = Shader::StageSpecialization{info};
spec.start_binding = start_binding;
spec.ForEachSharp(binding, spec.buffers, info.buffers,
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.stride = sharp.GetStride();
spec.is_storage = desc.IsStorage(sharp);
});
spec.ForEachSharp(binding, spec.tex_buffers, info.texture_buffers,
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
});
spec.ForEachSharp(binding, spec.images, info.images,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.type = sharp.GetType();
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
});
switch (info.stage) {
case Shader::Stage::Fragment:
std::ranges::transform(graphics_key.mrt_swizzles, spec.mrt_swizzles.begin(),
[](Liverpool::ColorBuffer::SwapMode mode) {
return static_cast<Shader::MrtSwizzle>(mode);
});
default:
break;
}
return spec;
}
void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
size_t perm_idx, std::string_view ext) {
using namespace Common::FS;
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
if (!std::filesystem::exists(dump_dir)) {
std::filesystem::create_directories(dump_dir);
}
const auto filename = fmt::format("{}_{:#018x}_{}.{}", stage, hash, perm_idx, ext);
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
file.WriteSpan(code);
}
} // namespace Vulkan

View file

@ -4,6 +4,10 @@
#pragma once
#include <tsl/robin_map.h>
#include "common/object_pool.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/specialization.h"
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
@ -17,6 +21,31 @@ class Instance;
class Scheduler;
class ShaderCache;
struct Program {
struct Module {
vk::ShaderModule module;
Shader::StageSpecialization spec;
};
Shader::Info info;
boost::container::small_vector<Module, 8> modules;
explicit Program(const Shader::Info& info_) : info{info_} {}
};
struct GuestProgram {
Shader::Stage stage;
std::span<const u32, AmdGpu::Liverpool::NumShaderUserData> user_data;
std::span<const u32> code;
u64 hash;
explicit GuestProgram(const auto* pgm, Shader::Stage stage_)
: stage{stage_}, user_data{pgm->user_data}, code{pgm->Code()} {
const auto* bininfo = AmdGpu::Liverpool::GetBinaryInfo(*pgm);
hash = bininfo->shader_hash;
}
};
class PipelineCache {
static constexpr size_t MaxShaderStages = 5;
@ -29,17 +58,32 @@ public:
const ComputePipeline* GetComputePipeline();
std::tuple<const Shader::Info*, vk::ShaderModule, u64> GetProgram(const GuestProgram& pgm,
u32& binding);
private:
bool RefreshGraphicsKey();
bool RefreshComputeKey();
void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, size_t perm_idx,
std::string_view ext);
vk::ShaderModule CompileModule(Shader::Info& info, std::span<const u32> code, size_t perm_idx,
u32& binding);
Shader::Info BuildShaderInfo(const GuestProgram& pgm, const AmdGpu::Liverpool::Regs& regs);
Shader::StageSpecialization BuildStageSpec(const Shader::Info& info, u32 binding);
private:
const Instance& instance;
Scheduler& scheduler;
AmdGpu::Liverpool* liverpool;
vk::UniquePipelineCache pipeline_cache;
vk::UniquePipelineLayout pipeline_layout;
std::unique_ptr<ShaderCache> shader_cache;
Shader::Profile profile{};
tsl::robin_map<size_t, Program*> program_cache;
Common::ObjectPool<Shader::IR::Inst> inst_pool;
Common::ObjectPool<Shader::IR::Block> block_pool;
Common::ObjectPool<Program> program_pool;
tsl::robin_map<size_t, std::unique_ptr<ComputePipeline>> compute_pipelines;
tsl::robin_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
std::array<const Shader::Info*, MaxShaderStages> infos{};

View file

@ -1,192 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/config.h"
#include "common/io_file.h"
#include "common/path_util.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/recompiler.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_platform.h"
#include "video_core/renderer_vulkan/vk_shader_cache.h"
#include "video_core/renderer_vulkan/vk_shader_util.h"
namespace Vulkan {
using Shader::VsOutput;
void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) {
const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
w != VsOutput::None) {
info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
}
};
// VS_OUT_MISC_VEC
add_output(ctl.use_vtx_point_size ? VsOutput::PointSprite : VsOutput::None,
ctl.use_vtx_edge_flag
? VsOutput::EdgeFlag
: (ctl.use_vtx_gs_cut_flag ? VsOutput::GsCutFlag : VsOutput::None),
ctl.use_vtx_kill_flag
? VsOutput::KillFlag
: (ctl.use_vtx_render_target_idx ? VsOutput::GsMrtIndex : VsOutput::None),
ctl.use_vtx_viewport_idx ? VsOutput::GsVpIndex : VsOutput::None);
// VS_OUT_CCDIST0
add_output(ctl.IsClipDistEnabled(0)
? VsOutput::ClipDist0
: (ctl.IsCullDistEnabled(0) ? VsOutput::CullDist0 : VsOutput::None),
ctl.IsClipDistEnabled(1)
? VsOutput::ClipDist1
: (ctl.IsCullDistEnabled(1) ? VsOutput::CullDist1 : VsOutput::None),
ctl.IsClipDistEnabled(2)
? VsOutput::ClipDist2
: (ctl.IsCullDistEnabled(2) ? VsOutput::CullDist2 : VsOutput::None),
ctl.IsClipDistEnabled(3)
? VsOutput::ClipDist3
: (ctl.IsCullDistEnabled(3) ? VsOutput::CullDist3 : VsOutput::None));
// VS_OUT_CCDIST1
add_output(ctl.IsClipDistEnabled(4)
? VsOutput::ClipDist4
: (ctl.IsCullDistEnabled(4) ? VsOutput::CullDist4 : VsOutput::None),
ctl.IsClipDistEnabled(5)
? VsOutput::ClipDist5
: (ctl.IsCullDistEnabled(5) ? VsOutput::CullDist5 : VsOutput::None),
ctl.IsClipDistEnabled(6)
? VsOutput::ClipDist6
: (ctl.IsCullDistEnabled(6) ? VsOutput::CullDist6 : VsOutput::None),
ctl.IsClipDistEnabled(7)
? VsOutput::ClipDist7
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
}
Shader::Info MakeShaderInfo(const GuestProgram& pgm, const AmdGpu::Liverpool::Regs& regs) {
Shader::Info info{};
info.user_data = pgm.user_data;
info.pgm_base = VAddr(pgm.code.data());
info.pgm_hash = pgm.hash;
info.stage = pgm.stage;
switch (pgm.stage) {
case Shader::Stage::Vertex: {
info.num_user_data = regs.vs_program.settings.num_user_regs;
info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt;
BuildVsOutputs(info, regs.vs_output_control);
break;
}
case Shader::Stage::Fragment: {
info.num_user_data = regs.ps_program.settings.num_user_regs;
for (u32 i = 0; i < regs.num_interp; i++) {
info.ps_inputs.push_back({
.param_index = regs.ps_inputs[i].input_offset.Value(),
.is_default = bool(regs.ps_inputs[i].use_default),
.is_flat = bool(regs.ps_inputs[i].flat_shade),
.default_value = regs.ps_inputs[i].default_value,
});
}
break;
}
case Shader::Stage::Compute: {
const auto& cs_pgm = regs.cs_program;
info.num_user_data = cs_pgm.settings.num_user_regs;
info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
cs_pgm.num_thread_z.full};
info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
cs_pgm.IsTgidEnabled(2)};
info.shared_memory_size = cs_pgm.SharedMemSize();
break;
}
default:
break;
}
return info;
}
[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
}
ShaderCache::ShaderCache(const Instance& instance_, AmdGpu::Liverpool* liverpool_)
: instance{instance_}, liverpool{liverpool_}, inst_pool{8192}, block_pool{512} {
profile = Shader::Profile{
.supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U,
.subgroup_size = instance.SubgroupSize(),
.support_explicit_workgroup_layout = true,
};
}
vk::ShaderModule ShaderCache::CompileModule(Shader::Info& info, std::span<const u32> code,
size_t perm_idx, u32& binding) {
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
perm_idx != 0 ? "(permutation)" : "");
if (Config::dumpShaders()) {
DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin");
}
block_pool.ReleaseContents();
inst_pool.ReleaseContents();
const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile);
// Compile IR to SPIR-V
const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding);
if (Config::dumpShaders()) {
DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv");
}
// Create module and set name to hash in renderdoc
const auto module = CompileSPV(spv, instance.GetDevice());
ASSERT(module != VK_NULL_HANDLE);
const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx);
Vulkan::SetObjectName(instance.GetDevice(), module, name);
return module;
}
Program* ShaderCache::CreateProgram(const GuestProgram& pgm, u32& binding) {
Program* program = program_pool.Create(MakeShaderInfo(pgm, liverpool->regs));
u32 start_binding = binding;
const auto module = CompileModule(program->info, pgm.code, 0, binding);
program->modules.emplace_back(module, StageSpecialization{program->info, start_binding});
return program;
}
std::tuple<const Shader::Info*, vk::ShaderModule, u64> ShaderCache::GetProgram(
const GuestProgram& pgm, u32& binding) {
auto [it_pgm, new_program] = program_cache.try_emplace(pgm.hash);
if (new_program) {
auto program = CreateProgram(pgm, binding);
const auto module = program->modules.back().module;
it_pgm.value() = program;
return std::make_tuple(&program->info, module, HashCombine(pgm.hash, 0));
}
Program* program = it_pgm->second;
const auto& info = program->info;
size_t perm_idx = program->modules.size();
StageSpecialization spec{info, binding};
vk::ShaderModule module{};
const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec);
if (it == program->modules.end()) {
auto new_info = MakeShaderInfo(pgm, liverpool->regs);
module = CompileModule(new_info, pgm.code, perm_idx, binding);
program->modules.emplace_back(module, std::move(spec));
} else {
binding += info.NumBindings();
module = it->module;
perm_idx = std::distance(program->modules.begin(), it);
}
return std::make_tuple(&info, module, HashCombine(pgm.hash, perm_idx));
}
void ShaderCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
size_t perm_idx, std::string_view ext) {
using namespace Common::FS;
const auto dump_dir = GetUserPath(PathType::ShaderDir) / "dumps";
if (!std::filesystem::exists(dump_dir)) {
std::filesystem::create_directories(dump_dir);
}
const auto filename = fmt::format("{}_{:#018x}_{}.{}", stage, hash, perm_idx, ext);
const auto file = IOFile{dump_dir / filename, FileAccessMode::Write};
file.WriteSpan(code);
}
} // namespace Vulkan

View file

@ -1,156 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#pragma once
#include <bitset>
#include <boost/container/small_vector.hpp>
#include <tsl/robin_map.h>
#include "common/object_pool.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/profile.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/renderer_vulkan/vk_common.h"
namespace Vulkan {
class Instance;
struct BufferSpecialization {
u16 stride : 14;
u16 is_storage : 1;
auto operator<=>(const BufferSpecialization&) const = default;
};
struct TextureBufferSpecialization {
bool is_integer = false;
auto operator<=>(const TextureBufferSpecialization&) const = default;
};
struct ImageSpecialization {
AmdGpu::ImageType type = AmdGpu::ImageType::Color2D;
bool is_integer = false;
auto operator<=>(const ImageSpecialization&) const = default;
};
struct StageSpecialization {
static constexpr size_t MaxStageResources = 32;
const Shader::Info* info;
std::bitset<MaxStageResources> bitset{};
boost::container::small_vector<BufferSpecialization, 16> buffers;
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
boost::container::small_vector<ImageSpecialization, 8> images;
u32 start_binding{};
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
for (const auto& desc : desc_list) {
auto& spec = spec_list.emplace_back();
const auto sharp = desc.GetSharp(*info);
if (!sharp) {
binding++;
continue;
}
bitset.set(binding++);
func(spec, desc, sharp);
}
}
StageSpecialization(const Shader::Info& info_, u32 start_binding_)
: info{&info_}, start_binding{start_binding_} {
u32 binding{};
ForEachSharp(binding, buffers, info->buffers,
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.stride = sharp.GetStride();
spec.is_storage = desc.IsStorage(sharp);
});
ForEachSharp(binding, tex_buffers, info->texture_buffers,
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
});
ForEachSharp(binding, images, info->images,
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
spec.type = sharp.GetType();
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
});
}
bool operator==(const StageSpecialization& other) const {
if (start_binding != other.start_binding) {
return false;
}
u32 binding{};
for (u32 i = 0; i < buffers.size(); i++) {
if (other.bitset[binding++] && buffers[i] != other.buffers[i]) {
return false;
}
}
for (u32 i = 0; i < tex_buffers.size(); i++) {
if (other.bitset[binding++] && tex_buffers[i] != other.tex_buffers[i]) {
return false;
}
}
for (u32 i = 0; i < images.size(); i++) {
if (other.bitset[binding++] && images[i] != other.images[i]) {
return false;
}
}
return true;
}
};
struct Program {
struct Module {
vk::ShaderModule module;
StageSpecialization spec;
};
Shader::Info info;
boost::container::small_vector<Module, 8> modules;
explicit Program(const Shader::Info& info_) : info{info_} {}
};
struct GuestProgram {
Shader::Stage stage;
std::span<const u32, AmdGpu::Liverpool::NumShaderUserData> user_data;
std::span<const u32> code;
u64 hash;
explicit GuestProgram(const auto* pgm, Shader::Stage stage_)
: stage{stage_}, user_data{pgm->user_data}, code{pgm->Code()} {
const auto* bininfo = AmdGpu::Liverpool::GetBinaryInfo(*pgm);
hash = bininfo->shader_hash;
}
};
class ShaderCache {
public:
explicit ShaderCache(const Instance& instance, AmdGpu::Liverpool* liverpool);
~ShaderCache() = default;
std::tuple<const Shader::Info*, vk::ShaderModule, u64> GetProgram(const GuestProgram& pgm,
u32& binding);
private:
void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, size_t perm_idx,
std::string_view ext);
vk::ShaderModule CompileModule(Shader::Info& info, std::span<const u32> code, size_t perm_idx,
u32& binding);
Program* CreateProgram(const GuestProgram& pgm, u32& binding);
private:
const Instance& instance;
AmdGpu::Liverpool* liverpool;
Shader::Profile profile{};
tsl::robin_map<size_t, Program*> program_cache;
Common::ObjectPool<Shader::IR::Inst> inst_pool;
Common::ObjectPool<Shader::IR::Block> block_pool;
Common::ObjectPool<Program> program_pool;
};
} // namespace Vulkan