mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-04-22 04:24:44 +00:00
video_core: Refactor shader recompiler interface
* Makes it much easier to pass runtime information to the recompiler and have it treated as part of the shader key. Also pulls out most runtime state from Info struct
This commit is contained in:
parent
1a7767788c
commit
f7a8ceb395
25 changed files with 517 additions and 411 deletions
|
@ -419,6 +419,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
|
|||
src/shader_recompiler/profile.h
|
||||
src/shader_recompiler/recompiler.cpp
|
||||
src/shader_recompiler/recompiler.h
|
||||
src/shader_recompiler/info.h
|
||||
src/shader_recompiler/params.h
|
||||
src/shader_recompiler/runtime_info.h
|
||||
src/shader_recompiler/specialization.h
|
||||
src/shader_recompiler/backend/spirv/emit_spirv.cpp
|
||||
|
|
|
@ -210,7 +210,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
|||
}
|
||||
switch (program.info.stage) {
|
||||
case Stage::Compute: {
|
||||
const std::array<u32, 3> workgroup_size{program.info.workgroup_size};
|
||||
const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size};
|
||||
execution_model = spv::ExecutionModel::GLCompute;
|
||||
ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
|
||||
workgroup_size[1], workgroup_size[2]);
|
||||
|
@ -258,8 +258,9 @@ void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) {
|
|||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
std::vector<u32> EmitSPIRV(const Profile& profile, const IR::Program& program, u32& binding) {
|
||||
EmitContext ctx{profile, program.info, binding};
|
||||
std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
|
||||
const IR::Program& program, u32& binding) {
|
||||
EmitContext ctx{profile, runtime_info, program.info, binding};
|
||||
const Id main{DefineMain(ctx, program)};
|
||||
DefineEntryPoint(program, ctx, main);
|
||||
if (program.info.stage == Stage::Vertex) {
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
|
||||
[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const IR::Program& program,
|
||||
u32& binding);
|
||||
[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
|
||||
const IR::Program& program, u32& binding);
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -59,7 +59,7 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
|||
case IR::Attribute::Position2:
|
||||
case IR::Attribute::Position3: {
|
||||
const u32 index = u32(attr) - u32(IR::Attribute::Position1);
|
||||
return VsOutputAttrPointer(ctx, ctx.info.vs_outputs[index][element]);
|
||||
return VsOutputAttrPointer(ctx, ctx.runtime_info.vs_info.outputs[index][element]);
|
||||
}
|
||||
case IR::Attribute::RenderTarget0:
|
||||
case IR::Attribute::RenderTarget1:
|
||||
|
|
|
@ -41,9 +41,10 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
|
|||
|
||||
} // Anonymous namespace
|
||||
|
||||
EmitContext::EmitContext(const Profile& profile_, const Shader::Info& info_, u32& binding_)
|
||||
: Sirit::Module(profile_.supported_spirv), info{info_}, profile{profile_}, stage{info.stage},
|
||||
binding{binding_} {
|
||||
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
|
||||
const Shader::Info& info_, u32& binding_)
|
||||
: Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
|
||||
profile{profile_}, stage{info.stage}, binding{binding_} {
|
||||
AddCapability(spv::Capability::Shader);
|
||||
DefineArithmeticTypes();
|
||||
DefineInterfaces();
|
||||
|
@ -247,7 +248,7 @@ void EmitContext::DefineInputs() {
|
|||
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
|
||||
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
|
||||
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
||||
for (const auto& input : info.ps_inputs) {
|
||||
for (const auto& input : runtime_info.fs_info.inputs) {
|
||||
const u32 semantic = input.param_index;
|
||||
if (input.is_default && !input.is_flat) {
|
||||
input_params[semantic] = {MakeDefaultValue(*this, input.default_value), F32[1],
|
||||
|
@ -554,7 +555,7 @@ void EmitContext::DefineSharedMemory() {
|
|||
if (!info.uses_shared) {
|
||||
return;
|
||||
}
|
||||
u32 shared_memory_size = info.shared_memory_size;
|
||||
u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
|
||||
if (shared_memory_size == 0) {
|
||||
shared_memory_size = DefaultSharedMemSize;
|
||||
}
|
||||
|
|
|
@ -6,9 +6,9 @@
|
|||
#include <array>
|
||||
#include <sirit/sirit.h>
|
||||
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
|
||||
|
@ -36,7 +36,8 @@ struct VectorIds {
|
|||
|
||||
class EmitContext final : public Sirit::Module {
|
||||
public:
|
||||
explicit EmitContext(const Profile& profile, const Shader::Info& info, u32& binding);
|
||||
explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info,
|
||||
const Shader::Info& info, u32& binding);
|
||||
~EmitContext();
|
||||
|
||||
Id Def(const IR::Value& value);
|
||||
|
@ -125,6 +126,7 @@ public:
|
|||
}
|
||||
|
||||
const Info& info;
|
||||
const RuntimeInfo& runtime_info;
|
||||
const Profile& profile;
|
||||
Stage stage{};
|
||||
|
||||
|
|
|
@ -602,13 +602,14 @@ public:
|
|||
Common::ObjectPool<IR::Block>& block_pool_,
|
||||
Common::ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
|
||||
IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
|
||||
Info& info_, const Profile& profile_)
|
||||
Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
|
||||
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
|
||||
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, profile{profile_} {
|
||||
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_},
|
||||
runtime_info{runtime_info_}, profile{profile_} {
|
||||
Visit(root_stmt, nullptr, nullptr);
|
||||
|
||||
IR::Block& first_block{*syntax_list.front().data.block};
|
||||
Translator{&first_block, info, profile}.EmitPrologue();
|
||||
Translator{&first_block, info, runtime_info, profile}.EmitPrologue();
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -637,7 +638,7 @@ private:
|
|||
const u32 start = stmt.block->begin_index;
|
||||
const u32 size = stmt.block->end_index - start + 1;
|
||||
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size),
|
||||
info, profile);
|
||||
info, runtime_info, profile);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -817,19 +818,20 @@ private:
|
|||
const Block dummy_flow_block{.is_dummy = true};
|
||||
std::span<const GcnInst> inst_list;
|
||||
Info& info;
|
||||
const RuntimeInfo& runtime_info;
|
||||
const Profile& profile;
|
||||
};
|
||||
} // Anonymous namespace
|
||||
|
||||
IR::AbstractSyntaxList BuildASL(Common::ObjectPool<IR::Inst>& inst_pool,
|
||||
Common::ObjectPool<IR::Block>& block_pool, CFG& cfg, Info& info,
|
||||
const Profile& profile) {
|
||||
const RuntimeInfo& runtime_info, const Profile& profile) {
|
||||
Common::ObjectPool<Statement> stmt_pool{64};
|
||||
GotoPass goto_pass{cfg, stmt_pool};
|
||||
Statement& root{goto_pass.RootStatement()};
|
||||
IR::AbstractSyntaxList syntax_list;
|
||||
TranslatePass{inst_pool, block_pool, stmt_pool, root,
|
||||
syntax_list, cfg.inst_list, info, profile};
|
||||
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list,
|
||||
cfg.inst_list, info, runtime_info, profile};
|
||||
ASSERT_MSG(!info.translation_failed, "Shader translation has failed");
|
||||
return syntax_list;
|
||||
}
|
||||
|
|
|
@ -11,12 +11,14 @@
|
|||
namespace Shader {
|
||||
struct Info;
|
||||
struct Profile;
|
||||
struct RuntimeInfo;
|
||||
} // namespace Shader
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
[[nodiscard]] IR::AbstractSyntaxList BuildASL(Common::ObjectPool<IR::Inst>& inst_pool,
|
||||
Common::ObjectPool<IR::Block>& block_pool, CFG& cfg,
|
||||
Info& info, const Profile& profile);
|
||||
Info& info, const RuntimeInfo& runtime_info,
|
||||
const Profile& profile);
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/specialization.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
|
@ -25,7 +25,7 @@ void Translator::EmitExport(const GcnInst& inst) {
|
|||
return comp;
|
||||
}
|
||||
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
|
||||
switch (info.mrt_swizzles[index]) {
|
||||
switch (runtime_info.fs_info.mrt_swizzles[index]) {
|
||||
case MrtSwizzle::Identity:
|
||||
return comp;
|
||||
case MrtSwizzle::Alt:
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
|
||||
|
@ -16,8 +17,9 @@
|
|||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
Translator::Translator(IR::Block* block_, Info& info_, const Profile& profile_)
|
||||
: ir{*block_, block_->begin()}, info{info_}, profile{profile_} {}
|
||||
Translator::Translator(IR::Block* block_, Info& info_, const RuntimeInfo& runtime_info_,
|
||||
const Profile& profile_)
|
||||
: ir{*block_, block_->begin()}, info{info_}, runtime_info{runtime_info_}, profile{profile_} {}
|
||||
|
||||
void Translator::EmitPrologue() {
|
||||
ir.Prologue();
|
||||
|
@ -25,7 +27,7 @@ void Translator::EmitPrologue() {
|
|||
|
||||
// Initialize user data.
|
||||
IR::ScalarReg dst_sreg = IR::ScalarReg::S0;
|
||||
for (u32 i = 0; i < info.num_user_data; i++) {
|
||||
for (u32 i = 0; i < runtime_info.num_user_data; i++) {
|
||||
ir.SetScalarReg(dst_sreg, ir.GetUserData(dst_sreg));
|
||||
++dst_sreg;
|
||||
}
|
||||
|
@ -36,15 +38,15 @@ void Translator::EmitPrologue() {
|
|||
// v0: vertex ID, always present
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
|
||||
// v1: instance ID, step rate 0
|
||||
if (info.num_input_vgprs > 0) {
|
||||
if (runtime_info.num_input_vgprs > 0) {
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId0));
|
||||
}
|
||||
// v2: instance ID, step rate 1
|
||||
if (info.num_input_vgprs > 1) {
|
||||
if (runtime_info.num_input_vgprs > 1) {
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId1));
|
||||
}
|
||||
// v3: instance ID, plain
|
||||
if (info.num_input_vgprs > 2) {
|
||||
if (runtime_info.num_input_vgprs > 2) {
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
|
||||
}
|
||||
break;
|
||||
|
@ -64,13 +66,13 @@ void Translator::EmitPrologue() {
|
|||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1));
|
||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2));
|
||||
|
||||
if (info.tgid_enable[0]) {
|
||||
if (runtime_info.cs_info.tgid_enable[0]) {
|
||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 0));
|
||||
}
|
||||
if (info.tgid_enable[1]) {
|
||||
if (runtime_info.cs_info.tgid_enable[1]) {
|
||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 1));
|
||||
}
|
||||
if (info.tgid_enable[2]) {
|
||||
if (runtime_info.cs_info.tgid_enable[2]) {
|
||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
|
||||
}
|
||||
break;
|
||||
|
@ -452,11 +454,11 @@ void Translator::LogMissingOpcode(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Info& info,
|
||||
const Profile& profile) {
|
||||
const RuntimeInfo& runtime_info, const Profile& profile) {
|
||||
if (inst_list.empty()) {
|
||||
return;
|
||||
}
|
||||
Translator translator{block, info, profile};
|
||||
Translator translator{block, info, runtime_info, profile};
|
||||
for (const auto& inst : inst_list) {
|
||||
pc += inst.length;
|
||||
|
||||
|
|
|
@ -5,9 +5,9 @@
|
|||
|
||||
#include <span>
|
||||
#include "shader_recompiler/frontend/instruction.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
||||
namespace Shader {
|
||||
struct Info;
|
||||
|
@ -55,7 +55,8 @@ enum class NegateMode : u32 {
|
|||
|
||||
class Translator {
|
||||
public:
|
||||
explicit Translator(IR::Block* block_, Info& info, const Profile& profile);
|
||||
explicit Translator(IR::Block* block_, Info& info, const RuntimeInfo& runtime_info,
|
||||
const Profile& profile);
|
||||
|
||||
// Instruction categories
|
||||
void EmitPrologue();
|
||||
|
@ -237,12 +238,13 @@ private:
|
|||
private:
|
||||
IR::IREmitter ir;
|
||||
Info& info;
|
||||
const RuntimeInfo& runtime_info;
|
||||
const Profile& profile;
|
||||
IR::U32 m0_value;
|
||||
bool opcode_missing = false;
|
||||
};
|
||||
|
||||
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info,
|
||||
const Profile& profile);
|
||||
const RuntimeInfo& runtime_info, const Profile& profile);
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -7,14 +7,14 @@ namespace Shader::Gcn {
|
|||
|
||||
void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
auto& attr = info.ps_inputs.at(inst.control.vintrp.attr);
|
||||
auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
|
||||
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
|
||||
ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan));
|
||||
}
|
||||
|
||||
void Translator::V_INTERP_MOV_F32(const GcnInst& inst) {
|
||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||
auto& attr = info.ps_inputs.at(inst.control.vintrp.attr);
|
||||
auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
|
||||
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
|
||||
ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan));
|
||||
}
|
||||
|
|
232
src/shader_recompiler/info.h
Normal file
232
src/shader_recompiler/info.h
Normal file
|
@ -0,0 +1,232 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include "common/assert.h"
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/ir/type.h"
|
||||
#include "shader_recompiler/params.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
static constexpr size_t NumUserDataRegs = 16;
|
||||
|
||||
enum class TextureType : u32 {
|
||||
Color1D,
|
||||
ColorArray1D,
|
||||
Color2D,
|
||||
ColorArray2D,
|
||||
Color3D,
|
||||
ColorCube,
|
||||
Buffer,
|
||||
};
|
||||
constexpr u32 NUM_TEXTURE_TYPES = 7;
|
||||
|
||||
struct Info;
|
||||
|
||||
struct BufferResource {
|
||||
u32 sgpr_base;
|
||||
u32 dword_offset;
|
||||
IR::Type used_types;
|
||||
AmdGpu::Buffer inline_cbuf;
|
||||
bool is_instance_data{};
|
||||
bool is_written{};
|
||||
|
||||
bool IsStorage(AmdGpu::Buffer buffer) const noexcept {
|
||||
static constexpr size_t MaxUboSize = 65536;
|
||||
return buffer.GetSize() > MaxUboSize || is_written;
|
||||
}
|
||||
|
||||
constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
|
||||
};
|
||||
using BufferResourceList = boost::container::small_vector<BufferResource, 16>;
|
||||
|
||||
struct TextureBufferResource {
|
||||
u32 sgpr_base;
|
||||
u32 dword_offset;
|
||||
AmdGpu::NumberFormat nfmt;
|
||||
bool is_written{};
|
||||
|
||||
constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
|
||||
};
|
||||
using TextureBufferResourceList = boost::container::small_vector<TextureBufferResource, 16>;
|
||||
|
||||
struct ImageResource {
|
||||
u32 sgpr_base;
|
||||
u32 dword_offset;
|
||||
AmdGpu::ImageType type;
|
||||
AmdGpu::NumberFormat nfmt;
|
||||
bool is_storage;
|
||||
bool is_depth;
|
||||
bool is_atomic{};
|
||||
|
||||
constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
|
||||
};
|
||||
using ImageResourceList = boost::container::small_vector<ImageResource, 16>;
|
||||
|
||||
struct SamplerResource {
|
||||
u32 sgpr_base;
|
||||
u32 dword_offset;
|
||||
AmdGpu::Sampler inline_sampler{};
|
||||
u32 associated_image : 4;
|
||||
u32 disable_aniso : 1;
|
||||
|
||||
constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept;
|
||||
};
|
||||
using SamplerResourceList = boost::container::small_vector<SamplerResource, 16>;
|
||||
|
||||
struct PushData {
|
||||
static constexpr size_t BufOffsetIndex = 2;
|
||||
|
||||
u32 step0;
|
||||
u32 step1;
|
||||
std::array<u8, 32> buf_offsets;
|
||||
|
||||
void AddOffset(u32 binding, u32 offset) {
|
||||
ASSERT(offset < 256 && binding < buf_offsets.size());
|
||||
buf_offsets[binding] = offset;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Contains general information generated by the shader recompiler for an input program.
|
||||
*/
|
||||
struct Info {
|
||||
struct VsInput {
|
||||
enum InstanceIdType : u8 {
|
||||
None = 0,
|
||||
OverStepRate0 = 1,
|
||||
OverStepRate1 = 2,
|
||||
Plain = 3,
|
||||
};
|
||||
|
||||
AmdGpu::NumberFormat fmt;
|
||||
u16 binding;
|
||||
u16 num_components;
|
||||
u8 sgpr_base;
|
||||
u8 dword_offset;
|
||||
InstanceIdType instance_step_rate;
|
||||
s32 instance_data_buf;
|
||||
};
|
||||
boost::container::static_vector<VsInput, 32> vs_inputs{};
|
||||
|
||||
struct AttributeFlags {
|
||||
bool Get(IR::Attribute attrib, u32 comp = 0) const {
|
||||
return flags[Index(attrib)] & (1 << comp);
|
||||
}
|
||||
|
||||
bool GetAny(IR::Attribute attrib) const {
|
||||
return flags[Index(attrib)];
|
||||
}
|
||||
|
||||
void Set(IR::Attribute attrib, u32 comp = 0) {
|
||||
flags[Index(attrib)] |= (1 << comp);
|
||||
}
|
||||
|
||||
u32 NumComponents(IR::Attribute attrib) const {
|
||||
return 4;
|
||||
}
|
||||
|
||||
static size_t Index(IR::Attribute attrib) {
|
||||
return static_cast<size_t>(attrib);
|
||||
}
|
||||
|
||||
std::array<u8, IR::NumAttributes> flags;
|
||||
};
|
||||
AttributeFlags loads{};
|
||||
AttributeFlags stores{};
|
||||
|
||||
s8 vertex_offset_sgpr = -1;
|
||||
s8 instance_offset_sgpr = -1;
|
||||
|
||||
BufferResourceList buffers;
|
||||
TextureBufferResourceList texture_buffers;
|
||||
ImageResourceList images;
|
||||
SamplerResourceList samplers;
|
||||
|
||||
std::span<const u32> user_data;
|
||||
Stage stage;
|
||||
|
||||
u64 pgm_hash{};
|
||||
VAddr pgm_base;
|
||||
bool has_storage_images{};
|
||||
bool has_image_buffers{};
|
||||
bool has_texel_buffers{};
|
||||
bool has_discard{};
|
||||
bool has_image_gather{};
|
||||
bool has_image_query{};
|
||||
bool uses_lane_id{};
|
||||
bool uses_group_quad{};
|
||||
bool uses_shared{};
|
||||
bool uses_fp16{};
|
||||
bool uses_step_rates{};
|
||||
bool translation_failed{}; // indicates that shader has unsupported instructions
|
||||
|
||||
explicit Info(Stage stage_, ShaderParams params)
|
||||
: stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
|
||||
user_data{params.user_data} {}
|
||||
|
||||
template <typename T>
|
||||
T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {
|
||||
T data;
|
||||
const u32* base = user_data.data();
|
||||
if (ptr_index != IR::NumScalarRegs) {
|
||||
std::memcpy(&base, &user_data[ptr_index], sizeof(base));
|
||||
}
|
||||
std::memcpy(&data, base + dword_offset, sizeof(T));
|
||||
return data;
|
||||
}
|
||||
|
||||
size_t NumBindings() const noexcept {
|
||||
return buffers.size() + texture_buffers.size() + images.size() + samplers.size();
|
||||
}
|
||||
|
||||
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets() const noexcept {
|
||||
u32 vertex_offset = 0;
|
||||
u32 instance_offset = 0;
|
||||
if (vertex_offset_sgpr != -1) {
|
||||
vertex_offset = user_data[vertex_offset_sgpr];
|
||||
}
|
||||
if (instance_offset_sgpr != -1) {
|
||||
instance_offset = user_data[instance_offset_sgpr];
|
||||
}
|
||||
return {vertex_offset, instance_offset};
|
||||
}
|
||||
};
|
||||
|
||||
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
|
||||
return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const noexcept {
|
||||
return info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
|
||||
return info.ReadUd<AmdGpu::Image>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
|
||||
return inline_sampler ? inline_sampler : info.ReadUd<AmdGpu::Sampler>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
} // namespace Shader
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<Shader::Stage> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
auto format(const Shader::Stage stage, format_context& ctx) const {
|
||||
constexpr static std::array names = {"fs", "vs", "gs", "es", "hs", "ls", "cs"};
|
||||
return fmt::format_to(ctx.out(), "{}", names[static_cast<size_t>(stage)]);
|
||||
}
|
||||
};
|
|
@ -4,11 +4,11 @@
|
|||
#include <algorithm>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include "common/alignment.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||
#include "shader_recompiler/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/ir/program.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
|
|
|
@ -5,9 +5,9 @@
|
|||
|
||||
#include <string>
|
||||
#include "shader_recompiler/frontend/instruction.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "shader_recompiler/ir/abstract_syntax_list.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
|
|
26
src/shader_recompiler/params.h
Normal file
26
src/shader_recompiler/params.h
Normal file
|
@ -0,0 +1,26 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include "common/types.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
/**
|
||||
* Compilation parameters used to identify and locate a guest shader program.
|
||||
*/
|
||||
struct ShaderParams {
|
||||
static constexpr u32 NumShaderUserData = 16;
|
||||
|
||||
std::span<const u32, NumShaderUserData> user_data;
|
||||
std::span<const u32> code;
|
||||
u64 hash;
|
||||
|
||||
VAddr Base() const noexcept {
|
||||
return reinterpret_cast<VAddr>(code.data());
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Shader
|
|
@ -28,29 +28,32 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
|
|||
return blocks;
|
||||
}
|
||||
|
||||
IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
|
||||
Common::ObjectPool<IR::Block>& block_pool, std::span<const u32> token,
|
||||
Info& info, const Profile& profile) {
|
||||
IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
|
||||
const RuntimeInfo& runtime_info, const Profile& profile) {
|
||||
// Ensure first instruction is expected.
|
||||
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
|
||||
ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
|
||||
ASSERT_MSG(code[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
|
||||
|
||||
Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size());
|
||||
Gcn::GcnCodeSlice slice(code.data(), code.data() + code.size());
|
||||
Gcn::GcnDecodeContext decoder;
|
||||
|
||||
// Decode and save instructions
|
||||
IR::Program program{info};
|
||||
program.ins_list.reserve(token.size());
|
||||
program.ins_list.reserve(code.size());
|
||||
while (!slice.atEnd()) {
|
||||
program.ins_list.emplace_back(decoder.decodeInstruction(slice));
|
||||
}
|
||||
|
||||
// Clear any previous pooled data.
|
||||
pools.ReleaseContents();
|
||||
|
||||
// Create control flow graph
|
||||
Common::ObjectPool<Gcn::Block> gcn_block_pool{64};
|
||||
Gcn::CFG cfg{gcn_block_pool, program.ins_list};
|
||||
|
||||
// Structurize control flow graph and create program.
|
||||
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info, profile);
|
||||
program.syntax_list = Shader::Gcn::BuildASL(pools.inst_pool, pools.block_pool, cfg,
|
||||
program.info, runtime_info, profile);
|
||||
program.blocks = GenerateBlocks(program.syntax_list);
|
||||
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
|
||||
|
||||
|
@ -64,7 +67,6 @@ IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
|
|||
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
||||
Shader::Optimization::DeadCodeEliminationPass(program);
|
||||
Shader::Optimization::CollectShaderInfoPass(program);
|
||||
LOG_DEBUG(Render_Vulkan, "{}", Shader::IR::DumpProgram(program));
|
||||
|
||||
return program;
|
||||
}
|
||||
|
|
|
@ -10,10 +10,24 @@
|
|||
namespace Shader {
|
||||
|
||||
struct Profile;
|
||||
struct RuntimeInfo;
|
||||
|
||||
[[nodiscard]] IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
|
||||
Common::ObjectPool<IR::Block>& block_pool,
|
||||
std::span<const u32> code, Info& info,
|
||||
const Profile& profile);
|
||||
struct Pools {
|
||||
static constexpr u32 InstPoolSize = 8192;
|
||||
static constexpr u32 BlockPoolSize = 32;
|
||||
|
||||
Common::ObjectPool<IR::Inst> inst_pool;
|
||||
Common::ObjectPool<IR::Block> block_pool;
|
||||
|
||||
explicit Pools() : inst_pool{InstPoolSize}, block_pool{BlockPoolSize} {}
|
||||
|
||||
void ReleaseContents() {
|
||||
inst_pool.ReleaseContents();
|
||||
block_pool.ReleaseContents();
|
||||
}
|
||||
};
|
||||
|
||||
[[nodiscard]] IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
|
||||
const RuntimeInfo& runtime_info, const Profile& profile);
|
||||
|
||||
} // namespace Shader
|
||||
|
|
|
@ -3,28 +3,14 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <algorithm>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/ir/attribute.h"
|
||||
#include "shader_recompiler/ir/reg.h"
|
||||
#include "shader_recompiler/ir/type.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
static constexpr size_t NumUserDataRegs = 16;
|
||||
static constexpr size_t MaxColorBuffers = 8;
|
||||
|
||||
enum class MrtSwizzle : u32 {
|
||||
Identity = 0,
|
||||
Alt = 1,
|
||||
Reverse = 2,
|
||||
ReverseAlt = 3,
|
||||
};
|
||||
|
||||
enum class Stage : u32 {
|
||||
Fragment,
|
||||
Vertex,
|
||||
|
@ -37,21 +23,18 @@ enum class Stage : u32 {
|
|||
constexpr u32 MaxStageTypes = 6;
|
||||
|
||||
[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
|
||||
return static_cast<Stage>(static_cast<size_t>(Stage::Vertex) + index);
|
||||
return static_cast<Stage>(index);
|
||||
}
|
||||
|
||||
enum class TextureType : u32 {
|
||||
Color1D,
|
||||
ColorArray1D,
|
||||
Color2D,
|
||||
ColorArray2D,
|
||||
Color3D,
|
||||
ColorCube,
|
||||
Buffer,
|
||||
enum class MrtSwizzle : u8 {
|
||||
Identity = 0,
|
||||
Alt = 1,
|
||||
Reverse = 2,
|
||||
ReverseAlt = 3,
|
||||
};
|
||||
constexpr u32 NUM_TEXTURE_TYPES = 7;
|
||||
static constexpr u32 MaxColorBuffers = 8;
|
||||
|
||||
enum class VsOutput : u32 {
|
||||
enum class VsOutput : u8 {
|
||||
None,
|
||||
PointSprite,
|
||||
EdgeFlag,
|
||||
|
@ -78,212 +61,67 @@ enum class VsOutput : u32 {
|
|||
};
|
||||
using VsOutputMap = std::array<VsOutput, 4>;
|
||||
|
||||
struct Info;
|
||||
struct VertexRuntimeInfo {
|
||||
boost::container::static_vector<VsOutputMap, 3> outputs;
|
||||
|
||||
struct BufferResource {
|
||||
u32 sgpr_base;
|
||||
u32 dword_offset;
|
||||
IR::Type used_types;
|
||||
AmdGpu::Buffer inline_cbuf;
|
||||
bool is_instance_data{};
|
||||
bool is_written{};
|
||||
|
||||
bool IsStorage(AmdGpu::Buffer buffer) const noexcept {
|
||||
static constexpr size_t MaxUboSize = 65536;
|
||||
return buffer.GetSize() > MaxUboSize || is_written;
|
||||
}
|
||||
|
||||
constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
|
||||
};
|
||||
using BufferResourceList = boost::container::small_vector<BufferResource, 16>;
|
||||
|
||||
struct TextureBufferResource {
|
||||
u32 sgpr_base;
|
||||
u32 dword_offset;
|
||||
AmdGpu::NumberFormat nfmt;
|
||||
bool is_written{};
|
||||
|
||||
constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
|
||||
};
|
||||
using TextureBufferResourceList = boost::container::small_vector<TextureBufferResource, 16>;
|
||||
|
||||
struct ImageResource {
|
||||
u32 sgpr_base;
|
||||
u32 dword_offset;
|
||||
AmdGpu::ImageType type;
|
||||
AmdGpu::NumberFormat nfmt;
|
||||
bool is_storage;
|
||||
bool is_depth;
|
||||
bool is_atomic{};
|
||||
|
||||
constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
|
||||
};
|
||||
using ImageResourceList = boost::container::small_vector<ImageResource, 16>;
|
||||
|
||||
struct SamplerResource {
|
||||
u32 sgpr_base;
|
||||
u32 dword_offset;
|
||||
AmdGpu::Sampler inline_sampler{};
|
||||
u32 associated_image : 4;
|
||||
u32 disable_aniso : 1;
|
||||
|
||||
constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept;
|
||||
};
|
||||
using SamplerResourceList = boost::container::small_vector<SamplerResource, 16>;
|
||||
|
||||
struct PushData {
|
||||
static constexpr size_t BufOffsetIndex = 2;
|
||||
|
||||
u32 step0;
|
||||
u32 step1;
|
||||
std::array<u8, 32> buf_offsets;
|
||||
|
||||
void AddOffset(u32 binding, u32 offset) {
|
||||
ASSERT(offset < 256 && binding < buf_offsets.size());
|
||||
buf_offsets[binding] = offset;
|
||||
bool operator==(const VertexRuntimeInfo& other) const noexcept {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
struct Info {
|
||||
struct VsInput {
|
||||
enum InstanceIdType : u8 {
|
||||
None = 0,
|
||||
OverStepRate0 = 1,
|
||||
OverStepRate1 = 2,
|
||||
Plain = 3,
|
||||
};
|
||||
|
||||
AmdGpu::NumberFormat fmt;
|
||||
u16 binding;
|
||||
u16 num_components;
|
||||
u8 sgpr_base;
|
||||
u8 dword_offset;
|
||||
InstanceIdType instance_step_rate;
|
||||
s32 instance_data_buf;
|
||||
};
|
||||
boost::container::static_vector<VsInput, 32> vs_inputs{};
|
||||
|
||||
struct FragmentRuntimeInfo {
|
||||
struct PsInput {
|
||||
u32 param_index;
|
||||
u8 param_index;
|
||||
bool is_default;
|
||||
bool is_flat;
|
||||
u32 default_value;
|
||||
u8 default_value;
|
||||
|
||||
auto operator<=>(const PsInput&) const noexcept = default;
|
||||
};
|
||||
boost::container::static_vector<PsInput, 32> ps_inputs{};
|
||||
boost::container::static_vector<PsInput, 32> inputs;
|
||||
std::array<MrtSwizzle, MaxColorBuffers> mrt_swizzles;
|
||||
|
||||
struct AttributeFlags {
|
||||
bool Get(IR::Attribute attrib, u32 comp = 0) const {
|
||||
return flags[Index(attrib)] & (1 << comp);
|
||||
}
|
||||
bool operator==(const FragmentRuntimeInfo& other) const noexcept {
|
||||
return std::ranges::equal(mrt_swizzles, other.mrt_swizzles) &&
|
||||
std::ranges::equal(inputs, other.inputs);
|
||||
}
|
||||
};
|
||||
|
||||
bool GetAny(IR::Attribute attrib) const {
|
||||
return flags[Index(attrib)];
|
||||
}
|
||||
|
||||
void Set(IR::Attribute attrib, u32 comp = 0) {
|
||||
flags[Index(attrib)] |= (1 << comp);
|
||||
}
|
||||
|
||||
u32 NumComponents(IR::Attribute attrib) const {
|
||||
return 4;
|
||||
}
|
||||
|
||||
static size_t Index(IR::Attribute attrib) {
|
||||
return static_cast<size_t>(attrib);
|
||||
}
|
||||
|
||||
std::array<u8, IR::NumAttributes> flags;
|
||||
};
|
||||
AttributeFlags loads{};
|
||||
AttributeFlags stores{};
|
||||
boost::container::static_vector<VsOutputMap, 3> vs_outputs;
|
||||
|
||||
s8 vertex_offset_sgpr = -1;
|
||||
s8 instance_offset_sgpr = -1;
|
||||
|
||||
BufferResourceList buffers;
|
||||
TextureBufferResourceList texture_buffers;
|
||||
ImageResourceList images;
|
||||
SamplerResourceList samplers;
|
||||
|
||||
std::array<u32, 3> workgroup_size{};
|
||||
struct ComputeRuntimeInfo {
|
||||
u32 shared_memory_size;
|
||||
std::array<u32, 3> workgroup_size;
|
||||
std::array<bool, 3> tgid_enable;
|
||||
|
||||
bool operator==(const ComputeRuntimeInfo& other) const noexcept {
|
||||
return workgroup_size == other.workgroup_size && tgid_enable == other.tgid_enable;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Stores information relevant to shader compilation sourced from liverpool registers.
|
||||
* It may potentially differ with the same shader module so must be checked.
|
||||
* It's also possible to store any other custom information that needs to be part of shader key.
|
||||
*/
|
||||
struct RuntimeInfo {
|
||||
Stage stage;
|
||||
u32 num_user_data;
|
||||
u32 num_input_vgprs;
|
||||
std::span<const u32> user_data;
|
||||
Stage stage;
|
||||
VertexRuntimeInfo vs_info;
|
||||
FragmentRuntimeInfo fs_info;
|
||||
ComputeRuntimeInfo cs_info;
|
||||
|
||||
uintptr_t pgm_base{};
|
||||
u64 pgm_hash{};
|
||||
u32 shared_memory_size{};
|
||||
bool has_storage_images{};
|
||||
bool has_image_buffers{};
|
||||
bool has_texel_buffers{};
|
||||
bool has_discard{};
|
||||
bool has_image_gather{};
|
||||
bool has_image_query{};
|
||||
bool uses_lane_id{};
|
||||
bool uses_group_quad{};
|
||||
bool uses_shared{};
|
||||
bool uses_fp16{};
|
||||
bool uses_step_rates{};
|
||||
bool translation_failed{}; // indicates that shader has unsupported instructions
|
||||
RuntimeInfo(Stage stage_) : stage{stage_} {}
|
||||
|
||||
template <typename T>
|
||||
T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {
|
||||
T data;
|
||||
const u32* base = user_data.data();
|
||||
if (ptr_index != IR::NumScalarRegs) {
|
||||
std::memcpy(&base, &user_data[ptr_index], sizeof(base));
|
||||
bool operator==(const RuntimeInfo& other) const noexcept {
|
||||
if (stage == Stage::Fragment) {
|
||||
return fs_info == other.fs_info;
|
||||
} else if (stage == Stage::Vertex) {
|
||||
return vs_info == other.vs_info;
|
||||
} else if (stage == Stage::Compute) {
|
||||
return cs_info == other.cs_info;
|
||||
}
|
||||
std::memcpy(&data, base + dword_offset, sizeof(T));
|
||||
return data;
|
||||
}
|
||||
|
||||
size_t NumBindings() const noexcept {
|
||||
return buffers.size() + texture_buffers.size() + images.size() + samplers.size();
|
||||
}
|
||||
|
||||
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets() const noexcept {
|
||||
u32 vertex_offset = 0;
|
||||
u32 instance_offset = 0;
|
||||
if (vertex_offset_sgpr != -1) {
|
||||
vertex_offset = user_data[vertex_offset_sgpr];
|
||||
}
|
||||
if (instance_offset_sgpr != -1) {
|
||||
instance_offset = user_data[instance_offset_sgpr];
|
||||
}
|
||||
return {vertex_offset, instance_offset};
|
||||
UNREACHABLE();
|
||||
}
|
||||
};
|
||||
|
||||
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
|
||||
return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const noexcept {
|
||||
return info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
|
||||
return info.ReadUd<AmdGpu::Image>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
|
||||
return inline_sampler ? inline_sampler : info.ReadUd<AmdGpu::Sampler>(sgpr_base, dword_offset);
|
||||
}
|
||||
|
||||
} // namespace Shader
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<Shader::Stage> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
auto format(const Shader::Stage stage, format_context& ctx) const {
|
||||
constexpr static std::array names = {"fs", "vs", "gs", "es", "hs", "ls", "cs"};
|
||||
return fmt::format_to(ctx.out(), "{}", names[static_cast<size_t>(stage)]);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#include <bitset>
|
||||
|
||||
#include "common/types.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
|
@ -30,18 +30,42 @@ struct ImageSpecialization {
|
|||
auto operator<=>(const ImageSpecialization&) const = default;
|
||||
};
|
||||
|
||||
/**
|
||||
* Alongside runtime information, this structure also checks bound resources
|
||||
* for compatibility. Can be used as a key for storing shader permutations.
|
||||
* Is separate from runtime information, because resource layout can only be deduced
|
||||
* after the first compilation of a module.
|
||||
*/
|
||||
struct StageSpecialization {
|
||||
static constexpr size_t MaxStageResources = 32;
|
||||
|
||||
const Shader::Info* info;
|
||||
RuntimeInfo runtime_info;
|
||||
std::bitset<MaxStageResources> bitset{};
|
||||
boost::container::small_vector<BufferSpecialization, 16> buffers;
|
||||
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
|
||||
boost::container::small_vector<ImageSpecialization, 8> images;
|
||||
std::array<MrtSwizzle, MaxColorBuffers> mrt_swizzles;
|
||||
u32 start_binding{};
|
||||
|
||||
explicit StageSpecialization(const Shader::Info& info_) : info{&info_} {}
|
||||
explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
|
||||
u32 start_binding_)
|
||||
: info{&info_}, runtime_info{runtime_info_}, start_binding{start_binding_} {
|
||||
u32 binding{};
|
||||
ForEachSharp(binding, buffers, info->buffers,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||
spec.stride = sharp.GetStride();
|
||||
spec.is_storage = desc.IsStorage(sharp);
|
||||
});
|
||||
ForEachSharp(binding, tex_buffers, info->texture_buffers,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||
});
|
||||
ForEachSharp(binding, images, info->images,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||
spec.type = sharp.GetType();
|
||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||
});
|
||||
}
|
||||
|
||||
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
|
||||
for (const auto& desc : desc_list) {
|
||||
|
@ -60,8 +84,7 @@ struct StageSpecialization {
|
|||
if (start_binding != other.start_binding) {
|
||||
return false;
|
||||
}
|
||||
if (info->stage == Shader::Stage::Fragment &&
|
||||
!std::ranges::equal(mrt_swizzles, other.mrt_swizzles)) {
|
||||
if (runtime_info != other.runtime_info) {
|
||||
return false;
|
||||
}
|
||||
u32 binding{};
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "common/polyfill_thread.h"
|
||||
#include "common/types.h"
|
||||
#include "common/unique_function.h"
|
||||
#include "shader_recompiler/params.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
#include "video_core/amdgpu/resource.h"
|
||||
|
||||
|
@ -171,6 +172,15 @@ struct Liverpool {
|
|||
return bininfo;
|
||||
}
|
||||
|
||||
static constexpr Shader::ShaderParams GetParams(const auto& sh) {
|
||||
auto* bininfo = GetBinaryInfo(sh);
|
||||
return {
|
||||
.user_data = sh.user_data,
|
||||
.code = sh.Code(),
|
||||
.hash = bininfo->shader_hash,
|
||||
};
|
||||
}
|
||||
|
||||
union PsInputControl {
|
||||
u32 raw;
|
||||
BitField<0, 5, u32> input_offset;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#include <algorithm>
|
||||
#include "common/alignment.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "video_core/amdgpu/liverpool.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "video_core/renderer_vulkan/vk_common.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
|
|
@ -7,8 +7,7 @@
|
|||
#include "common/io_file.h"
|
||||
#include "common/path_util.h"
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
#include "shader_recompiler/recompiler.h"
|
||||
#include "shader_recompiler/runtime_info.h"
|
||||
#include "shader_recompiler/info.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
|
@ -21,11 +20,16 @@ namespace Vulkan {
|
|||
|
||||
using Shader::VsOutput;
|
||||
|
||||
void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) {
|
||||
[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
|
||||
return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
|
||||
}
|
||||
|
||||
void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
|
||||
const AmdGpu::Liverpool::VsOutputControl& ctl) {
|
||||
const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
|
||||
if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
|
||||
w != VsOutput::None) {
|
||||
info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
|
||||
info.outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
|
||||
}
|
||||
};
|
||||
// VS_OUT_MISC_VEC
|
||||
|
@ -65,14 +69,51 @@ void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl
|
|||
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
|
||||
}
|
||||
|
||||
[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
|
||||
return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
|
||||
Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, const GraphicsPipelineKey& key,
|
||||
const AmdGpu::Liverpool::Regs& regs) {
|
||||
auto info = Shader::RuntimeInfo{stage};
|
||||
switch (stage) {
|
||||
case Shader::Stage::Vertex: {
|
||||
info.num_user_data = regs.vs_program.settings.num_user_regs;
|
||||
info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt;
|
||||
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Fragment: {
|
||||
info.num_user_data = regs.ps_program.settings.num_user_regs;
|
||||
std::ranges::transform(key.mrt_swizzles, info.fs_info.mrt_swizzles.begin(),
|
||||
[](Liverpool::ColorBuffer::SwapMode mode) {
|
||||
return static_cast<Shader::MrtSwizzle>(mode);
|
||||
});
|
||||
for (u32 i = 0; i < regs.num_interp; i++) {
|
||||
info.fs_info.inputs.push_back({
|
||||
.param_index = u8(regs.ps_inputs[i].input_offset.Value()),
|
||||
.is_default = bool(regs.ps_inputs[i].use_default),
|
||||
.is_flat = bool(regs.ps_inputs[i].flat_shade),
|
||||
.default_value = u8(regs.ps_inputs[i].default_value),
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Compute: {
|
||||
const auto& cs_pgm = regs.cs_program;
|
||||
info.num_user_data = cs_pgm.settings.num_user_regs;
|
||||
info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
|
||||
cs_pgm.num_thread_z.full};
|
||||
info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
|
||||
cs_pgm.IsTgidEnabled(2)};
|
||||
info.cs_info.shared_memory_size = cs_pgm.SharedMemSize();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
return info;
|
||||
}
|
||||
|
||||
PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
||||
AmdGpu::Liverpool* liverpool_)
|
||||
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192},
|
||||
block_pool{512} {
|
||||
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_} {
|
||||
profile = Shader::Profile{
|
||||
.supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U,
|
||||
.subgroup_size = instance.SubgroupSize(),
|
||||
|
@ -243,9 +284,9 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
if (ShouldSkipShader(bininfo->shader_hash, "graphics")) {
|
||||
return false;
|
||||
}
|
||||
const auto stage = Shader::Stage{i};
|
||||
const auto guest_pgm = GuestProgram{pgm, stage};
|
||||
std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(guest_pgm, binding);
|
||||
const auto stage = Shader::StageFromIndex(i);
|
||||
const auto params = Liverpool::GetParams(*pgm);
|
||||
std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(stage, params, binding);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
@ -253,149 +294,68 @@ bool PipelineCache::RefreshGraphicsKey() {
|
|||
bool PipelineCache::RefreshComputeKey() {
|
||||
u32 binding{};
|
||||
const auto* cs_pgm = &liverpool->regs.cs_program;
|
||||
const GuestProgram guest_pgm{cs_pgm, Shader::Stage::Compute};
|
||||
if (ShouldSkipShader(guest_pgm.hash, "compute")) {
|
||||
const auto cs_params = Liverpool::GetParams(*cs_pgm);
|
||||
if (ShouldSkipShader(cs_params.hash, "compute")) {
|
||||
return false;
|
||||
}
|
||||
std::tie(infos[0], modules[0], compute_key) = GetProgram(guest_pgm, binding);
|
||||
std::tie(infos[0], modules[0], compute_key) =
|
||||
GetProgram(Shader::Stage::Compute, cs_params, binding);
|
||||
return true;
|
||||
}
|
||||
|
||||
vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, std::span<const u32> code,
|
||||
size_t perm_idx, u32& binding) {
|
||||
vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
|
||||
const Shader::RuntimeInfo& runtime_info,
|
||||
std::span<const u32> code, size_t perm_idx,
|
||||
u32& binding) {
|
||||
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
|
||||
perm_idx != 0 ? "(permutation)" : "");
|
||||
|
||||
if (Config::dumpShaders()) {
|
||||
DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin");
|
||||
}
|
||||
|
||||
block_pool.ReleaseContents();
|
||||
inst_pool.ReleaseContents();
|
||||
const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile);
|
||||
|
||||
// Compile IR to SPIR-V
|
||||
const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding);
|
||||
const auto ir_program = Shader::TranslateProgram(code, pools, info, runtime_info, profile);
|
||||
const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, runtime_info, ir_program, binding);
|
||||
if (Config::dumpShaders()) {
|
||||
DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv");
|
||||
}
|
||||
|
||||
// Create module and set name to hash in renderdoc
|
||||
const auto module = CompileSPV(spv, instance.GetDevice());
|
||||
ASSERT(module != VK_NULL_HANDLE);
|
||||
const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx);
|
||||
Vulkan::SetObjectName(instance.GetDevice(), module, name);
|
||||
return module;
|
||||
}
|
||||
|
||||
std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram(
|
||||
const GuestProgram& pgm, u32& binding) {
|
||||
auto [it_pgm, new_program] = program_cache.try_emplace(pgm.hash);
|
||||
Shader::Stage stage, Shader::ShaderParams params, u32& binding) {
|
||||
const auto runtime_info = BuildRuntimeInfo(stage, graphics_key, liverpool->regs);
|
||||
auto [it_pgm, new_program] = program_cache.try_emplace(params.hash);
|
||||
if (new_program) {
|
||||
Program* program = program_pool.Create(BuildShaderInfo(pgm, liverpool->regs));
|
||||
Program* program = program_pool.Create(stage, params);
|
||||
u32 start_binding = binding;
|
||||
const auto module = CompileModule(program->info, pgm.code, 0, binding);
|
||||
const auto spec = BuildStageSpec(program->info, start_binding);
|
||||
program->modules.emplace_back(module, spec);
|
||||
const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
|
||||
const auto spec = Shader::StageSpecialization(program->info, runtime_info, start_binding);
|
||||
program->AddPermut(module, std::move(spec));
|
||||
it_pgm.value() = program;
|
||||
return std::make_tuple(&program->info, module, HashCombine(pgm.hash, 0));
|
||||
return std::make_tuple(&program->info, module, HashCombine(params.hash, 0));
|
||||
}
|
||||
|
||||
Program* program = it_pgm->second;
|
||||
const auto& info = program->info;
|
||||
const auto spec = BuildStageSpec(info, binding);
|
||||
const auto spec = Shader::StageSpecialization(info, runtime_info, binding);
|
||||
size_t perm_idx = program->modules.size();
|
||||
vk::ShaderModule module{};
|
||||
|
||||
const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec);
|
||||
if (it == program->modules.end()) {
|
||||
auto new_info = BuildShaderInfo(pgm, liverpool->regs);
|
||||
module = CompileModule(new_info, pgm.code, perm_idx, binding);
|
||||
program->modules.emplace_back(module, std::move(spec));
|
||||
auto new_info = Shader::Info(stage, params);
|
||||
module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding);
|
||||
program->AddPermut(module, std::move(spec));
|
||||
} else {
|
||||
binding += info.NumBindings();
|
||||
module = it->module;
|
||||
perm_idx = std::distance(program->modules.begin(), it);
|
||||
}
|
||||
return std::make_tuple(&info, module, HashCombine(pgm.hash, perm_idx));
|
||||
}
|
||||
|
||||
Shader::Info PipelineCache::BuildShaderInfo(const GuestProgram& pgm,
|
||||
const AmdGpu::Liverpool::Regs& regs) {
|
||||
Shader::Info info{};
|
||||
info.user_data = pgm.user_data;
|
||||
info.pgm_base = VAddr(pgm.code.data());
|
||||
info.pgm_hash = pgm.hash;
|
||||
info.stage = pgm.stage;
|
||||
switch (pgm.stage) {
|
||||
case Shader::Stage::Vertex: {
|
||||
info.num_user_data = regs.vs_program.settings.num_user_regs;
|
||||
info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt;
|
||||
BuildVsOutputs(info, regs.vs_output_control);
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Fragment: {
|
||||
info.num_user_data = regs.ps_program.settings.num_user_regs;
|
||||
std::ranges::transform(graphics_key.mrt_swizzles, info.mrt_swizzles.begin(),
|
||||
[](Liverpool::ColorBuffer::SwapMode mode) {
|
||||
return static_cast<Shader::MrtSwizzle>(mode);
|
||||
});
|
||||
for (u32 i = 0; i < regs.num_interp; i++) {
|
||||
info.ps_inputs.push_back({
|
||||
.param_index = regs.ps_inputs[i].input_offset.Value(),
|
||||
.is_default = bool(regs.ps_inputs[i].use_default),
|
||||
.is_flat = bool(regs.ps_inputs[i].flat_shade),
|
||||
.default_value = regs.ps_inputs[i].default_value,
|
||||
});
|
||||
}
|
||||
break;
|
||||
}
|
||||
case Shader::Stage::Compute: {
|
||||
const auto& cs_pgm = regs.cs_program;
|
||||
info.num_user_data = cs_pgm.settings.num_user_regs;
|
||||
info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
|
||||
cs_pgm.num_thread_z.full};
|
||||
info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
|
||||
cs_pgm.IsTgidEnabled(2)};
|
||||
info.shared_memory_size = cs_pgm.SharedMemSize();
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return info;
|
||||
}
|
||||
|
||||
Shader::StageSpecialization PipelineCache::BuildStageSpec(const Shader::Info& info,
|
||||
u32 start_binding) {
|
||||
u32 binding{};
|
||||
auto spec = Shader::StageSpecialization{info};
|
||||
spec.start_binding = start_binding;
|
||||
spec.ForEachSharp(binding, spec.buffers, info.buffers,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||
spec.stride = sharp.GetStride();
|
||||
spec.is_storage = desc.IsStorage(sharp);
|
||||
});
|
||||
spec.ForEachSharp(binding, spec.tex_buffers, info.texture_buffers,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||
});
|
||||
spec.ForEachSharp(binding, spec.images, info.images,
|
||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||
spec.type = sharp.GetType();
|
||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||
});
|
||||
switch (info.stage) {
|
||||
case Shader::Stage::Fragment:
|
||||
std::ranges::transform(graphics_key.mrt_swizzles, spec.mrt_swizzles.begin(),
|
||||
[](Liverpool::ColorBuffer::SwapMode mode) {
|
||||
return static_cast<Shader::MrtSwizzle>(mode);
|
||||
});
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return spec;
|
||||
return std::make_tuple(&info, module, HashCombine(params.hash, perm_idx));
|
||||
}
|
||||
|
||||
void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
|
||||
|
|
|
@ -4,9 +4,8 @@
|
|||
#pragma once
|
||||
|
||||
#include <tsl/robin_map.h>
|
||||
#include "common/object_pool.h"
|
||||
#include "shader_recompiler/ir/basic_block.h"
|
||||
#include "shader_recompiler/profile.h"
|
||||
#include "shader_recompiler/recompiler.h"
|
||||
#include "shader_recompiler/specialization.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
|
@ -30,19 +29,10 @@ struct Program {
|
|||
Shader::Info info;
|
||||
boost::container::small_vector<Module, 8> modules;
|
||||
|
||||
explicit Program(const Shader::Info& info_) : info{info_} {}
|
||||
};
|
||||
explicit Program(Shader::Stage stage, Shader::ShaderParams params) : info{stage, params} {}
|
||||
|
||||
struct GuestProgram {
|
||||
Shader::Stage stage;
|
||||
std::span<const u32, AmdGpu::Liverpool::NumShaderUserData> user_data;
|
||||
std::span<const u32> code;
|
||||
u64 hash;
|
||||
|
||||
explicit GuestProgram(const auto* pgm, Shader::Stage stage_)
|
||||
: stage{stage_}, user_data{pgm->user_data}, code{pgm->Code()} {
|
||||
const auto* bininfo = AmdGpu::Liverpool::GetBinaryInfo(*pgm);
|
||||
hash = bininfo->shader_hash;
|
||||
void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) {
|
||||
modules.emplace_back(module, std::move(spec));
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -58,7 +48,8 @@ public:
|
|||
|
||||
const ComputePipeline* GetComputePipeline();
|
||||
|
||||
std::tuple<const Shader::Info*, vk::ShaderModule, u64> GetProgram(const GuestProgram& pgm,
|
||||
std::tuple<const Shader::Info*, vk::ShaderModule, u64> GetProgram(Shader::Stage stage,
|
||||
Shader::ShaderParams params,
|
||||
u32& binding);
|
||||
|
||||
private:
|
||||
|
@ -67,11 +58,8 @@ private:
|
|||
|
||||
void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, size_t perm_idx,
|
||||
std::string_view ext);
|
||||
vk::ShaderModule CompileModule(Shader::Info& info, std::span<const u32> code, size_t perm_idx,
|
||||
u32& binding);
|
||||
|
||||
Shader::Info BuildShaderInfo(const GuestProgram& pgm, const AmdGpu::Liverpool::Regs& regs);
|
||||
Shader::StageSpecialization BuildStageSpec(const Shader::Info& info, u32 binding);
|
||||
vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info,
|
||||
std::span<const u32> code, size_t perm_idx, u32& binding);
|
||||
|
||||
private:
|
||||
const Instance& instance;
|
||||
|
@ -80,9 +68,8 @@ private:
|
|||
vk::UniquePipelineCache pipeline_cache;
|
||||
vk::UniquePipelineLayout pipeline_layout;
|
||||
Shader::Profile profile{};
|
||||
Shader::Pools pools;
|
||||
tsl::robin_map<size_t, Program*> program_cache;
|
||||
Common::ObjectPool<Shader::IR::Inst> inst_pool;
|
||||
Common::ObjectPool<Shader::IR::Block> block_pool;
|
||||
Common::ObjectPool<Program> program_pool;
|
||||
tsl::robin_map<size_t, std::unique_ptr<ComputePipeline>> compute_pipelines;
|
||||
tsl::robin_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
|
||||
|
|
Loading…
Add table
Reference in a new issue