mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-08-02 22:28:45 +00:00
video_core: Refactor shader recompiler interface
* Makes it much easier to pass runtime information to the recompiler and have it treated as part of the shader key. Also pulls out most runtime state from Info struct
This commit is contained in:
parent
1a7767788c
commit
f7a8ceb395
25 changed files with 517 additions and 411 deletions
|
@ -419,6 +419,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
|
||||||
src/shader_recompiler/profile.h
|
src/shader_recompiler/profile.h
|
||||||
src/shader_recompiler/recompiler.cpp
|
src/shader_recompiler/recompiler.cpp
|
||||||
src/shader_recompiler/recompiler.h
|
src/shader_recompiler/recompiler.h
|
||||||
|
src/shader_recompiler/info.h
|
||||||
|
src/shader_recompiler/params.h
|
||||||
src/shader_recompiler/runtime_info.h
|
src/shader_recompiler/runtime_info.h
|
||||||
src/shader_recompiler/specialization.h
|
src/shader_recompiler/specialization.h
|
||||||
src/shader_recompiler/backend/spirv/emit_spirv.cpp
|
src/shader_recompiler/backend/spirv/emit_spirv.cpp
|
||||||
|
|
|
@ -210,7 +210,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
|
||||||
}
|
}
|
||||||
switch (program.info.stage) {
|
switch (program.info.stage) {
|
||||||
case Stage::Compute: {
|
case Stage::Compute: {
|
||||||
const std::array<u32, 3> workgroup_size{program.info.workgroup_size};
|
const std::array<u32, 3> workgroup_size{ctx.runtime_info.cs_info.workgroup_size};
|
||||||
execution_model = spv::ExecutionModel::GLCompute;
|
execution_model = spv::ExecutionModel::GLCompute;
|
||||||
ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
|
ctx.AddExecutionMode(main, spv::ExecutionMode::LocalSize, workgroup_size[0],
|
||||||
workgroup_size[1], workgroup_size[2]);
|
workgroup_size[1], workgroup_size[2]);
|
||||||
|
@ -258,8 +258,9 @@ void PatchPhiNodes(const IR::Program& program, EmitContext& ctx) {
|
||||||
}
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
std::vector<u32> EmitSPIRV(const Profile& profile, const IR::Program& program, u32& binding) {
|
std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
|
||||||
EmitContext ctx{profile, program.info, binding};
|
const IR::Program& program, u32& binding) {
|
||||||
|
EmitContext ctx{profile, runtime_info, program.info, binding};
|
||||||
const Id main{DefineMain(ctx, program)};
|
const Id main{DefineMain(ctx, program)};
|
||||||
DefineEntryPoint(program, ctx, main);
|
DefineEntryPoint(program, ctx, main);
|
||||||
if (program.info.stage == Stage::Vertex) {
|
if (program.info.stage == Stage::Vertex) {
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
|
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
|
|
||||||
[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const IR::Program& program,
|
[[nodiscard]] std::vector<u32> EmitSPIRV(const Profile& profile, const RuntimeInfo& runtime_info,
|
||||||
u32& binding);
|
const IR::Program& program, u32& binding);
|
||||||
|
|
||||||
} // namespace Shader::Backend::SPIRV
|
} // namespace Shader::Backend::SPIRV
|
||||||
|
|
|
@ -59,7 +59,7 @@ Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
|
||||||
case IR::Attribute::Position2:
|
case IR::Attribute::Position2:
|
||||||
case IR::Attribute::Position3: {
|
case IR::Attribute::Position3: {
|
||||||
const u32 index = u32(attr) - u32(IR::Attribute::Position1);
|
const u32 index = u32(attr) - u32(IR::Attribute::Position1);
|
||||||
return VsOutputAttrPointer(ctx, ctx.info.vs_outputs[index][element]);
|
return VsOutputAttrPointer(ctx, ctx.runtime_info.vs_info.outputs[index][element]);
|
||||||
}
|
}
|
||||||
case IR::Attribute::RenderTarget0:
|
case IR::Attribute::RenderTarget0:
|
||||||
case IR::Attribute::RenderTarget1:
|
case IR::Attribute::RenderTarget1:
|
||||||
|
|
|
@ -41,9 +41,10 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
EmitContext::EmitContext(const Profile& profile_, const Shader::Info& info_, u32& binding_)
|
EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_info_,
|
||||||
: Sirit::Module(profile_.supported_spirv), info{info_}, profile{profile_}, stage{info.stage},
|
const Shader::Info& info_, u32& binding_)
|
||||||
binding{binding_} {
|
: Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
|
||||||
|
profile{profile_}, stage{info.stage}, binding{binding_} {
|
||||||
AddCapability(spv::Capability::Shader);
|
AddCapability(spv::Capability::Shader);
|
||||||
DefineArithmeticTypes();
|
DefineArithmeticTypes();
|
||||||
DefineInterfaces();
|
DefineInterfaces();
|
||||||
|
@ -247,7 +248,7 @@ void EmitContext::DefineInputs() {
|
||||||
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
|
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
|
||||||
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
|
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
|
||||||
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
||||||
for (const auto& input : info.ps_inputs) {
|
for (const auto& input : runtime_info.fs_info.inputs) {
|
||||||
const u32 semantic = input.param_index;
|
const u32 semantic = input.param_index;
|
||||||
if (input.is_default && !input.is_flat) {
|
if (input.is_default && !input.is_flat) {
|
||||||
input_params[semantic] = {MakeDefaultValue(*this, input.default_value), F32[1],
|
input_params[semantic] = {MakeDefaultValue(*this, input.default_value), F32[1],
|
||||||
|
@ -554,7 +555,7 @@ void EmitContext::DefineSharedMemory() {
|
||||||
if (!info.uses_shared) {
|
if (!info.uses_shared) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
u32 shared_memory_size = info.shared_memory_size;
|
u32 shared_memory_size = runtime_info.cs_info.shared_memory_size;
|
||||||
if (shared_memory_size == 0) {
|
if (shared_memory_size == 0) {
|
||||||
shared_memory_size = DefaultSharedMemSize;
|
shared_memory_size = DefaultSharedMemSize;
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,9 +6,9 @@
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <sirit/sirit.h>
|
#include <sirit/sirit.h>
|
||||||
|
|
||||||
|
#include "shader_recompiler/info.h"
|
||||||
#include "shader_recompiler/ir/program.h"
|
#include "shader_recompiler/ir/program.h"
|
||||||
#include "shader_recompiler/profile.h"
|
#include "shader_recompiler/profile.h"
|
||||||
#include "shader_recompiler/runtime_info.h"
|
|
||||||
|
|
||||||
namespace Shader::Backend::SPIRV {
|
namespace Shader::Backend::SPIRV {
|
||||||
|
|
||||||
|
@ -36,7 +36,8 @@ struct VectorIds {
|
||||||
|
|
||||||
class EmitContext final : public Sirit::Module {
|
class EmitContext final : public Sirit::Module {
|
||||||
public:
|
public:
|
||||||
explicit EmitContext(const Profile& profile, const Shader::Info& info, u32& binding);
|
explicit EmitContext(const Profile& profile, const RuntimeInfo& runtime_info,
|
||||||
|
const Shader::Info& info, u32& binding);
|
||||||
~EmitContext();
|
~EmitContext();
|
||||||
|
|
||||||
Id Def(const IR::Value& value);
|
Id Def(const IR::Value& value);
|
||||||
|
@ -125,6 +126,7 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
const Info& info;
|
const Info& info;
|
||||||
|
const RuntimeInfo& runtime_info;
|
||||||
const Profile& profile;
|
const Profile& profile;
|
||||||
Stage stage{};
|
Stage stage{};
|
||||||
|
|
||||||
|
|
|
@ -602,13 +602,14 @@ public:
|
||||||
Common::ObjectPool<IR::Block>& block_pool_,
|
Common::ObjectPool<IR::Block>& block_pool_,
|
||||||
Common::ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
|
Common::ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
|
||||||
IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
|
IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
|
||||||
Info& info_, const Profile& profile_)
|
Info& info_, const RuntimeInfo& runtime_info_, const Profile& profile_)
|
||||||
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
|
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
|
||||||
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, profile{profile_} {
|
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_},
|
||||||
|
runtime_info{runtime_info_}, profile{profile_} {
|
||||||
Visit(root_stmt, nullptr, nullptr);
|
Visit(root_stmt, nullptr, nullptr);
|
||||||
|
|
||||||
IR::Block& first_block{*syntax_list.front().data.block};
|
IR::Block& first_block{*syntax_list.front().data.block};
|
||||||
Translator{&first_block, info, profile}.EmitPrologue();
|
Translator{&first_block, info, runtime_info, profile}.EmitPrologue();
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -637,7 +638,7 @@ private:
|
||||||
const u32 start = stmt.block->begin_index;
|
const u32 start = stmt.block->begin_index;
|
||||||
const u32 size = stmt.block->end_index - start + 1;
|
const u32 size = stmt.block->end_index - start + 1;
|
||||||
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size),
|
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size),
|
||||||
info, profile);
|
info, runtime_info, profile);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -817,19 +818,20 @@ private:
|
||||||
const Block dummy_flow_block{.is_dummy = true};
|
const Block dummy_flow_block{.is_dummy = true};
|
||||||
std::span<const GcnInst> inst_list;
|
std::span<const GcnInst> inst_list;
|
||||||
Info& info;
|
Info& info;
|
||||||
|
const RuntimeInfo& runtime_info;
|
||||||
const Profile& profile;
|
const Profile& profile;
|
||||||
};
|
};
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
IR::AbstractSyntaxList BuildASL(Common::ObjectPool<IR::Inst>& inst_pool,
|
IR::AbstractSyntaxList BuildASL(Common::ObjectPool<IR::Inst>& inst_pool,
|
||||||
Common::ObjectPool<IR::Block>& block_pool, CFG& cfg, Info& info,
|
Common::ObjectPool<IR::Block>& block_pool, CFG& cfg, Info& info,
|
||||||
const Profile& profile) {
|
const RuntimeInfo& runtime_info, const Profile& profile) {
|
||||||
Common::ObjectPool<Statement> stmt_pool{64};
|
Common::ObjectPool<Statement> stmt_pool{64};
|
||||||
GotoPass goto_pass{cfg, stmt_pool};
|
GotoPass goto_pass{cfg, stmt_pool};
|
||||||
Statement& root{goto_pass.RootStatement()};
|
Statement& root{goto_pass.RootStatement()};
|
||||||
IR::AbstractSyntaxList syntax_list;
|
IR::AbstractSyntaxList syntax_list;
|
||||||
TranslatePass{inst_pool, block_pool, stmt_pool, root,
|
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list,
|
||||||
syntax_list, cfg.inst_list, info, profile};
|
cfg.inst_list, info, runtime_info, profile};
|
||||||
ASSERT_MSG(!info.translation_failed, "Shader translation has failed");
|
ASSERT_MSG(!info.translation_failed, "Shader translation has failed");
|
||||||
return syntax_list;
|
return syntax_list;
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,12 +11,14 @@
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
struct Info;
|
struct Info;
|
||||||
struct Profile;
|
struct Profile;
|
||||||
|
struct RuntimeInfo;
|
||||||
} // namespace Shader
|
} // namespace Shader
|
||||||
|
|
||||||
namespace Shader::Gcn {
|
namespace Shader::Gcn {
|
||||||
|
|
||||||
[[nodiscard]] IR::AbstractSyntaxList BuildASL(Common::ObjectPool<IR::Inst>& inst_pool,
|
[[nodiscard]] IR::AbstractSyntaxList BuildASL(Common::ObjectPool<IR::Inst>& inst_pool,
|
||||||
Common::ObjectPool<IR::Block>& block_pool, CFG& cfg,
|
Common::ObjectPool<IR::Block>& block_pool, CFG& cfg,
|
||||||
Info& info, const Profile& profile);
|
Info& info, const RuntimeInfo& runtime_info,
|
||||||
|
const Profile& profile);
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include "shader_recompiler/frontend/translate/translate.h"
|
#include "shader_recompiler/frontend/translate/translate.h"
|
||||||
#include "shader_recompiler/specialization.h"
|
#include "shader_recompiler/runtime_info.h"
|
||||||
|
|
||||||
namespace Shader::Gcn {
|
namespace Shader::Gcn {
|
||||||
|
|
||||||
|
@ -25,7 +25,7 @@ void Translator::EmitExport(const GcnInst& inst) {
|
||||||
return comp;
|
return comp;
|
||||||
}
|
}
|
||||||
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
|
const u32 index = u32(attrib) - u32(IR::Attribute::RenderTarget0);
|
||||||
switch (info.mrt_swizzles[index]) {
|
switch (runtime_info.fs_info.mrt_swizzles[index]) {
|
||||||
case MrtSwizzle::Identity:
|
case MrtSwizzle::Identity:
|
||||||
return comp;
|
return comp;
|
||||||
case MrtSwizzle::Alt:
|
case MrtSwizzle::Alt:
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include "shader_recompiler/exception.h"
|
#include "shader_recompiler/exception.h"
|
||||||
#include "shader_recompiler/frontend/fetch_shader.h"
|
#include "shader_recompiler/frontend/fetch_shader.h"
|
||||||
#include "shader_recompiler/frontend/translate/translate.h"
|
#include "shader_recompiler/frontend/translate/translate.h"
|
||||||
|
#include "shader_recompiler/info.h"
|
||||||
#include "shader_recompiler/runtime_info.h"
|
#include "shader_recompiler/runtime_info.h"
|
||||||
#include "video_core/amdgpu/resource.h"
|
#include "video_core/amdgpu/resource.h"
|
||||||
|
|
||||||
|
@ -16,8 +17,9 @@
|
||||||
|
|
||||||
namespace Shader::Gcn {
|
namespace Shader::Gcn {
|
||||||
|
|
||||||
Translator::Translator(IR::Block* block_, Info& info_, const Profile& profile_)
|
Translator::Translator(IR::Block* block_, Info& info_, const RuntimeInfo& runtime_info_,
|
||||||
: ir{*block_, block_->begin()}, info{info_}, profile{profile_} {}
|
const Profile& profile_)
|
||||||
|
: ir{*block_, block_->begin()}, info{info_}, runtime_info{runtime_info_}, profile{profile_} {}
|
||||||
|
|
||||||
void Translator::EmitPrologue() {
|
void Translator::EmitPrologue() {
|
||||||
ir.Prologue();
|
ir.Prologue();
|
||||||
|
@ -25,7 +27,7 @@ void Translator::EmitPrologue() {
|
||||||
|
|
||||||
// Initialize user data.
|
// Initialize user data.
|
||||||
IR::ScalarReg dst_sreg = IR::ScalarReg::S0;
|
IR::ScalarReg dst_sreg = IR::ScalarReg::S0;
|
||||||
for (u32 i = 0; i < info.num_user_data; i++) {
|
for (u32 i = 0; i < runtime_info.num_user_data; i++) {
|
||||||
ir.SetScalarReg(dst_sreg, ir.GetUserData(dst_sreg));
|
ir.SetScalarReg(dst_sreg, ir.GetUserData(dst_sreg));
|
||||||
++dst_sreg;
|
++dst_sreg;
|
||||||
}
|
}
|
||||||
|
@ -36,15 +38,15 @@ void Translator::EmitPrologue() {
|
||||||
// v0: vertex ID, always present
|
// v0: vertex ID, always present
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
|
||||||
// v1: instance ID, step rate 0
|
// v1: instance ID, step rate 0
|
||||||
if (info.num_input_vgprs > 0) {
|
if (runtime_info.num_input_vgprs > 0) {
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId0));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId0));
|
||||||
}
|
}
|
||||||
// v2: instance ID, step rate 1
|
// v2: instance ID, step rate 1
|
||||||
if (info.num_input_vgprs > 1) {
|
if (runtime_info.num_input_vgprs > 1) {
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId1));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId1));
|
||||||
}
|
}
|
||||||
// v3: instance ID, plain
|
// v3: instance ID, plain
|
||||||
if (info.num_input_vgprs > 2) {
|
if (runtime_info.num_input_vgprs > 2) {
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::InstanceId));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -64,13 +66,13 @@ void Translator::EmitPrologue() {
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 1));
|
||||||
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2));
|
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 2));
|
||||||
|
|
||||||
if (info.tgid_enable[0]) {
|
if (runtime_info.cs_info.tgid_enable[0]) {
|
||||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 0));
|
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 0));
|
||||||
}
|
}
|
||||||
if (info.tgid_enable[1]) {
|
if (runtime_info.cs_info.tgid_enable[1]) {
|
||||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 1));
|
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 1));
|
||||||
}
|
}
|
||||||
if (info.tgid_enable[2]) {
|
if (runtime_info.cs_info.tgid_enable[2]) {
|
||||||
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
|
ir.SetScalarReg(dst_sreg++, ir.GetAttributeU32(IR::Attribute::WorkgroupId, 2));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -452,11 +454,11 @@ void Translator::LogMissingOpcode(const GcnInst& inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Info& info,
|
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Info& info,
|
||||||
const Profile& profile) {
|
const RuntimeInfo& runtime_info, const Profile& profile) {
|
||||||
if (inst_list.empty()) {
|
if (inst_list.empty()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
Translator translator{block, info, profile};
|
Translator translator{block, info, runtime_info, profile};
|
||||||
for (const auto& inst : inst_list) {
|
for (const auto& inst : inst_list) {
|
||||||
pc += inst.length;
|
pc += inst.length;
|
||||||
|
|
||||||
|
|
|
@ -5,9 +5,9 @@
|
||||||
|
|
||||||
#include <span>
|
#include <span>
|
||||||
#include "shader_recompiler/frontend/instruction.h"
|
#include "shader_recompiler/frontend/instruction.h"
|
||||||
|
#include "shader_recompiler/info.h"
|
||||||
#include "shader_recompiler/ir/basic_block.h"
|
#include "shader_recompiler/ir/basic_block.h"
|
||||||
#include "shader_recompiler/ir/ir_emitter.h"
|
#include "shader_recompiler/ir/ir_emitter.h"
|
||||||
#include "shader_recompiler/runtime_info.h"
|
|
||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
struct Info;
|
struct Info;
|
||||||
|
@ -55,7 +55,8 @@ enum class NegateMode : u32 {
|
||||||
|
|
||||||
class Translator {
|
class Translator {
|
||||||
public:
|
public:
|
||||||
explicit Translator(IR::Block* block_, Info& info, const Profile& profile);
|
explicit Translator(IR::Block* block_, Info& info, const RuntimeInfo& runtime_info,
|
||||||
|
const Profile& profile);
|
||||||
|
|
||||||
// Instruction categories
|
// Instruction categories
|
||||||
void EmitPrologue();
|
void EmitPrologue();
|
||||||
|
@ -237,12 +238,13 @@ private:
|
||||||
private:
|
private:
|
||||||
IR::IREmitter ir;
|
IR::IREmitter ir;
|
||||||
Info& info;
|
Info& info;
|
||||||
|
const RuntimeInfo& runtime_info;
|
||||||
const Profile& profile;
|
const Profile& profile;
|
||||||
IR::U32 m0_value;
|
IR::U32 m0_value;
|
||||||
bool opcode_missing = false;
|
bool opcode_missing = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info,
|
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info,
|
||||||
const Profile& profile);
|
const RuntimeInfo& runtime_info, const Profile& profile);
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
|
|
@ -7,14 +7,14 @@ namespace Shader::Gcn {
|
||||||
|
|
||||||
void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
|
void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
|
||||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||||
auto& attr = info.ps_inputs.at(inst.control.vintrp.attr);
|
auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
|
||||||
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
|
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
|
||||||
ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan));
|
ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::V_INTERP_MOV_F32(const GcnInst& inst) {
|
void Translator::V_INTERP_MOV_F32(const GcnInst& inst) {
|
||||||
const IR::VectorReg dst_reg{inst.dst[0].code};
|
const IR::VectorReg dst_reg{inst.dst[0].code};
|
||||||
auto& attr = info.ps_inputs.at(inst.control.vintrp.attr);
|
auto& attr = runtime_info.fs_info.inputs.at(inst.control.vintrp.attr);
|
||||||
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
|
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
|
||||||
ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan));
|
ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan));
|
||||||
}
|
}
|
||||||
|
|
232
src/shader_recompiler/info.h
Normal file
232
src/shader_recompiler/info.h
Normal file
|
@ -0,0 +1,232 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <span>
|
||||||
|
#include <boost/container/small_vector.hpp>
|
||||||
|
#include <boost/container/static_vector.hpp>
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/types.h"
|
||||||
|
#include "shader_recompiler/ir/attribute.h"
|
||||||
|
#include "shader_recompiler/ir/reg.h"
|
||||||
|
#include "shader_recompiler/ir/type.h"
|
||||||
|
#include "shader_recompiler/params.h"
|
||||||
|
#include "shader_recompiler/runtime_info.h"
|
||||||
|
#include "video_core/amdgpu/resource.h"
|
||||||
|
|
||||||
|
namespace Shader {
|
||||||
|
|
||||||
|
static constexpr size_t NumUserDataRegs = 16;
|
||||||
|
|
||||||
|
enum class TextureType : u32 {
|
||||||
|
Color1D,
|
||||||
|
ColorArray1D,
|
||||||
|
Color2D,
|
||||||
|
ColorArray2D,
|
||||||
|
Color3D,
|
||||||
|
ColorCube,
|
||||||
|
Buffer,
|
||||||
|
};
|
||||||
|
constexpr u32 NUM_TEXTURE_TYPES = 7;
|
||||||
|
|
||||||
|
struct Info;
|
||||||
|
|
||||||
|
struct BufferResource {
|
||||||
|
u32 sgpr_base;
|
||||||
|
u32 dword_offset;
|
||||||
|
IR::Type used_types;
|
||||||
|
AmdGpu::Buffer inline_cbuf;
|
||||||
|
bool is_instance_data{};
|
||||||
|
bool is_written{};
|
||||||
|
|
||||||
|
bool IsStorage(AmdGpu::Buffer buffer) const noexcept {
|
||||||
|
static constexpr size_t MaxUboSize = 65536;
|
||||||
|
return buffer.GetSize() > MaxUboSize || is_written;
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
|
||||||
|
};
|
||||||
|
using BufferResourceList = boost::container::small_vector<BufferResource, 16>;
|
||||||
|
|
||||||
|
struct TextureBufferResource {
|
||||||
|
u32 sgpr_base;
|
||||||
|
u32 dword_offset;
|
||||||
|
AmdGpu::NumberFormat nfmt;
|
||||||
|
bool is_written{};
|
||||||
|
|
||||||
|
constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
|
||||||
|
};
|
||||||
|
using TextureBufferResourceList = boost::container::small_vector<TextureBufferResource, 16>;
|
||||||
|
|
||||||
|
struct ImageResource {
|
||||||
|
u32 sgpr_base;
|
||||||
|
u32 dword_offset;
|
||||||
|
AmdGpu::ImageType type;
|
||||||
|
AmdGpu::NumberFormat nfmt;
|
||||||
|
bool is_storage;
|
||||||
|
bool is_depth;
|
||||||
|
bool is_atomic{};
|
||||||
|
|
||||||
|
constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
|
||||||
|
};
|
||||||
|
using ImageResourceList = boost::container::small_vector<ImageResource, 16>;
|
||||||
|
|
||||||
|
struct SamplerResource {
|
||||||
|
u32 sgpr_base;
|
||||||
|
u32 dword_offset;
|
||||||
|
AmdGpu::Sampler inline_sampler{};
|
||||||
|
u32 associated_image : 4;
|
||||||
|
u32 disable_aniso : 1;
|
||||||
|
|
||||||
|
constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept;
|
||||||
|
};
|
||||||
|
using SamplerResourceList = boost::container::small_vector<SamplerResource, 16>;
|
||||||
|
|
||||||
|
struct PushData {
|
||||||
|
static constexpr size_t BufOffsetIndex = 2;
|
||||||
|
|
||||||
|
u32 step0;
|
||||||
|
u32 step1;
|
||||||
|
std::array<u8, 32> buf_offsets;
|
||||||
|
|
||||||
|
void AddOffset(u32 binding, u32 offset) {
|
||||||
|
ASSERT(offset < 256 && binding < buf_offsets.size());
|
||||||
|
buf_offsets[binding] = offset;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Contains general information generated by the shader recompiler for an input program.
|
||||||
|
*/
|
||||||
|
struct Info {
|
||||||
|
struct VsInput {
|
||||||
|
enum InstanceIdType : u8 {
|
||||||
|
None = 0,
|
||||||
|
OverStepRate0 = 1,
|
||||||
|
OverStepRate1 = 2,
|
||||||
|
Plain = 3,
|
||||||
|
};
|
||||||
|
|
||||||
|
AmdGpu::NumberFormat fmt;
|
||||||
|
u16 binding;
|
||||||
|
u16 num_components;
|
||||||
|
u8 sgpr_base;
|
||||||
|
u8 dword_offset;
|
||||||
|
InstanceIdType instance_step_rate;
|
||||||
|
s32 instance_data_buf;
|
||||||
|
};
|
||||||
|
boost::container::static_vector<VsInput, 32> vs_inputs{};
|
||||||
|
|
||||||
|
struct AttributeFlags {
|
||||||
|
bool Get(IR::Attribute attrib, u32 comp = 0) const {
|
||||||
|
return flags[Index(attrib)] & (1 << comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool GetAny(IR::Attribute attrib) const {
|
||||||
|
return flags[Index(attrib)];
|
||||||
|
}
|
||||||
|
|
||||||
|
void Set(IR::Attribute attrib, u32 comp = 0) {
|
||||||
|
flags[Index(attrib)] |= (1 << comp);
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 NumComponents(IR::Attribute attrib) const {
|
||||||
|
return 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t Index(IR::Attribute attrib) {
|
||||||
|
return static_cast<size_t>(attrib);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::array<u8, IR::NumAttributes> flags;
|
||||||
|
};
|
||||||
|
AttributeFlags loads{};
|
||||||
|
AttributeFlags stores{};
|
||||||
|
|
||||||
|
s8 vertex_offset_sgpr = -1;
|
||||||
|
s8 instance_offset_sgpr = -1;
|
||||||
|
|
||||||
|
BufferResourceList buffers;
|
||||||
|
TextureBufferResourceList texture_buffers;
|
||||||
|
ImageResourceList images;
|
||||||
|
SamplerResourceList samplers;
|
||||||
|
|
||||||
|
std::span<const u32> user_data;
|
||||||
|
Stage stage;
|
||||||
|
|
||||||
|
u64 pgm_hash{};
|
||||||
|
VAddr pgm_base;
|
||||||
|
bool has_storage_images{};
|
||||||
|
bool has_image_buffers{};
|
||||||
|
bool has_texel_buffers{};
|
||||||
|
bool has_discard{};
|
||||||
|
bool has_image_gather{};
|
||||||
|
bool has_image_query{};
|
||||||
|
bool uses_lane_id{};
|
||||||
|
bool uses_group_quad{};
|
||||||
|
bool uses_shared{};
|
||||||
|
bool uses_fp16{};
|
||||||
|
bool uses_step_rates{};
|
||||||
|
bool translation_failed{}; // indicates that shader has unsupported instructions
|
||||||
|
|
||||||
|
explicit Info(Stage stage_, ShaderParams params)
|
||||||
|
: stage{stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
|
||||||
|
user_data{params.user_data} {}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {
|
||||||
|
T data;
|
||||||
|
const u32* base = user_data.data();
|
||||||
|
if (ptr_index != IR::NumScalarRegs) {
|
||||||
|
std::memcpy(&base, &user_data[ptr_index], sizeof(base));
|
||||||
|
}
|
||||||
|
std::memcpy(&data, base + dword_offset, sizeof(T));
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t NumBindings() const noexcept {
|
||||||
|
return buffers.size() + texture_buffers.size() + images.size() + samplers.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets() const noexcept {
|
||||||
|
u32 vertex_offset = 0;
|
||||||
|
u32 instance_offset = 0;
|
||||||
|
if (vertex_offset_sgpr != -1) {
|
||||||
|
vertex_offset = user_data[vertex_offset_sgpr];
|
||||||
|
}
|
||||||
|
if (instance_offset_sgpr != -1) {
|
||||||
|
instance_offset = user_data[instance_offset_sgpr];
|
||||||
|
}
|
||||||
|
return {vertex_offset, instance_offset};
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
|
||||||
|
return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const noexcept {
|
||||||
|
return info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
|
||||||
|
return info.ReadUd<AmdGpu::Image>(sgpr_base, dword_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
|
||||||
|
return inline_sampler ? inline_sampler : info.ReadUd<AmdGpu::Sampler>(sgpr_base, dword_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Shader
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct fmt::formatter<Shader::Stage> {
|
||||||
|
constexpr auto parse(format_parse_context& ctx) {
|
||||||
|
return ctx.begin();
|
||||||
|
}
|
||||||
|
auto format(const Shader::Stage stage, format_context& ctx) const {
|
||||||
|
constexpr static std::array names = {"fs", "vs", "gs", "es", "hs", "ls", "cs"};
|
||||||
|
return fmt::format_to(ctx.out(), "{}", names[static_cast<size_t>(stage)]);
|
||||||
|
}
|
||||||
|
};
|
|
@ -4,11 +4,11 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
|
#include "shader_recompiler/info.h"
|
||||||
#include "shader_recompiler/ir/basic_block.h"
|
#include "shader_recompiler/ir/basic_block.h"
|
||||||
#include "shader_recompiler/ir/breadth_first_search.h"
|
#include "shader_recompiler/ir/breadth_first_search.h"
|
||||||
#include "shader_recompiler/ir/ir_emitter.h"
|
#include "shader_recompiler/ir/ir_emitter.h"
|
||||||
#include "shader_recompiler/ir/program.h"
|
#include "shader_recompiler/ir/program.h"
|
||||||
#include "shader_recompiler/runtime_info.h"
|
|
||||||
#include "video_core/amdgpu/resource.h"
|
#include "video_core/amdgpu/resource.h"
|
||||||
|
|
||||||
namespace Shader::Optimization {
|
namespace Shader::Optimization {
|
||||||
|
|
|
@ -5,9 +5,9 @@
|
||||||
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include "shader_recompiler/frontend/instruction.h"
|
#include "shader_recompiler/frontend/instruction.h"
|
||||||
|
#include "shader_recompiler/info.h"
|
||||||
#include "shader_recompiler/ir/abstract_syntax_list.h"
|
#include "shader_recompiler/ir/abstract_syntax_list.h"
|
||||||
#include "shader_recompiler/ir/basic_block.h"
|
#include "shader_recompiler/ir/basic_block.h"
|
||||||
#include "shader_recompiler/runtime_info.h"
|
|
||||||
|
|
||||||
namespace Shader::IR {
|
namespace Shader::IR {
|
||||||
|
|
||||||
|
|
26
src/shader_recompiler/params.h
Normal file
26
src/shader_recompiler/params.h
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <span>
|
||||||
|
#include "common/types.h"
|
||||||
|
|
||||||
|
namespace Shader {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compilation parameters used to identify and locate a guest shader program.
|
||||||
|
*/
|
||||||
|
struct ShaderParams {
|
||||||
|
static constexpr u32 NumShaderUserData = 16;
|
||||||
|
|
||||||
|
std::span<const u32, NumShaderUserData> user_data;
|
||||||
|
std::span<const u32> code;
|
||||||
|
u64 hash;
|
||||||
|
|
||||||
|
VAddr Base() const noexcept {
|
||||||
|
return reinterpret_cast<VAddr>(code.data());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Shader
|
|
@ -28,29 +28,32 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
|
||||||
return blocks;
|
return blocks;
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
|
IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
|
||||||
Common::ObjectPool<IR::Block>& block_pool, std::span<const u32> token,
|
const RuntimeInfo& runtime_info, const Profile& profile) {
|
||||||
Info& info, const Profile& profile) {
|
|
||||||
// Ensure first instruction is expected.
|
// Ensure first instruction is expected.
|
||||||
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
|
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
|
||||||
ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
|
ASSERT_MSG(code[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
|
||||||
|
|
||||||
Gcn::GcnCodeSlice slice(token.data(), token.data() + token.size());
|
Gcn::GcnCodeSlice slice(code.data(), code.data() + code.size());
|
||||||
Gcn::GcnDecodeContext decoder;
|
Gcn::GcnDecodeContext decoder;
|
||||||
|
|
||||||
// Decode and save instructions
|
// Decode and save instructions
|
||||||
IR::Program program{info};
|
IR::Program program{info};
|
||||||
program.ins_list.reserve(token.size());
|
program.ins_list.reserve(code.size());
|
||||||
while (!slice.atEnd()) {
|
while (!slice.atEnd()) {
|
||||||
program.ins_list.emplace_back(decoder.decodeInstruction(slice));
|
program.ins_list.emplace_back(decoder.decodeInstruction(slice));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Clear any previous pooled data.
|
||||||
|
pools.ReleaseContents();
|
||||||
|
|
||||||
// Create control flow graph
|
// Create control flow graph
|
||||||
Common::ObjectPool<Gcn::Block> gcn_block_pool{64};
|
Common::ObjectPool<Gcn::Block> gcn_block_pool{64};
|
||||||
Gcn::CFG cfg{gcn_block_pool, program.ins_list};
|
Gcn::CFG cfg{gcn_block_pool, program.ins_list};
|
||||||
|
|
||||||
// Structurize control flow graph and create program.
|
// Structurize control flow graph and create program.
|
||||||
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info, profile);
|
program.syntax_list = Shader::Gcn::BuildASL(pools.inst_pool, pools.block_pool, cfg,
|
||||||
|
program.info, runtime_info, profile);
|
||||||
program.blocks = GenerateBlocks(program.syntax_list);
|
program.blocks = GenerateBlocks(program.syntax_list);
|
||||||
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
|
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
|
||||||
|
|
||||||
|
@ -64,7 +67,6 @@ IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
|
||||||
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
||||||
Shader::Optimization::DeadCodeEliminationPass(program);
|
Shader::Optimization::DeadCodeEliminationPass(program);
|
||||||
Shader::Optimization::CollectShaderInfoPass(program);
|
Shader::Optimization::CollectShaderInfoPass(program);
|
||||||
LOG_DEBUG(Render_Vulkan, "{}", Shader::IR::DumpProgram(program));
|
|
||||||
|
|
||||||
return program;
|
return program;
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,10 +10,24 @@
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
struct Profile;
|
struct Profile;
|
||||||
|
struct RuntimeInfo;
|
||||||
|
|
||||||
[[nodiscard]] IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
|
struct Pools {
|
||||||
Common::ObjectPool<IR::Block>& block_pool,
|
static constexpr u32 InstPoolSize = 8192;
|
||||||
std::span<const u32> code, Info& info,
|
static constexpr u32 BlockPoolSize = 32;
|
||||||
const Profile& profile);
|
|
||||||
|
Common::ObjectPool<IR::Inst> inst_pool;
|
||||||
|
Common::ObjectPool<IR::Block> block_pool;
|
||||||
|
|
||||||
|
explicit Pools() : inst_pool{InstPoolSize}, block_pool{BlockPoolSize} {}
|
||||||
|
|
||||||
|
void ReleaseContents() {
|
||||||
|
inst_pool.ReleaseContents();
|
||||||
|
block_pool.ReleaseContents();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
[[nodiscard]] IR::Program TranslateProgram(std::span<const u32> code, Pools& pools, Info& info,
|
||||||
|
const RuntimeInfo& runtime_info, const Profile& profile);
|
||||||
|
|
||||||
} // namespace Shader
|
} // namespace Shader
|
||||||
|
|
|
@ -3,28 +3,14 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <span>
|
#include <algorithm>
|
||||||
#include <boost/container/small_vector.hpp>
|
|
||||||
#include <boost/container/static_vector.hpp>
|
#include <boost/container/static_vector.hpp>
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "shader_recompiler/ir/attribute.h"
|
|
||||||
#include "shader_recompiler/ir/reg.h"
|
|
||||||
#include "shader_recompiler/ir/type.h"
|
|
||||||
#include "video_core/amdgpu/resource.h"
|
|
||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
static constexpr size_t NumUserDataRegs = 16;
|
|
||||||
static constexpr size_t MaxColorBuffers = 8;
|
|
||||||
|
|
||||||
enum class MrtSwizzle : u32 {
|
|
||||||
Identity = 0,
|
|
||||||
Alt = 1,
|
|
||||||
Reverse = 2,
|
|
||||||
ReverseAlt = 3,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum class Stage : u32 {
|
enum class Stage : u32 {
|
||||||
Fragment,
|
Fragment,
|
||||||
Vertex,
|
Vertex,
|
||||||
|
@ -37,21 +23,18 @@ enum class Stage : u32 {
|
||||||
constexpr u32 MaxStageTypes = 6;
|
constexpr u32 MaxStageTypes = 6;
|
||||||
|
|
||||||
[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
|
[[nodiscard]] constexpr Stage StageFromIndex(size_t index) noexcept {
|
||||||
return static_cast<Stage>(static_cast<size_t>(Stage::Vertex) + index);
|
return static_cast<Stage>(index);
|
||||||
}
|
}
|
||||||
|
|
||||||
enum class TextureType : u32 {
|
enum class MrtSwizzle : u8 {
|
||||||
Color1D,
|
Identity = 0,
|
||||||
ColorArray1D,
|
Alt = 1,
|
||||||
Color2D,
|
Reverse = 2,
|
||||||
ColorArray2D,
|
ReverseAlt = 3,
|
||||||
Color3D,
|
|
||||||
ColorCube,
|
|
||||||
Buffer,
|
|
||||||
};
|
};
|
||||||
constexpr u32 NUM_TEXTURE_TYPES = 7;
|
static constexpr u32 MaxColorBuffers = 8;
|
||||||
|
|
||||||
enum class VsOutput : u32 {
|
enum class VsOutput : u8 {
|
||||||
None,
|
None,
|
||||||
PointSprite,
|
PointSprite,
|
||||||
EdgeFlag,
|
EdgeFlag,
|
||||||
|
@ -78,212 +61,67 @@ enum class VsOutput : u32 {
|
||||||
};
|
};
|
||||||
using VsOutputMap = std::array<VsOutput, 4>;
|
using VsOutputMap = std::array<VsOutput, 4>;
|
||||||
|
|
||||||
struct Info;
|
struct VertexRuntimeInfo {
|
||||||
|
boost::container::static_vector<VsOutputMap, 3> outputs;
|
||||||
|
|
||||||
struct BufferResource {
|
bool operator==(const VertexRuntimeInfo& other) const noexcept {
|
||||||
u32 sgpr_base;
|
return true;
|
||||||
u32 dword_offset;
|
|
||||||
IR::Type used_types;
|
|
||||||
AmdGpu::Buffer inline_cbuf;
|
|
||||||
bool is_instance_data{};
|
|
||||||
bool is_written{};
|
|
||||||
|
|
||||||
bool IsStorage(AmdGpu::Buffer buffer) const noexcept {
|
|
||||||
static constexpr size_t MaxUboSize = 65536;
|
|
||||||
return buffer.GetSize() > MaxUboSize || is_written;
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
|
|
||||||
};
|
|
||||||
using BufferResourceList = boost::container::small_vector<BufferResource, 16>;
|
|
||||||
|
|
||||||
struct TextureBufferResource {
|
|
||||||
u32 sgpr_base;
|
|
||||||
u32 dword_offset;
|
|
||||||
AmdGpu::NumberFormat nfmt;
|
|
||||||
bool is_written{};
|
|
||||||
|
|
||||||
constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept;
|
|
||||||
};
|
|
||||||
using TextureBufferResourceList = boost::container::small_vector<TextureBufferResource, 16>;
|
|
||||||
|
|
||||||
struct ImageResource {
|
|
||||||
u32 sgpr_base;
|
|
||||||
u32 dword_offset;
|
|
||||||
AmdGpu::ImageType type;
|
|
||||||
AmdGpu::NumberFormat nfmt;
|
|
||||||
bool is_storage;
|
|
||||||
bool is_depth;
|
|
||||||
bool is_atomic{};
|
|
||||||
|
|
||||||
constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept;
|
|
||||||
};
|
|
||||||
using ImageResourceList = boost::container::small_vector<ImageResource, 16>;
|
|
||||||
|
|
||||||
struct SamplerResource {
|
|
||||||
u32 sgpr_base;
|
|
||||||
u32 dword_offset;
|
|
||||||
AmdGpu::Sampler inline_sampler{};
|
|
||||||
u32 associated_image : 4;
|
|
||||||
u32 disable_aniso : 1;
|
|
||||||
|
|
||||||
constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept;
|
|
||||||
};
|
|
||||||
using SamplerResourceList = boost::container::small_vector<SamplerResource, 16>;
|
|
||||||
|
|
||||||
struct PushData {
|
|
||||||
static constexpr size_t BufOffsetIndex = 2;
|
|
||||||
|
|
||||||
u32 step0;
|
|
||||||
u32 step1;
|
|
||||||
std::array<u8, 32> buf_offsets;
|
|
||||||
|
|
||||||
void AddOffset(u32 binding, u32 offset) {
|
|
||||||
ASSERT(offset < 256 && binding < buf_offsets.size());
|
|
||||||
buf_offsets[binding] = offset;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Info {
|
struct FragmentRuntimeInfo {
|
||||||
struct VsInput {
|
|
||||||
enum InstanceIdType : u8 {
|
|
||||||
None = 0,
|
|
||||||
OverStepRate0 = 1,
|
|
||||||
OverStepRate1 = 2,
|
|
||||||
Plain = 3,
|
|
||||||
};
|
|
||||||
|
|
||||||
AmdGpu::NumberFormat fmt;
|
|
||||||
u16 binding;
|
|
||||||
u16 num_components;
|
|
||||||
u8 sgpr_base;
|
|
||||||
u8 dword_offset;
|
|
||||||
InstanceIdType instance_step_rate;
|
|
||||||
s32 instance_data_buf;
|
|
||||||
};
|
|
||||||
boost::container::static_vector<VsInput, 32> vs_inputs{};
|
|
||||||
|
|
||||||
struct PsInput {
|
struct PsInput {
|
||||||
u32 param_index;
|
u8 param_index;
|
||||||
bool is_default;
|
bool is_default;
|
||||||
bool is_flat;
|
bool is_flat;
|
||||||
u32 default_value;
|
u8 default_value;
|
||||||
|
|
||||||
|
auto operator<=>(const PsInput&) const noexcept = default;
|
||||||
};
|
};
|
||||||
boost::container::static_vector<PsInput, 32> ps_inputs{};
|
boost::container::static_vector<PsInput, 32> inputs;
|
||||||
std::array<MrtSwizzle, MaxColorBuffers> mrt_swizzles;
|
std::array<MrtSwizzle, MaxColorBuffers> mrt_swizzles;
|
||||||
|
|
||||||
struct AttributeFlags {
|
bool operator==(const FragmentRuntimeInfo& other) const noexcept {
|
||||||
bool Get(IR::Attribute attrib, u32 comp = 0) const {
|
return std::ranges::equal(mrt_swizzles, other.mrt_swizzles) &&
|
||||||
return flags[Index(attrib)] & (1 << comp);
|
std::ranges::equal(inputs, other.inputs);
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
bool GetAny(IR::Attribute attrib) const {
|
struct ComputeRuntimeInfo {
|
||||||
return flags[Index(attrib)];
|
u32 shared_memory_size;
|
||||||
}
|
std::array<u32, 3> workgroup_size;
|
||||||
|
|
||||||
void Set(IR::Attribute attrib, u32 comp = 0) {
|
|
||||||
flags[Index(attrib)] |= (1 << comp);
|
|
||||||
}
|
|
||||||
|
|
||||||
u32 NumComponents(IR::Attribute attrib) const {
|
|
||||||
return 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
static size_t Index(IR::Attribute attrib) {
|
|
||||||
return static_cast<size_t>(attrib);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::array<u8, IR::NumAttributes> flags;
|
|
||||||
};
|
|
||||||
AttributeFlags loads{};
|
|
||||||
AttributeFlags stores{};
|
|
||||||
boost::container::static_vector<VsOutputMap, 3> vs_outputs;
|
|
||||||
|
|
||||||
s8 vertex_offset_sgpr = -1;
|
|
||||||
s8 instance_offset_sgpr = -1;
|
|
||||||
|
|
||||||
BufferResourceList buffers;
|
|
||||||
TextureBufferResourceList texture_buffers;
|
|
||||||
ImageResourceList images;
|
|
||||||
SamplerResourceList samplers;
|
|
||||||
|
|
||||||
std::array<u32, 3> workgroup_size{};
|
|
||||||
std::array<bool, 3> tgid_enable;
|
std::array<bool, 3> tgid_enable;
|
||||||
|
|
||||||
|
bool operator==(const ComputeRuntimeInfo& other) const noexcept {
|
||||||
|
return workgroup_size == other.workgroup_size && tgid_enable == other.tgid_enable;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stores information relevant to shader compilation sourced from liverpool registers.
|
||||||
|
* It may potentially differ with the same shader module so must be checked.
|
||||||
|
* It's also possible to store any other custom information that needs to be part of shader key.
|
||||||
|
*/
|
||||||
|
struct RuntimeInfo {
|
||||||
|
Stage stage;
|
||||||
u32 num_user_data;
|
u32 num_user_data;
|
||||||
u32 num_input_vgprs;
|
u32 num_input_vgprs;
|
||||||
std::span<const u32> user_data;
|
VertexRuntimeInfo vs_info;
|
||||||
Stage stage;
|
FragmentRuntimeInfo fs_info;
|
||||||
|
ComputeRuntimeInfo cs_info;
|
||||||
|
|
||||||
uintptr_t pgm_base{};
|
RuntimeInfo(Stage stage_) : stage{stage_} {}
|
||||||
u64 pgm_hash{};
|
|
||||||
u32 shared_memory_size{};
|
|
||||||
bool has_storage_images{};
|
|
||||||
bool has_image_buffers{};
|
|
||||||
bool has_texel_buffers{};
|
|
||||||
bool has_discard{};
|
|
||||||
bool has_image_gather{};
|
|
||||||
bool has_image_query{};
|
|
||||||
bool uses_lane_id{};
|
|
||||||
bool uses_group_quad{};
|
|
||||||
bool uses_shared{};
|
|
||||||
bool uses_fp16{};
|
|
||||||
bool uses_step_rates{};
|
|
||||||
bool translation_failed{}; // indicates that shader has unsupported instructions
|
|
||||||
|
|
||||||
template <typename T>
|
bool operator==(const RuntimeInfo& other) const noexcept {
|
||||||
T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {
|
if (stage == Stage::Fragment) {
|
||||||
T data;
|
return fs_info == other.fs_info;
|
||||||
const u32* base = user_data.data();
|
} else if (stage == Stage::Vertex) {
|
||||||
if (ptr_index != IR::NumScalarRegs) {
|
return vs_info == other.vs_info;
|
||||||
std::memcpy(&base, &user_data[ptr_index], sizeof(base));
|
} else if (stage == Stage::Compute) {
|
||||||
|
return cs_info == other.cs_info;
|
||||||
}
|
}
|
||||||
std::memcpy(&data, base + dword_offset, sizeof(T));
|
UNREACHABLE();
|
||||||
return data;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t NumBindings() const noexcept {
|
|
||||||
return buffers.size() + texture_buffers.size() + images.size() + samplers.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
[[nodiscard]] std::pair<u32, u32> GetDrawOffsets() const noexcept {
|
|
||||||
u32 vertex_offset = 0;
|
|
||||||
u32 instance_offset = 0;
|
|
||||||
if (vertex_offset_sgpr != -1) {
|
|
||||||
vertex_offset = user_data[vertex_offset_sgpr];
|
|
||||||
}
|
|
||||||
if (instance_offset_sgpr != -1) {
|
|
||||||
instance_offset = user_data[instance_offset_sgpr];
|
|
||||||
}
|
|
||||||
return {vertex_offset, instance_offset};
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
constexpr AmdGpu::Buffer BufferResource::GetSharp(const Info& info) const noexcept {
|
|
||||||
return inline_cbuf ? inline_cbuf : info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr AmdGpu::Buffer TextureBufferResource::GetSharp(const Info& info) const noexcept {
|
|
||||||
return info.ReadUd<AmdGpu::Buffer>(sgpr_base, dword_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr AmdGpu::Image ImageResource::GetSharp(const Info& info) const noexcept {
|
|
||||||
return info.ReadUd<AmdGpu::Image>(sgpr_base, dword_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr AmdGpu::Sampler SamplerResource::GetSharp(const Info& info) const noexcept {
|
|
||||||
return inline_sampler ? inline_sampler : info.ReadUd<AmdGpu::Sampler>(sgpr_base, dword_offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace Shader
|
} // namespace Shader
|
||||||
|
|
||||||
template <>
|
|
||||||
struct fmt::formatter<Shader::Stage> {
|
|
||||||
constexpr auto parse(format_parse_context& ctx) {
|
|
||||||
return ctx.begin();
|
|
||||||
}
|
|
||||||
auto format(const Shader::Stage stage, format_context& ctx) const {
|
|
||||||
constexpr static std::array names = {"fs", "vs", "gs", "es", "hs", "ls", "cs"};
|
|
||||||
return fmt::format_to(ctx.out(), "{}", names[static_cast<size_t>(stage)]);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
|
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "shader_recompiler/runtime_info.h"
|
#include "shader_recompiler/info.h"
|
||||||
|
|
||||||
namespace Shader {
|
namespace Shader {
|
||||||
|
|
||||||
|
@ -30,18 +30,42 @@ struct ImageSpecialization {
|
||||||
auto operator<=>(const ImageSpecialization&) const = default;
|
auto operator<=>(const ImageSpecialization&) const = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Alongside runtime information, this structure also checks bound resources
|
||||||
|
* for compatibility. Can be used as a key for storing shader permutations.
|
||||||
|
* Is separate from runtime information, because resource layout can only be deduced
|
||||||
|
* after the first compilation of a module.
|
||||||
|
*/
|
||||||
struct StageSpecialization {
|
struct StageSpecialization {
|
||||||
static constexpr size_t MaxStageResources = 32;
|
static constexpr size_t MaxStageResources = 32;
|
||||||
|
|
||||||
const Shader::Info* info;
|
const Shader::Info* info;
|
||||||
|
RuntimeInfo runtime_info;
|
||||||
std::bitset<MaxStageResources> bitset{};
|
std::bitset<MaxStageResources> bitset{};
|
||||||
boost::container::small_vector<BufferSpecialization, 16> buffers;
|
boost::container::small_vector<BufferSpecialization, 16> buffers;
|
||||||
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
|
boost::container::small_vector<TextureBufferSpecialization, 8> tex_buffers;
|
||||||
boost::container::small_vector<ImageSpecialization, 8> images;
|
boost::container::small_vector<ImageSpecialization, 8> images;
|
||||||
std::array<MrtSwizzle, MaxColorBuffers> mrt_swizzles;
|
|
||||||
u32 start_binding{};
|
u32 start_binding{};
|
||||||
|
|
||||||
explicit StageSpecialization(const Shader::Info& info_) : info{&info_} {}
|
explicit StageSpecialization(const Shader::Info& info_, RuntimeInfo runtime_info_,
|
||||||
|
u32 start_binding_)
|
||||||
|
: info{&info_}, runtime_info{runtime_info_}, start_binding{start_binding_} {
|
||||||
|
u32 binding{};
|
||||||
|
ForEachSharp(binding, buffers, info->buffers,
|
||||||
|
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||||
|
spec.stride = sharp.GetStride();
|
||||||
|
spec.is_storage = desc.IsStorage(sharp);
|
||||||
|
});
|
||||||
|
ForEachSharp(binding, tex_buffers, info->texture_buffers,
|
||||||
|
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
||||||
|
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||||
|
});
|
||||||
|
ForEachSharp(binding, images, info->images,
|
||||||
|
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
||||||
|
spec.type = sharp.GetType();
|
||||||
|
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
|
void ForEachSharp(u32& binding, auto& spec_list, auto& desc_list, auto&& func) {
|
||||||
for (const auto& desc : desc_list) {
|
for (const auto& desc : desc_list) {
|
||||||
|
@ -60,8 +84,7 @@ struct StageSpecialization {
|
||||||
if (start_binding != other.start_binding) {
|
if (start_binding != other.start_binding) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (info->stage == Shader::Stage::Fragment &&
|
if (runtime_info != other.runtime_info) {
|
||||||
!std::ranges::equal(mrt_swizzles, other.mrt_swizzles)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
u32 binding{};
|
u32 binding{};
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
#include "common/polyfill_thread.h"
|
#include "common/polyfill_thread.h"
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
#include "common/unique_function.h"
|
#include "common/unique_function.h"
|
||||||
|
#include "shader_recompiler/params.h"
|
||||||
#include "video_core/amdgpu/pixel_format.h"
|
#include "video_core/amdgpu/pixel_format.h"
|
||||||
#include "video_core/amdgpu/resource.h"
|
#include "video_core/amdgpu/resource.h"
|
||||||
|
|
||||||
|
@ -171,6 +172,15 @@ struct Liverpool {
|
||||||
return bininfo;
|
return bininfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static constexpr Shader::ShaderParams GetParams(const auto& sh) {
|
||||||
|
auto* bininfo = GetBinaryInfo(sh);
|
||||||
|
return {
|
||||||
|
.user_data = sh.user_data,
|
||||||
|
.code = sh.Code(),
|
||||||
|
.hash = bininfo->shader_hash,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
union PsInputControl {
|
union PsInputControl {
|
||||||
u32 raw;
|
u32 raw;
|
||||||
BitField<0, 5, u32> input_offset;
|
BitField<0, 5, u32> input_offset;
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "common/scope_exit.h"
|
#include "common/scope_exit.h"
|
||||||
#include "shader_recompiler/runtime_info.h"
|
#include "shader_recompiler/info.h"
|
||||||
#include "video_core/amdgpu/liverpool.h"
|
#include "video_core/amdgpu/liverpool.h"
|
||||||
#include "video_core/buffer_cache/buffer_cache.h"
|
#include "video_core/buffer_cache/buffer_cache.h"
|
||||||
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
#include "video_core/renderer_vulkan/liverpool_to_vk.h"
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
#include "shader_recompiler/runtime_info.h"
|
#include "shader_recompiler/info.h"
|
||||||
#include "video_core/renderer_vulkan/vk_common.h"
|
#include "video_core/renderer_vulkan/vk_common.h"
|
||||||
|
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
|
|
@ -7,8 +7,7 @@
|
||||||
#include "common/io_file.h"
|
#include "common/io_file.h"
|
||||||
#include "common/path_util.h"
|
#include "common/path_util.h"
|
||||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||||
#include "shader_recompiler/recompiler.h"
|
#include "shader_recompiler/info.h"
|
||||||
#include "shader_recompiler/runtime_info.h"
|
|
||||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||||
#include "video_core/renderer_vulkan/vk_instance.h"
|
#include "video_core/renderer_vulkan/vk_instance.h"
|
||||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||||
|
@ -21,11 +20,16 @@ namespace Vulkan {
|
||||||
|
|
||||||
using Shader::VsOutput;
|
using Shader::VsOutput;
|
||||||
|
|
||||||
void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl& ctl) {
|
[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
|
||||||
|
return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
|
||||||
|
}
|
||||||
|
|
||||||
|
void GatherVertexOutputs(Shader::VertexRuntimeInfo& info,
|
||||||
|
const AmdGpu::Liverpool::VsOutputControl& ctl) {
|
||||||
const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
|
const auto add_output = [&](VsOutput x, VsOutput y, VsOutput z, VsOutput w) {
|
||||||
if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
|
if (x != VsOutput::None || y != VsOutput::None || z != VsOutput::None ||
|
||||||
w != VsOutput::None) {
|
w != VsOutput::None) {
|
||||||
info.vs_outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
|
info.outputs.emplace_back(Shader::VsOutputMap{x, y, z, w});
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
// VS_OUT_MISC_VEC
|
// VS_OUT_MISC_VEC
|
||||||
|
@ -65,14 +69,51 @@ void BuildVsOutputs(Shader::Info& info, const AmdGpu::Liverpool::VsOutputControl
|
||||||
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
|
: (ctl.IsCullDistEnabled(7) ? VsOutput::CullDist7 : VsOutput::None));
|
||||||
}
|
}
|
||||||
|
|
||||||
[[nodiscard]] inline u64 HashCombine(const u64 seed, const u64 hash) {
|
Shader::RuntimeInfo BuildRuntimeInfo(Shader::Stage stage, const GraphicsPipelineKey& key,
|
||||||
return seed ^ (hash + 0x9e3779b9 + (seed << 6) + (seed >> 2));
|
const AmdGpu::Liverpool::Regs& regs) {
|
||||||
|
auto info = Shader::RuntimeInfo{stage};
|
||||||
|
switch (stage) {
|
||||||
|
case Shader::Stage::Vertex: {
|
||||||
|
info.num_user_data = regs.vs_program.settings.num_user_regs;
|
||||||
|
info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt;
|
||||||
|
GatherVertexOutputs(info.vs_info, regs.vs_output_control);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case Shader::Stage::Fragment: {
|
||||||
|
info.num_user_data = regs.ps_program.settings.num_user_regs;
|
||||||
|
std::ranges::transform(key.mrt_swizzles, info.fs_info.mrt_swizzles.begin(),
|
||||||
|
[](Liverpool::ColorBuffer::SwapMode mode) {
|
||||||
|
return static_cast<Shader::MrtSwizzle>(mode);
|
||||||
|
});
|
||||||
|
for (u32 i = 0; i < regs.num_interp; i++) {
|
||||||
|
info.fs_info.inputs.push_back({
|
||||||
|
.param_index = u8(regs.ps_inputs[i].input_offset.Value()),
|
||||||
|
.is_default = bool(regs.ps_inputs[i].use_default),
|
||||||
|
.is_flat = bool(regs.ps_inputs[i].flat_shade),
|
||||||
|
.default_value = u8(regs.ps_inputs[i].default_value),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case Shader::Stage::Compute: {
|
||||||
|
const auto& cs_pgm = regs.cs_program;
|
||||||
|
info.num_user_data = cs_pgm.settings.num_user_regs;
|
||||||
|
info.cs_info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
|
||||||
|
cs_pgm.num_thread_z.full};
|
||||||
|
info.cs_info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
|
||||||
|
cs_pgm.IsTgidEnabled(2)};
|
||||||
|
info.cs_info.shared_memory_size = cs_pgm.SharedMemSize();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
return info;
|
||||||
}
|
}
|
||||||
|
|
||||||
PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
||||||
AmdGpu::Liverpool* liverpool_)
|
AmdGpu::Liverpool* liverpool_)
|
||||||
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192},
|
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_} {
|
||||||
block_pool{512} {
|
|
||||||
profile = Shader::Profile{
|
profile = Shader::Profile{
|
||||||
.supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U,
|
.supported_spirv = instance.ApiVersion() >= VK_API_VERSION_1_3 ? 0x00010600U : 0x00010500U,
|
||||||
.subgroup_size = instance.SubgroupSize(),
|
.subgroup_size = instance.SubgroupSize(),
|
||||||
|
@ -243,9 +284,9 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||||
if (ShouldSkipShader(bininfo->shader_hash, "graphics")) {
|
if (ShouldSkipShader(bininfo->shader_hash, "graphics")) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const auto stage = Shader::Stage{i};
|
const auto stage = Shader::StageFromIndex(i);
|
||||||
const auto guest_pgm = GuestProgram{pgm, stage};
|
const auto params = Liverpool::GetParams(*pgm);
|
||||||
std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(guest_pgm, binding);
|
std::tie(infos[i], modules[i], key.stage_hashes[i]) = GetProgram(stage, params, binding);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -253,149 +294,68 @@ bool PipelineCache::RefreshGraphicsKey() {
|
||||||
bool PipelineCache::RefreshComputeKey() {
|
bool PipelineCache::RefreshComputeKey() {
|
||||||
u32 binding{};
|
u32 binding{};
|
||||||
const auto* cs_pgm = &liverpool->regs.cs_program;
|
const auto* cs_pgm = &liverpool->regs.cs_program;
|
||||||
const GuestProgram guest_pgm{cs_pgm, Shader::Stage::Compute};
|
const auto cs_params = Liverpool::GetParams(*cs_pgm);
|
||||||
if (ShouldSkipShader(guest_pgm.hash, "compute")) {
|
if (ShouldSkipShader(cs_params.hash, "compute")) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
std::tie(infos[0], modules[0], compute_key) = GetProgram(guest_pgm, binding);
|
std::tie(infos[0], modules[0], compute_key) =
|
||||||
|
GetProgram(Shader::Stage::Compute, cs_params, binding);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info, std::span<const u32> code,
|
vk::ShaderModule PipelineCache::CompileModule(Shader::Info& info,
|
||||||
size_t perm_idx, u32& binding) {
|
const Shader::RuntimeInfo& runtime_info,
|
||||||
|
std::span<const u32> code, size_t perm_idx,
|
||||||
|
u32& binding) {
|
||||||
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
|
LOG_INFO(Render_Vulkan, "Compiling {} shader {:#x} {}", info.stage, info.pgm_hash,
|
||||||
perm_idx != 0 ? "(permutation)" : "");
|
perm_idx != 0 ? "(permutation)" : "");
|
||||||
|
|
||||||
if (Config::dumpShaders()) {
|
if (Config::dumpShaders()) {
|
||||||
DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin");
|
DumpShader(code, info.pgm_hash, info.stage, perm_idx, "bin");
|
||||||
}
|
}
|
||||||
|
|
||||||
block_pool.ReleaseContents();
|
const auto ir_program = Shader::TranslateProgram(code, pools, info, runtime_info, profile);
|
||||||
inst_pool.ReleaseContents();
|
const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, runtime_info, ir_program, binding);
|
||||||
const auto ir_program = Shader::TranslateProgram(inst_pool, block_pool, code, info, profile);
|
|
||||||
|
|
||||||
// Compile IR to SPIR-V
|
|
||||||
const auto spv = Shader::Backend::SPIRV::EmitSPIRV(profile, ir_program, binding);
|
|
||||||
if (Config::dumpShaders()) {
|
if (Config::dumpShaders()) {
|
||||||
DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv");
|
DumpShader(spv, info.pgm_hash, info.stage, perm_idx, "spv");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create module and set name to hash in renderdoc
|
|
||||||
const auto module = CompileSPV(spv, instance.GetDevice());
|
const auto module = CompileSPV(spv, instance.GetDevice());
|
||||||
ASSERT(module != VK_NULL_HANDLE);
|
|
||||||
const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx);
|
const auto name = fmt::format("{}_{:#x}_{}", info.stage, info.pgm_hash, perm_idx);
|
||||||
Vulkan::SetObjectName(instance.GetDevice(), module, name);
|
Vulkan::SetObjectName(instance.GetDevice(), module, name);
|
||||||
return module;
|
return module;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram(
|
std::tuple<const Shader::Info*, vk::ShaderModule, u64> PipelineCache::GetProgram(
|
||||||
const GuestProgram& pgm, u32& binding) {
|
Shader::Stage stage, Shader::ShaderParams params, u32& binding) {
|
||||||
auto [it_pgm, new_program] = program_cache.try_emplace(pgm.hash);
|
const auto runtime_info = BuildRuntimeInfo(stage, graphics_key, liverpool->regs);
|
||||||
|
auto [it_pgm, new_program] = program_cache.try_emplace(params.hash);
|
||||||
if (new_program) {
|
if (new_program) {
|
||||||
Program* program = program_pool.Create(BuildShaderInfo(pgm, liverpool->regs));
|
Program* program = program_pool.Create(stage, params);
|
||||||
u32 start_binding = binding;
|
u32 start_binding = binding;
|
||||||
const auto module = CompileModule(program->info, pgm.code, 0, binding);
|
const auto module = CompileModule(program->info, runtime_info, params.code, 0, binding);
|
||||||
const auto spec = BuildStageSpec(program->info, start_binding);
|
const auto spec = Shader::StageSpecialization(program->info, runtime_info, start_binding);
|
||||||
program->modules.emplace_back(module, spec);
|
program->AddPermut(module, std::move(spec));
|
||||||
it_pgm.value() = program;
|
it_pgm.value() = program;
|
||||||
return std::make_tuple(&program->info, module, HashCombine(pgm.hash, 0));
|
return std::make_tuple(&program->info, module, HashCombine(params.hash, 0));
|
||||||
}
|
}
|
||||||
|
|
||||||
Program* program = it_pgm->second;
|
Program* program = it_pgm->second;
|
||||||
const auto& info = program->info;
|
const auto& info = program->info;
|
||||||
const auto spec = BuildStageSpec(info, binding);
|
const auto spec = Shader::StageSpecialization(info, runtime_info, binding);
|
||||||
size_t perm_idx = program->modules.size();
|
size_t perm_idx = program->modules.size();
|
||||||
vk::ShaderModule module{};
|
vk::ShaderModule module{};
|
||||||
|
|
||||||
const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec);
|
const auto it = std::ranges::find(program->modules, spec, &Program::Module::spec);
|
||||||
if (it == program->modules.end()) {
|
if (it == program->modules.end()) {
|
||||||
auto new_info = BuildShaderInfo(pgm, liverpool->regs);
|
auto new_info = Shader::Info(stage, params);
|
||||||
module = CompileModule(new_info, pgm.code, perm_idx, binding);
|
module = CompileModule(new_info, runtime_info, params.code, perm_idx, binding);
|
||||||
program->modules.emplace_back(module, std::move(spec));
|
program->AddPermut(module, std::move(spec));
|
||||||
} else {
|
} else {
|
||||||
binding += info.NumBindings();
|
binding += info.NumBindings();
|
||||||
module = it->module;
|
module = it->module;
|
||||||
perm_idx = std::distance(program->modules.begin(), it);
|
perm_idx = std::distance(program->modules.begin(), it);
|
||||||
}
|
}
|
||||||
return std::make_tuple(&info, module, HashCombine(pgm.hash, perm_idx));
|
return std::make_tuple(&info, module, HashCombine(params.hash, perm_idx));
|
||||||
}
|
|
||||||
|
|
||||||
Shader::Info PipelineCache::BuildShaderInfo(const GuestProgram& pgm,
|
|
||||||
const AmdGpu::Liverpool::Regs& regs) {
|
|
||||||
Shader::Info info{};
|
|
||||||
info.user_data = pgm.user_data;
|
|
||||||
info.pgm_base = VAddr(pgm.code.data());
|
|
||||||
info.pgm_hash = pgm.hash;
|
|
||||||
info.stage = pgm.stage;
|
|
||||||
switch (pgm.stage) {
|
|
||||||
case Shader::Stage::Vertex: {
|
|
||||||
info.num_user_data = regs.vs_program.settings.num_user_regs;
|
|
||||||
info.num_input_vgprs = regs.vs_program.settings.vgpr_comp_cnt;
|
|
||||||
BuildVsOutputs(info, regs.vs_output_control);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case Shader::Stage::Fragment: {
|
|
||||||
info.num_user_data = regs.ps_program.settings.num_user_regs;
|
|
||||||
std::ranges::transform(graphics_key.mrt_swizzles, info.mrt_swizzles.begin(),
|
|
||||||
[](Liverpool::ColorBuffer::SwapMode mode) {
|
|
||||||
return static_cast<Shader::MrtSwizzle>(mode);
|
|
||||||
});
|
|
||||||
for (u32 i = 0; i < regs.num_interp; i++) {
|
|
||||||
info.ps_inputs.push_back({
|
|
||||||
.param_index = regs.ps_inputs[i].input_offset.Value(),
|
|
||||||
.is_default = bool(regs.ps_inputs[i].use_default),
|
|
||||||
.is_flat = bool(regs.ps_inputs[i].flat_shade),
|
|
||||||
.default_value = regs.ps_inputs[i].default_value,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case Shader::Stage::Compute: {
|
|
||||||
const auto& cs_pgm = regs.cs_program;
|
|
||||||
info.num_user_data = cs_pgm.settings.num_user_regs;
|
|
||||||
info.workgroup_size = {cs_pgm.num_thread_x.full, cs_pgm.num_thread_y.full,
|
|
||||||
cs_pgm.num_thread_z.full};
|
|
||||||
info.tgid_enable = {cs_pgm.IsTgidEnabled(0), cs_pgm.IsTgidEnabled(1),
|
|
||||||
cs_pgm.IsTgidEnabled(2)};
|
|
||||||
info.shared_memory_size = cs_pgm.SharedMemSize();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return info;
|
|
||||||
}
|
|
||||||
|
|
||||||
Shader::StageSpecialization PipelineCache::BuildStageSpec(const Shader::Info& info,
|
|
||||||
u32 start_binding) {
|
|
||||||
u32 binding{};
|
|
||||||
auto spec = Shader::StageSpecialization{info};
|
|
||||||
spec.start_binding = start_binding;
|
|
||||||
spec.ForEachSharp(binding, spec.buffers, info.buffers,
|
|
||||||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
|
||||||
spec.stride = sharp.GetStride();
|
|
||||||
spec.is_storage = desc.IsStorage(sharp);
|
|
||||||
});
|
|
||||||
spec.ForEachSharp(binding, spec.tex_buffers, info.texture_buffers,
|
|
||||||
[](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
|
|
||||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
|
||||||
});
|
|
||||||
spec.ForEachSharp(binding, spec.images, info.images,
|
|
||||||
[](auto& spec, const auto& desc, AmdGpu::Image sharp) {
|
|
||||||
spec.type = sharp.GetType();
|
|
||||||
spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
|
|
||||||
});
|
|
||||||
switch (info.stage) {
|
|
||||||
case Shader::Stage::Fragment:
|
|
||||||
std::ranges::transform(graphics_key.mrt_swizzles, spec.mrt_swizzles.begin(),
|
|
||||||
[](Liverpool::ColorBuffer::SwapMode mode) {
|
|
||||||
return static_cast<Shader::MrtSwizzle>(mode);
|
|
||||||
});
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return spec;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
|
void PipelineCache::DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage,
|
||||||
|
|
|
@ -4,9 +4,8 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <tsl/robin_map.h>
|
#include <tsl/robin_map.h>
|
||||||
#include "common/object_pool.h"
|
|
||||||
#include "shader_recompiler/ir/basic_block.h"
|
|
||||||
#include "shader_recompiler/profile.h"
|
#include "shader_recompiler/profile.h"
|
||||||
|
#include "shader_recompiler/recompiler.h"
|
||||||
#include "shader_recompiler/specialization.h"
|
#include "shader_recompiler/specialization.h"
|
||||||
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
#include "video_core/renderer_vulkan/vk_compute_pipeline.h"
|
||||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||||
|
@ -30,19 +29,10 @@ struct Program {
|
||||||
Shader::Info info;
|
Shader::Info info;
|
||||||
boost::container::small_vector<Module, 8> modules;
|
boost::container::small_vector<Module, 8> modules;
|
||||||
|
|
||||||
explicit Program(const Shader::Info& info_) : info{info_} {}
|
explicit Program(Shader::Stage stage, Shader::ShaderParams params) : info{stage, params} {}
|
||||||
};
|
|
||||||
|
|
||||||
struct GuestProgram {
|
void AddPermut(vk::ShaderModule module, const Shader::StageSpecialization&& spec) {
|
||||||
Shader::Stage stage;
|
modules.emplace_back(module, std::move(spec));
|
||||||
std::span<const u32, AmdGpu::Liverpool::NumShaderUserData> user_data;
|
|
||||||
std::span<const u32> code;
|
|
||||||
u64 hash;
|
|
||||||
|
|
||||||
explicit GuestProgram(const auto* pgm, Shader::Stage stage_)
|
|
||||||
: stage{stage_}, user_data{pgm->user_data}, code{pgm->Code()} {
|
|
||||||
const auto* bininfo = AmdGpu::Liverpool::GetBinaryInfo(*pgm);
|
|
||||||
hash = bininfo->shader_hash;
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -58,7 +48,8 @@ public:
|
||||||
|
|
||||||
const ComputePipeline* GetComputePipeline();
|
const ComputePipeline* GetComputePipeline();
|
||||||
|
|
||||||
std::tuple<const Shader::Info*, vk::ShaderModule, u64> GetProgram(const GuestProgram& pgm,
|
std::tuple<const Shader::Info*, vk::ShaderModule, u64> GetProgram(Shader::Stage stage,
|
||||||
|
Shader::ShaderParams params,
|
||||||
u32& binding);
|
u32& binding);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -67,11 +58,8 @@ private:
|
||||||
|
|
||||||
void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, size_t perm_idx,
|
void DumpShader(std::span<const u32> code, u64 hash, Shader::Stage stage, size_t perm_idx,
|
||||||
std::string_view ext);
|
std::string_view ext);
|
||||||
vk::ShaderModule CompileModule(Shader::Info& info, std::span<const u32> code, size_t perm_idx,
|
vk::ShaderModule CompileModule(Shader::Info& info, const Shader::RuntimeInfo& runtime_info,
|
||||||
u32& binding);
|
std::span<const u32> code, size_t perm_idx, u32& binding);
|
||||||
|
|
||||||
Shader::Info BuildShaderInfo(const GuestProgram& pgm, const AmdGpu::Liverpool::Regs& regs);
|
|
||||||
Shader::StageSpecialization BuildStageSpec(const Shader::Info& info, u32 binding);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const Instance& instance;
|
const Instance& instance;
|
||||||
|
@ -80,9 +68,8 @@ private:
|
||||||
vk::UniquePipelineCache pipeline_cache;
|
vk::UniquePipelineCache pipeline_cache;
|
||||||
vk::UniquePipelineLayout pipeline_layout;
|
vk::UniquePipelineLayout pipeline_layout;
|
||||||
Shader::Profile profile{};
|
Shader::Profile profile{};
|
||||||
|
Shader::Pools pools;
|
||||||
tsl::robin_map<size_t, Program*> program_cache;
|
tsl::robin_map<size_t, Program*> program_cache;
|
||||||
Common::ObjectPool<Shader::IR::Inst> inst_pool;
|
|
||||||
Common::ObjectPool<Shader::IR::Block> block_pool;
|
|
||||||
Common::ObjectPool<Program> program_pool;
|
Common::ObjectPool<Program> program_pool;
|
||||||
tsl::robin_map<size_t, std::unique_ptr<ComputePipeline>> compute_pipelines;
|
tsl::robin_map<size_t, std::unique_ptr<ComputePipeline>> compute_pipelines;
|
||||||
tsl::robin_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
|
tsl::robin_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_pipelines;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue