shader_recompiler: Implement attribute loads/stores

This commit is contained in:
raphaelthegreat 2024-05-24 03:02:21 +03:00
parent 08e155946e
commit 0eaa7d5859
28 changed files with 399 additions and 197 deletions

View file

@ -324,6 +324,8 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/frontend/control_flow_graph.h
src/shader_recompiler/frontend/decode.cpp
src/shader_recompiler/frontend/decode.h
src/shader_recompiler/frontend/fetch_shader.cpp
src/shader_recompiler/frontend/fetch_shader.h
src/shader_recompiler/frontend/format.cpp
src/shader_recompiler/frontend/instruction.cpp
src/shader_recompiler/frontend/instruction.h
@ -333,6 +335,7 @@ set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/ir/passes/ssa_rewrite_pass.cpp
src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
src/shader_recompiler/ir/passes/constant_propogation_pass.cpp
src/shader_recompiler/ir/passes/info_collection.cpp
src/shader_recompiler/ir/passes/passes.h
src/shader_recompiler/ir/abstract_syntax_list.h
src/shader_recompiler/ir/attribute.cpp

View file

@ -20,7 +20,6 @@
#include "core/libraries/libs.h"
#include "core/libraries/videoout/video_out.h"
#include "core/linker.h"
#include "core/tls.h"
#include "input/controller.h"
#include "sdl_window.h"

View file

@ -171,7 +171,7 @@ Id DefineMain(EmitContext& ctx, IR::Program& program) {
void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
const std::span interfaces(ctx.interfaces.data(), ctx.interfaces.size());
spv::ExecutionModel execution_model{};
switch (program.stage) {
switch (program.info.stage) {
case Stage::Compute: {
// const std::array<u32, 3> workgroup_size{program.workgroup_size};
// execution_model = spv::ExecutionModel::GLCompute;
@ -194,7 +194,7 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
// }
break;
default:
throw NotImplementedException("Stage {}", u32(program.stage));
throw NotImplementedException("Stage {}", u32(program.info.stage));
}
ctx.AddEntryPoint(execution_model, main, "main", interfaces);
}
@ -222,7 +222,7 @@ std::vector<u32> EmitSPIRV(const Profile& profile, IR::Program& program, Binding
EmitContext ctx{profile, program, bindings};
const Id main{DefineMain(ctx, program)};
DefineEntryPoint(program, ctx, main);
if (program.stage == Stage::Vertex) {
if (program.info.stage == Stage::Vertex) {
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
ctx.AddCapability(spv::Capability::DrawParameters);
}

View file

@ -10,12 +10,11 @@ namespace {
Id OutputAttrPointer(EmitContext& ctx, IR::Attribute attr, u32 element) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& info{ctx.output_params.at(index).at(element)};
const auto& info{ctx.output_params.at(index)};
if (info.num_components == 1) {
return info.id;
} else {
const u32 index_element{element - info.first_element};
return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(index_element));
return ctx.OpAccessChain(ctx.output_f32, info.id, ctx.ConstU32(element));
}
}
switch (attr) {
@ -68,22 +67,21 @@ Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& binding, const IR::
throw LogicError("Unreachable instruction");
}
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex) {
const u32 element{static_cast<u32>(attr) % 4};
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp) {
if (IR::IsParam(attr)) {
const u32 index{u32(attr) - u32(IR::Attribute::Param0)};
const auto& param{ctx.input_params.at(index)};
if (!ValidId(param.id)) {
// Attribute is disabled or varying component is not written
return ctx.ConstF32(element == 3 ? 1.0f : 0.0f);
return ctx.ConstF32(comp == 3 ? 1.0f : 0.0f);
}
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(element))};
const Id pointer{ctx.OpAccessChain(param.pointer_type, param.id, ctx.ConstU32(comp))};
return ctx.OpLoad(param.component_type, pointer);
}
throw NotImplementedException("Read attribute {}", attr);
}
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp) {
switch (attr) {
case IR::Attribute::VertexId:
return ctx.OpLoad(ctx.U32[1], ctx.vertex_index);
@ -93,9 +91,6 @@ Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id) {
}
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element) {
if (attr == IR::Attribute::Param0) {
return;
}
const Id pointer{OutputAttrPointer(ctx, attr, element)};
ctx.OpStore(pointer, value);
}

View file

@ -46,9 +46,9 @@ Id EmitReadConstBuffer(EmitContext& ctx, const IR::Value& handle, const IR::Valu
const IR::Value& offset);
Id EmitReadConstBufferF32(EmitContext& ctx, const IR::Value& handle, const IR::Value& index,
const IR::Value& offset);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, Id vertex);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, Id vertex);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 element);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr, u32 comp);
Id EmitGetAttributeU32(EmitContext& ctx, IR::Attribute attr, u32 comp);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value, u32 comp);
void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, Id value);
void EmitSetSampleMask(EmitContext& ctx, Id value);
void EmitSetFragDepth(EmitContext& ctx, Id value);

View file

@ -36,7 +36,7 @@ void Name(EmitContext& ctx, Id object, std::string_view format_str, Args&&... ar
} // Anonymous namespace
EmitContext::EmitContext(const Profile& profile_, IR::Program& program, Bindings& bindings)
: Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.stage} {
: Sirit::Module(profile_.supported_spirv), profile{profile_}, stage{program.info.stage} {
u32& uniform_binding{bindings.unified};
u32& storage_binding{bindings.unified};
u32& texture_binding{bindings.unified};
@ -98,6 +98,10 @@ void EmitContext::DefineArithmeticTypes() {
u32_zero_value = ConstU32(0U);
f32_zero_value = ConstF32(0.0f);
input_f32 = Name(TypePointer(spv::StorageClass::Input, F32[1]), "input_f32");
input_u32 = Name(TypePointer(spv::StorageClass::Input, U32[1]), "input_u32");
input_s32 = Name(TypePointer(spv::StorageClass::Input, S32[1]), "input_s32");
output_f32 = Name(TypePointer(spv::StorageClass::Output, F32[1]), "output_f32");
output_u32 = Name(TypePointer(spv::StorageClass::Output, U32[1]), "output_u32");
}
@ -107,26 +111,119 @@ void EmitContext::DefineInterfaces(const IR::Program& program) {
DefineOutputs(program);
}
Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
switch (fmt) {
case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm:
return ctx.F32[4];
case AmdGpu::NumberFormat::Sint:
return ctx.S32[4];
case AmdGpu::NumberFormat::Uint:
return ctx.U32[4];
case AmdGpu::NumberFormat::Sscaled:
return ctx.F32[4];
case AmdGpu::NumberFormat::Uscaled:
return ctx.F32[4];
default:
break;
}
throw InvalidArgument("Invalid attribute type {}", fmt);
}
EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id) {
switch (fmt) {
case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm:
return {id, input_f32, F32[1], 4};
case AmdGpu::NumberFormat::Uint:
return {id, input_u32, U32[1], 4};
case AmdGpu::NumberFormat::Sint:
return {id, input_s32, S32[1], 4};
case AmdGpu::NumberFormat::Sscaled:
return {id, input_f32, F32[1], 4};
case AmdGpu::NumberFormat::Uscaled:
return {id, input_f32, F32[1], 4};
default:
break;
}
throw InvalidArgument("Invalid attribute type {}", fmt);
}
Id MakeDefaultValue(EmitContext& ctx, u32 default_value) {
switch (default_value) {
case 0:
return ctx.ConstF32(0.f, 0.f, 0.f, 0.f);
case 1:
return ctx.ConstF32(0.f, 0.f, 0.f, 1.f);
case 2:
return ctx.ConstF32(1.f, 1.f, 1.f, 0.f);
case 3:
return ctx.ConstF32(1.f, 1.f, 1.f, 1.f);
default:
UNREACHABLE();
}
}
void EmitContext::DefineInputs(const IR::Program& program) {
const auto& info = program.info;
switch (stage) {
case Stage::Vertex:
vertex_index = DefineVariable(U32[1], spv::BuiltIn::VertexIndex, spv::StorageClass::Input);
base_vertex = DefineVariable(U32[1], spv::BuiltIn::BaseVertex, spv::StorageClass::Input);
for (const auto& input : info.vs_inputs) {
const Id type{GetAttributeType(*this, input.fmt)};
const Id id{DefineInput(type, input.binding)};
Name(id, fmt::format("vs_in_attr{}", input.binding));
input_params[input.binding] = GetAttributeInfo(input.fmt, id);
}
break;
case Stage::Fragment:
for (const auto& input : info.ps_inputs) {
if (input.is_default) {
input_params[input.semantic] = {MakeDefaultValue(*this, input.default_value), input_f32, F32[1]};
continue;
}
const IR::Attribute param{IR::Attribute::Param0 + input.param_index};
const u32 num_components = info.loads.NumComponents(param);
const Id type{F32[num_components]};
const Id id{DefineInput(type, input.semantic)};
if (input.is_flat) {
Decorate(id, spv::Decoration::Flat);
}
Name(id, fmt::format("fs_in_attr{}", input.semantic));
input_params[input.semantic] = {id, input_f32, F32[1], num_components};
}
default:
break;
}
}
void EmitContext::DefineOutputs(const IR::Program& program) {
const auto& info = program.info;
switch (stage) {
case Stage::Vertex:
output_position = DefineVariable(F32[4], spv::BuiltIn::Position, spv::StorageClass::Output);
for (u32 i = 0; i < IR::NumParams; i++) {
const IR::Attribute param{IR::Attribute::Param0 + i};
if (!info.stores.GetAny(param)) {
continue;
}
const u32 num_components = info.stores.NumComponents(param);
const Id id{DefineOutput(F32[num_components], i)};
Name(id, fmt::format("out_attr{}", i));
output_params[i] = {id, output_f32, F32[1], num_components};
}
break;
case Stage::Fragment:
frag_color[0] = DefineOutput(F32[4], 0);
Name(frag_color[0], fmt::format("frag_color{}", 0));
interfaces.push_back(frag_color[0]);
for (u32 i = 0; i < IR::NumRenderTargets; i++) {
const IR::Attribute mrt{IR::Attribute::RenderTarget0 + i};
if (!info.stores.GetAny(mrt)) {
continue;
}
frag_color[i] = DefineOutput(F32[4], i);
Name(frag_color[i], fmt::format("frag_color{}", i));
interfaces.push_back(frag_color[i]);
}
break;
default:
break;

View file

@ -135,6 +135,9 @@ public:
Id u32_zero_value{};
Id f32_zero_value{};
Id input_u32{};
Id input_f32{};
Id input_s32{};
Id output_u32{};
Id output_f32{};
@ -145,25 +148,22 @@ public:
Id base_vertex{};
std::array<Id, 8> frag_color{};
struct InputParamInfo {
struct SpirvAttribute {
Id id;
Id pointer_type;
Id component_type;
u32 num_components;
};
std::array<InputParamInfo, 32> input_params{};
struct ParamElementInfo {
Id id{};
u32 first_element{};
u32 num_components{};
};
std::array<std::array<ParamElementInfo, 4>, 32> output_params{};
std::array<SpirvAttribute, 32> input_params{};
std::array<SpirvAttribute, 32> output_params{};
private:
void DefineArithmeticTypes();
void DefineInterfaces(const IR::Program& program);
void DefineInputs(const IR::Program& program);
void DefineOutputs(const IR::Program& program);
SpirvAttribute GetAttributeInfo(AmdGpu::NumberFormat fmt, Id id);
};
} // namespace Shader::Backend::SPIRV

View file

@ -32,9 +32,9 @@ namespace Shader::Gcn {
* We take the reverse way, extract the original input semantics from these instructions.
**/
std::vector<VertexAttribute> ParseFetchShader(std::span<const u32> code) {
std::vector<VertexAttribute> ParseFetchShader(const u32* code) {
std::vector<VertexAttribute> attributes;
GcnCodeSlice code_slice(code.data(), code.data() + std::numeric_limits<u32>::max());
GcnCodeSlice code_slice(code, code + std::numeric_limits<u32>::max());
GcnDecodeContext decoder;
struct VsharpLoad {

View file

@ -3,7 +3,6 @@
#pragma once
#include <span>
#include <vector>
#include "common/types.h"
@ -17,6 +16,6 @@ struct VertexAttribute {
u8 dword_offset; ///< The dword offset of the V# that describes this attribute.
};
std::vector<VertexAttribute> ParseFetchShader(std::span<const u32> code);
std::vector<VertexAttribute> ParseFetchShader(const u32* code);
} // namespace Shader::Gcn

View file

@ -600,9 +600,9 @@ public:
TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
Stage stage_)
Info& info_)
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
syntax_list{syntax_list_}, inst_list{inst_list_}, stage{stage_} {
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_} {
Visit(root_stmt, nullptr, nullptr);
IR::Block& first_block{*syntax_list.front().data.block};
@ -633,7 +633,7 @@ private:
ensure_block();
const u32 start = stmt.block->begin_index;
const u32 size = stmt.block->end_index - start + 1;
Translate(current_block, stage, inst_list.subspan(start, size));
Translate(current_block, inst_list.subspan(start, size), info);
break;
}
case StatementType::SetVariable: {
@ -811,17 +811,17 @@ private:
IR::AbstractSyntaxList& syntax_list;
const Block dummy_flow_block{};
std::span<const GcnInst> inst_list;
Stage stage;
Info& info;
};
} // Anonymous namespace
IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
CFG& cfg, Stage stage) {
CFG& cfg, Info& info) {
ObjectPool<Statement> stmt_pool{64};
GotoPass goto_pass{cfg, stmt_pool};
Statement& root{goto_pass.RootStatement()};
IR::AbstractSyntaxList syntax_list;
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, stage};
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info};
return syntax_list;
}

View file

@ -10,13 +10,13 @@
#include "shader_recompiler/object_pool.h"
namespace Shader {
enum class Stage : u32;
struct Info;
}
namespace Shader::Gcn {
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, CFG& cfg,
Stage stage);
Info& info);
} // namespace Shader::Gcn

View file

@ -3,13 +3,15 @@
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/translate/translate.h"
#include "shader_recompiler/frontend/fetch_shader.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/amdgpu/resource.h"
namespace Shader::Gcn {
Translator::Translator(IR::Block* block_, Stage stage) : block{block_}, ir{*block} {
Translator::Translator(IR::Block* block_, Info& info_) : block{block_}, ir{*block}, info{info_} {
IR::VectorReg dst_vreg = IR::VectorReg::V0;
switch (stage) {
switch (info.stage) {
case Stage::Vertex:
// https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L146C1-L146C23
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::VertexId));
@ -92,11 +94,38 @@ void Translator::SetDst(const InstOperand& operand, const IR::U32F32& value) {
}
}
void Translate(IR::Block* block, Stage stage, std::span<const GcnInst> inst_list) {
void Translator::EmitFetch(const GcnInst& inst) {
// Read the pointer to the fetch shader assembly.
const u32 sgpr_base = inst.src[0].code;
const u32* code;
std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code));
// Parse the assembly to generate a list of attributes.
const auto attribs = ParseFetchShader(code);
for (const auto& attrib : attribs) {
IR::VectorReg dst_reg{attrib.dest_vgpr};
const IR::Attribute attr{IR::Attribute::Param0 + attrib.semantic};
for (u32 i = 0; i < attrib.num_elements; i++) {
ir.SetVectorReg(dst_reg++, ir.GetAttribute(attr, i));
}
// Read the V# of the attribute to figure out component number and type.
const auto buffer = info.ReadUd<AmdGpu::Buffer>(attrib.sgpr_base,
attrib.dword_offset);
const u32 num_components = AmdGpu::NumComponents(buffer.data_format);
info.vs_inputs.push_back({
.fmt = buffer.num_format,
.binding = attrib.semantic,
.num_components = std::min<u16>(attrib.num_elements, num_components),
});
}
}
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info) {
if (inst_list.empty()) {
return;
}
Translator translator{block, stage};
Translator translator{block, info};
for (const auto& inst : inst_list) {
switch (inst.opcode) {
case Opcode::S_MOV_B32:
@ -115,6 +144,9 @@ void Translate(IR::Block* block, Stage stage, std::span<const GcnInst> inst_list
translator.V_MUL_F32(inst);
break;
case Opcode::S_SWAPPC_B64:
ASSERT(info.stage == Stage::Vertex);
translator.EmitFetch(inst);
break;
case Opcode::S_WAITCNT:
break; // Ignore for now.
case Opcode::S_BUFFER_LOAD_DWORDX16:

View file

@ -7,9 +7,10 @@
#include "shader_recompiler/frontend/instruction.h"
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/ir/ir_emitter.h"
#include "shader_recompiler/runtime_info.h"
namespace Shader {
enum class Stage : u32;
struct Info;
}
namespace Shader::Gcn {
@ -25,7 +26,9 @@ enum class ConditionOp : u32 {
class Translator {
public:
explicit Translator(IR::Block* block_, Stage stage);
explicit Translator(IR::Block* block_, Info& info);
void EmitFetch(const GcnInst& inst);
// Scalar ALU
void S_MOV(const GcnInst& inst);
@ -66,8 +69,9 @@ private:
private:
IR::Block* block;
IR::IREmitter ir;
Info& info;
};
void Translate(IR::Block* block, Stage stage, std::span<const GcnInst> inst_list);
void Translate(IR::Block* block, std::span<const GcnInst> inst_list, Info& info);
} // namespace Shader::Gcn

View file

@ -20,9 +20,8 @@ void Translator::V_MAC_F32(const GcnInst& inst) {
void Translator::V_CVT_PKRTZ_F16_F32(const GcnInst& inst) {
const IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Value vec_f32 = ir.CompositeConstruct(ir.FPConvert(16, GetSrc(inst.src[0])),
ir.FPConvert(16, GetSrc(inst.src[1])));
ir.SetVectorReg(dst_reg, ir.PackFloat2x16(vec_f32));
const IR::Value vec_f32 = ir.CompositeConstruct(GetSrc(inst.src[0]), GetSrc(inst.src[1]));
ir.SetVectorReg(dst_reg, ir.PackHalf2x16(vec_f32));
}
void Translator::V_MUL_F32(const GcnInst& inst) {

View file

@ -7,7 +7,9 @@ namespace Shader::Gcn {
void Translator::V_INTERP_P2_F32(const GcnInst& inst) {
const IR::VectorReg dst_reg{inst.dst[0].code};
const IR::Attribute attrib{IR::Attribute::Param0 + inst.control.vintrp.attr};
auto& attr = info.ps_inputs.at(inst.control.vintrp.attr);
attr.semantic = inst.control.vintrp.attr;
const IR::Attribute attrib{IR::Attribute::Param0 + attr.param_index};
ir.SetVectorReg(dst_reg, ir.GetAttribute(attrib, inst.control.vintrp.chan));
}

View file

@ -72,10 +72,12 @@ enum class Attribute : u64 {
LocalInvocationId = 75,
LocalInvocationIndex = 76,
FragCoord = 77,
Max,
};
constexpr size_t EXP_NUM_POS = 4;
constexpr size_t EXP_NUM_PARAM = 32;
constexpr size_t NumAttributes = static_cast<size_t>(Attribute::Max);
constexpr size_t NumRenderTargets = 8;
constexpr size_t NumParams = 32;
[[nodiscard]] bool IsParam(Attribute attribute) noexcept;
@ -86,7 +88,7 @@ constexpr size_t EXP_NUM_PARAM = 32;
if (result > static_cast<int>(Attribute::Param31)) {
throw LogicError("Overflow on register arithmetic");
}
if (result < static_cast<int>(Attribute::Param0)) {
if (result < static_cast<int>(Attribute::RenderTarget0)) {
throw LogicError("Underflow on register arithmetic");
}
return static_cast<Attribute>(result);

View file

@ -174,18 +174,10 @@ void IREmitter::SetVcc(const U1& value) {
Inst(Opcode::SetVcc, value);
}
F32 IREmitter::GetAttribute(IR::Attribute attribute) {
return GetAttribute(attribute, 0);
}
F32 IREmitter::GetAttribute(IR::Attribute attribute, u32 comp) {
return Inst<F32>(Opcode::GetAttribute, attribute, Imm32(comp));
}
U32 IREmitter::GetAttributeU32(IR::Attribute attribute) {
return GetAttributeU32(attribute, 0);
}
U32 IREmitter::GetAttributeU32(IR::Attribute attribute, u32 comp) {
return Inst<U32>(Opcode::GetAttributeU32, attribute, Imm32(comp));
}

View file

@ -58,11 +58,9 @@ public:
[[nodiscard]] U1 Condition(IR::Condition cond);
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
[[nodiscard]] F32 GetAttribute(IR::Attribute attribute, u32 comp);
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute);
[[nodiscard]] U32 GetAttributeU32(IR::Attribute attribute, u32 comp);
void SetAttribute(IR::Attribute attribute, const F32& value, u32 comp);
[[nodiscard]] F32 GetAttribute(Attribute attribute, u32 comp = 0);
[[nodiscard]] U32 GetAttributeU32(Attribute attribute, u32 comp = 0);
void SetAttribute(Attribute attribute, const F32& value, u32 comp = 0);
[[nodiscard]] U32U64 ReadShared(int bit_size, bool is_signed, const U32& offset);
void WriteShared(int bit_size, const Value& value, const U32& offset);

View file

@ -0,0 +1,33 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/ir/program.h"
namespace Shader::Optimization {
void Visit(Info& info, IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::GetAttribute:
case IR::Opcode::GetAttributeU32: {
info.loads.Set(inst.Arg(0).Attribute(), inst.Arg(1).U32());
break;
}
case IR::Opcode::SetAttribute: {
info.stores.Set(inst.Arg(0).Attribute(), inst.Arg(2).U32());
break;
}
default:
break;
}
}
void CollectShaderInfoPass(IR::Program& program) {
Info& info{program.info};
for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) {
Visit(info, inst);
}
}
}
} // namespace Shader::Optimization

View file

@ -13,5 +13,6 @@ void IdentityRemovalPass(IR::BlockList& program);
void DeadCodeEliminationPass(IR::BlockList& program);
void ConstantPropagationPass(IR::BlockList& program);
void ResourceTrackingPass(IR::Program& program);
void CollectShaderInfoPass(IR::Program& program);
} // namespace Shader::Optimization

View file

@ -3,27 +3,22 @@
#pragma once
#include <array>
#include <string>
#include "shader_recompiler/frontend/instruction.h"
#include "shader_recompiler/ir/abstract_syntax_list.h"
#include "shader_recompiler/ir/basic_block.h"
namespace Shader {
enum class Stage : u32;
}
#include "shader_recompiler/runtime_info.h"
namespace Shader::IR {
static constexpr size_t NumUserDataRegs = 16;
struct Program {
explicit Program(const Info&& info_) : info{info_} {}
AbstractSyntaxList syntax_list;
BlockList blocks;
BlockList post_order_blocks;
std::vector<Gcn::GcnInst> ins_list;
std::array<u32, NumUserDataRegs> user_data;
Stage stage;
Info info;
};
[[nodiscard]] std::string DumpProgram(const Program& program);

View file

@ -2,7 +2,6 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <fstream>
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/frontend/control_flow_graph.h"
#include "shader_recompiler/frontend/decode.h"
#include "shader_recompiler/frontend/structured_control_flow.h"
@ -30,10 +29,8 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
return blocks;
}
std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, Stage stage,
std::span<const u32, IR::NumUserDataRegs> ud_regs,
std::span<const u32> token) {
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
std::span<const u32> token, const Info&& info) {
// Ensure first instruction is expected.
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
@ -47,7 +44,7 @@ std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
file.close();
// Decode and save instructions
IR::Program program;
IR::Program program{std::move(info)};
program.ins_list.reserve(token.size());
while (!slice.atEnd()) {
program.ins_list.emplace_back(decoder.decodeInstruction(slice));
@ -58,11 +55,9 @@ std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
Gcn::CFG cfg{gcn_block_pool, program.ins_list};
// Structurize control flow graph and create program.
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, stage);
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info);
program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
program.stage = stage;
std::ranges::copy(ud_regs, program.user_data.begin());
// Run optimization passes
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
@ -70,14 +65,13 @@ std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
Shader::Optimization::IdentityRemovalPass(program.blocks);
Shader::Optimization::ResourceTrackingPass(program);
Shader::Optimization::DeadCodeEliminationPass(program.blocks);
Shader::Optimization::CollectShaderInfoPass(program);
for (const auto& block : program.blocks) {
fmt::print("{}\n", IR::DumpBlock(*block));
}
// TODO: Pass profile from vulkan backend
const auto code = Backend::SPIRV::EmitSPIRV(Profile{}, program);
return code;
return program;
}
} // namespace Shader

View file

@ -3,6 +3,8 @@
#pragma once
#include "shader_recompiler/ir/basic_block.h"
#include "shader_recompiler/object_pool.h"
#include "shader_recompiler/ir/program.h"
namespace Shader {
@ -26,9 +28,9 @@ struct BinaryInfo {
u32 crc32;
};
[[nodiscard]] std::vector<u32> TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, Stage stage,
std::span<const u32, IR::NumUserDataRegs> ud_regs,
std::span<const u32> code);
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool,
std::span<const u32> code,
const Info&& info);
} // namespace Shader

View file

@ -3,39 +3,16 @@
#pragma once
#include <array>
#include <boost/container/small_vector.hpp>
#include "shader_recompiler/ir/type.h"
#include <span>
#include <boost/container/static_vector.hpp>
#include "common/assert.h"
#include "common/types.h"
#include "shader_recompiler/ir/attribute.h"
#include "video_core/amdgpu/pixel_format.h"
namespace Shader {
enum class AttributeType : u8 {
Float,
SignedInt,
UnsignedInt,
SignedScaled,
UnsignedScaled,
Disabled,
};
enum class InputTopology {
Points,
Lines,
LinesAdjacency,
Triangles,
TrianglesAdjacency,
};
enum class CompareFunction {
Never,
Less,
Equal,
LessThanEqual,
Greater,
NotEqual,
GreaterThanEqual,
Always,
};
static constexpr size_t NumUserDataRegs = 16;
enum class Stage : u32 {
Vertex,
@ -62,78 +39,60 @@ enum class TextureType : u32 {
};
constexpr u32 NUM_TEXTURE_TYPES = 7;
enum class Interpolation {
Smooth,
Flat,
NoPerspective,
};
struct ConstantBufferDescriptor {
u32 index;
u32 count;
auto operator<=>(const ConstantBufferDescriptor&) const = default;
};
struct TextureDescriptor {
TextureType type;
bool is_eud;
bool is_depth;
bool is_multisample;
bool is_storage;
u32 count;
u32 eud_offset_dwords;
u32 ud_index_dwords;
auto operator<=>(const TextureDescriptor&) const = default;
};
using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
struct Info {
bool uses_workgroup_id{};
bool uses_local_invocation_id{};
bool uses_invocation_id{};
bool uses_invocation_info{};
bool uses_sample_id{};
explicit Info(std::span<const u32, 16> user_data_) : user_data{user_data_} {}
std::array<Interpolation, 32> interpolation{};
// VaryingState loads;
// VaryingState stores;
// VaryingState passthrough;
struct VsInput {
AmdGpu::NumberFormat fmt;
u16 binding;
u16 num_components;
};
boost::container::static_vector<VsInput, 32> vs_inputs{};
std::array<bool, 8> stores_frag_color{};
bool stores_sample_mask{};
bool stores_frag_depth{};
struct PsInput {
u32 param_index;
u32 semantic;
bool is_default;
bool is_flat;
u32 default_value;
};
boost::container::static_vector<PsInput, 32> ps_inputs{};
bool uses_fp16{};
bool uses_fp64{};
bool uses_fp16_denorms_flush{};
bool uses_fp16_denorms_preserve{};
bool uses_fp32_denorms_flush{};
bool uses_fp32_denorms_preserve{};
bool uses_int8{};
bool uses_int16{};
bool uses_int64{};
bool uses_image_1d{};
bool uses_sampled_1d{};
bool uses_subgroup_vote{};
bool uses_subgroup_mask{};
bool uses_derivatives{};
struct AttributeFlags {
bool Get(IR::Attribute attrib, u32 comp = 0) const {
return flags[static_cast<size_t>(attrib)] & (1 << comp);
}
IR::Type used_constant_buffer_types{};
IR::Type used_storage_buffer_types{};
IR::Type used_indirect_cbuf_types{};
bool GetAny(IR::Attribute attrib) const {
return flags[static_cast<size_t>(attrib)];
}
// std::array<u32, MAX_CBUFS> constant_buffer_used_sizes{};
u32 used_clip_distances{};
void Set(IR::Attribute attrib, u32 comp = 0) {
flags[static_cast<size_t>(attrib)] |= (1 << comp);
}
// boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
// constant_buffer_descriptors;
// boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS>
// storage_buffers_descriptors; TextureBufferDescriptors texture_buffer_descriptors;
// ImageBufferDescriptors image_buffer_descriptors;
// TextureDescriptors texture_descriptors;
// ImageDescriptors image_descriptors;
u32 NumComponents(IR::Attribute attrib) const {
const u8 mask = flags[static_cast<size_t>(attrib)];
ASSERT(mask != 0b1011 || mask != 0b1101);
return std::popcount(mask);
}
std::array<u8, IR::NumAttributes> flags;
};
AttributeFlags loads{};
AttributeFlags stores{};
std::span<const u32, 16> user_data;
Stage stage;
template <typename T>
T ReadUd(u32 ptr_index, u32 dword_offset) const noexcept {
T data;
u32* base;
std::memcpy(&base, &user_data[ptr_index], sizeof(base));
std::memcpy(&data, base + dword_offset, sizeof(T));
return data;
}
};
} // namespace Shader

View file

@ -33,13 +33,13 @@ struct Liverpool {
static constexpr u32 NumColorBuffers = 8;
static constexpr u32 NumViewports = 16;
static constexpr u32 NumClipPlanes = 6;
static constexpr u32 NumWordsShaderUserData = 16;
static constexpr u32 NumShaderUserData = 16;
static constexpr u32 UconfigRegWordOffset = 0xC000;
static constexpr u32 ContextRegWordOffset = 0xA000;
static constexpr u32 ShRegWordOffset = 0x2C00;
static constexpr u32 NumRegs = 0xD000;
using UserData = std::array<u32, NumWordsShaderUserData>;
using UserData = std::array<u32, NumShaderUserData>;
struct ShaderProgram {
u32 address_lo;
@ -58,6 +58,14 @@ struct Liverpool {
}
};
union PsInputControl {
u32 raw;
BitField<0, 5, u32> input_offset;
BitField<5, 1, u32> use_default;
BitField<8, 2, u32> default_value;
BitField<10, 1, u32> flat_shade;
};
enum class ShaderExportComp : u32 {
None = 0,
OneComp = 1,
@ -552,9 +560,12 @@ struct Liverpool {
INSERT_PADDING_WORDS(1);
std::array<ViewportBounds, NumViewports> viewports;
std::array<ClipUserData, NumClipPlanes> clip_user_data;
INSERT_PADDING_WORDS(0xA1B1 - 0xA187);
INSERT_PADDING_WORDS(0xA191 - 0xA187);
std::array<PsInputControl, 32> ps_inputs;
VsOutputConfig vs_output_config;
INSERT_PADDING_WORDS(0xA1C3 - 0xA1B1 - 1);
INSERT_PADDING_WORDS(4);
BitField<0, 6, u32> num_interp;
INSERT_PADDING_WORDS(0xA1C3 - 0xA1B6 - 1);
ShaderPosFormat shader_pos_format;
ShaderExportFormat z_export_format;
ColorExportFormat color_export_format;
@ -631,7 +642,9 @@ static_assert(GFX6_3D_REG_INDEX(viewport_scissors) == 0xA094);
static_assert(GFX6_3D_REG_INDEX(stencil_control) == 0xA10B);
static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F);
static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F);
static_assert(GFX6_3D_REG_INDEX(ps_inputs) == 0xA191);
static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1);
static_assert(GFX6_3D_REG_INDEX(num_interp) == 0xA1B6);
static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3);
static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4);
static_assert(GFX6_3D_REG_INDEX(color_export_format) == 0xA1C5);

View file

@ -2,10 +2,44 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include "common/assert.h"
#include "video_core/amdgpu/pixel_format.h"
namespace AmdGpu {
std::string_view NameOf(NumberFormat fmt) {
switch (fmt) {
case NumberFormat::Unorm:
return "Unorm";
case NumberFormat::Snorm:
return "Snorm";
case NumberFormat::Uscaled:
return "Uscaled";
case NumberFormat::Sscaled:
return "Sscaled";
case NumberFormat::Uint:
return "Uint";
case NumberFormat::Sint:
return "Sint";
case NumberFormat::SnormNz:
return "SnormNz";
case NumberFormat::Float:
return "Float";
case NumberFormat::Srgb:
return "Srgb";
case NumberFormat::Ubnorm:
return "Ubnorm";
case NumberFormat::UbnromNz:
return "UbnormNz";
case NumberFormat::Ubint:
return "Ubint";
case NumberFormat::Ubscaled:
return "Unscaled";
default:
UNREACHABLE();
}
}
u32 NumComponents(DataFormat format) {
constexpr std::array numComponentsPerElement = {
0, 1, 1, 2, 1, 2, 3, 3, 4, 4, 4, 2, 4, 3, 4, -1, 3, 4, 4, 4, 2,

View file

@ -3,6 +3,8 @@
#pragma once
#include <string_view>
#include <fmt/format.h>
#include "common/types.h"
namespace AmdGpu {
@ -59,6 +61,19 @@ enum class NumberFormat : u32 {
Ubscaled = 13,
};
[[nodiscard]] std::string_view NameOf(NumberFormat fmt);
u32 NumComponents(DataFormat format);
} // namespace AmdGpu
template <>
struct fmt::formatter<AmdGpu::NumberFormat> {
constexpr auto parse(format_parse_context& ctx) {
return ctx.begin();
}
auto format(AmdGpu::NumberFormat fmt, format_context& ctx) const {
return fmt::format_to(ctx.out(), "{}", AmdGpu::NameOf(fmt));
}
};

View file

@ -1,7 +1,9 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <fstream>
#include "common/scope_exit.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/recompiler.h"
#include "shader_recompiler/runtime_info.h"
#include "video_core/renderer_vulkan/vk_instance.h"
@ -11,9 +13,31 @@
namespace Vulkan {
Shader::Info MakeShaderInfo(Shader::Stage stage, std::span<const u32, 16> user_data,
AmdGpu::Liverpool::Regs& regs) {
Shader::Info info{user_data};
info.stage = stage;
switch (stage) {
case Shader::Stage::Fragment: {
for (u32 i = 0; i < regs.num_interp; i++) {
info.ps_inputs.push_back({
.param_index = regs.ps_inputs[i].input_offset.Value(),
.is_default = bool(regs.ps_inputs[i].use_default),
.is_flat = bool(regs.ps_inputs[i].flat_shade),
.default_value = regs.ps_inputs[i].default_value,
});
}
break;
}
default:
break;
}
return info;
}
PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
AmdGpu::Liverpool* liverpool_)
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{4096},
: instance{instance_}, scheduler{scheduler_}, liverpool{liverpool_}, inst_pool{8192},
block_pool{512} {
const vk::PipelineLayoutCreateInfo layout_info = {
.setLayoutCount = 0U,
@ -50,8 +74,18 @@ void PipelineCache::BindPipeline() {
// Compile and cache shader.
const auto data = std::span{token, bininfo.length / sizeof(u32)};
const auto program = Shader::TranslateProgram(inst_pool, block_pool, stage, pgm.user_data, data);
return CompileSPV(program, instance.GetDevice());
block_pool.ReleaseContents();
inst_pool.ReleaseContents();
const auto info = MakeShaderInfo(stage, pgm.user_data, liverpool->regs);
auto program = Shader::TranslateProgram(inst_pool, block_pool, data, std::move(info));
const auto code = Shader::Backend::SPIRV::EmitSPIRV(Shader::Profile{}, program);
static int counter = 0;
std::ofstream file(fmt::format("shader{}.spv", counter++), std::ios::out | std::ios::binary);
file.write((const char*)code.data(), code.size() * sizeof(u32));
file.close();
return CompileSPV(code, instance.GetDevice());
};
// Retrieve shader stage modules.