vector_alu: Proper V_MBCNT_U32_B32

This commit is contained in:
IndecisiveTurtle 2024-07-30 19:42:14 +03:00
parent d5383c8c34
commit 5e35a30607
18 changed files with 91 additions and 24 deletions

View file

@ -390,6 +390,7 @@ Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
Id EmitLaneId(EmitContext& ctx);
Id EmitWarpId(EmitContext& ctx);
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
} // namespace Shader::Backend::SPIRV

View file

@ -10,6 +10,10 @@ Id SubgroupScope(EmitContext& ctx) {
return ctx.ConstU32(static_cast<u32>(spv::Scope::Subgroup));
}
/// Loads the context's `subgroup_id` input variable as a scalar 32-bit unsigned value.
/// NOTE(review): the SPIR-V SubgroupId built-in looks to be restricted to compute-like
/// execution models under Vulkan — confirm before relying on it in other stages.
Id EmitWarpId(EmitContext& ctx) {
    const Id warp_index = ctx.OpLoad(ctx.U32[1], ctx.subgroup_id);
    return warp_index;
}
/// Loads the context's `subgroup_local_invocation_id` input variable as a scalar
/// 32-bit unsigned value.
Id EmitLaneId(EmitContext& ctx) {
    const Id lane_index = ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id);
    return lane_index;
}

View file

@ -127,6 +127,7 @@ Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm:
case AmdGpu::NumberFormat::Snorm:
case AmdGpu::NumberFormat::SnormNz:
return ctx.F32[4];
case AmdGpu::NumberFormat::Sint:
return ctx.S32[4];
@ -147,6 +148,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
case AmdGpu::NumberFormat::Float:
case AmdGpu::NumberFormat::Unorm:
case AmdGpu::NumberFormat::Snorm:
case AmdGpu::NumberFormat::SnormNz:
return {id, input_f32, F32[1], 4};
case AmdGpu::NumberFormat::Uint:
return {id, input_u32, U32[1], 4};
@ -223,11 +225,10 @@ void EmitContext::DefineInputs(const Info& info) {
break;
}
case Stage::Fragment:
if (info.uses_group_quad) {
subgroup_local_invocation_id = DefineVariable(
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
}
subgroup_id = DefineVariable(U32[1], spv::BuiltIn::SubgroupId, spv::StorageClass::Input);
subgroup_local_invocation_id = DefineVariable(
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);

View file

@ -180,6 +180,7 @@ public:
Id workgroup_id{};
Id local_invocation_id{};
Id subgroup_id{};
Id subgroup_local_invocation_id{};
Id image_u32{};

View file

@ -600,13 +600,13 @@ public:
// Stores references to the object pools, the output syntax list, the instruction list,
// the shader info and the profile, then calls Visit on the root statement.
// Afterwards the block at the front of syntax_list receives the prologue, emitted
// through a temporary Translator.
TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
Info& info_)
Info& info_, const Profile& profile_)
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_} {
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, profile{profile_} {
Visit(root_stmt, nullptr, nullptr);
IR::Block& first_block{*syntax_list.front().data.block};
Translator{&first_block, info}.EmitPrologue();
Translator{&first_block, info, profile}.EmitPrologue();
}
private:
@ -635,7 +635,7 @@ private:
const u32 start = stmt.block->begin_index;
const u32 size = stmt.block->end_index - start + 1;
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size),
info);
info, profile);
}
break;
}
@ -815,16 +815,17 @@ private:
const Block dummy_flow_block{.is_dummy = true};
std::span<const GcnInst> inst_list;
Info& info;
const Profile& profile;
};
} // Anonymous namespace
// Builds and returns the abstract syntax list for a shader.
// A GotoPass is constructed over the CFG with a local statement pool; its root statement
// is handed to a temporary TranslatePass together with the pools, the CFG's instruction
// list, the shader info and the profile. Asserts that info.translation_failed is not set
// before returning the list.
IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
CFG& cfg, Info& info) {
CFG& cfg, Info& info, const Profile& profile) {
ObjectPool<Statement> stmt_pool{64};
GotoPass goto_pass{cfg, stmt_pool};
Statement& root{goto_pass.RootStatement()};
IR::AbstractSyntaxList syntax_list;
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info};
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info, profile};
ASSERT_MSG(!info.translation_failed, "Shader translation has failed");
return syntax_list;
}

View file

@ -11,12 +11,13 @@
namespace Shader {
struct Info;
struct Profile;
}
namespace Shader::Gcn {
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool, CFG& cfg,
Info& info);
Info& info, const Profile& profile);
} // namespace Shader::Gcn

View file

@ -16,8 +16,8 @@
namespace Shader::Gcn {
Translator::Translator(IR::Block* block_, Info& info_)
: ir{*block_, block_->begin()}, info{info_} {}
// Positions the IR emitter at the beginning of `block_` and keeps references to the
// shader info and the profile for use while translating.
Translator::Translator(IR::Block* block_, Info& info_, const Profile& profile_)
: ir{*block_, block_->begin()}, info{info_}, profile{profile_} {}
void Translator::EmitPrologue() {
ir.Prologue();
@ -487,11 +487,12 @@ void Translator::LogMissingOpcode(const GcnInst& inst) {
info.translation_failed = true;
}
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Info& info) {
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list,
Info& info, const Profile& profile) {
if (inst_list.empty()) {
return;
}
Translator translator{block, info};
Translator translator{block, info, profile};
for (const auto& inst : inst_list) {
pc += inst.length;

View file

@ -11,6 +11,7 @@
namespace Shader {
struct Info;
struct Profile;
}
namespace Shader::Gcn {
@ -53,7 +54,7 @@ enum class NegateMode : u32 {
class Translator {
public:
explicit Translator(IR::Block* block_, Info& info);
explicit Translator(IR::Block* block_, Info& info, const Profile& profile);
// Instruction categories
void EmitPrologue();
@ -176,6 +177,7 @@ public:
void V_CVT_FLR_I32_F32(const GcnInst& inst);
void V_CMP_CLASS_F32(const GcnInst& inst);
void V_FFBL_B32(const GcnInst& inst);
void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst);
// Vector Memory
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
@ -216,9 +218,11 @@ private:
private:
IR::IREmitter ir;
Info& info;
const Profile& profile;
bool opcode_missing = false;
};
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info);
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list,
Info& info, const Profile& profile);
} // namespace Shader::Gcn

View file

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include "shader_recompiler/profile.h"
#include "shader_recompiler/frontend/translate/translate.h"
namespace Shader::Gcn {
@ -292,6 +293,11 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_CMP_U32(ConditionOp::GE, false, true, inst);
case Opcode::V_CMPX_TRU_U32:
return V_CMP_U32(ConditionOp::TRU, false, true, inst);
case Opcode::V_MBCNT_LO_U32_B32:
return V_MBCNT_U32_B32(true, inst);
case Opcode::V_MBCNT_HI_U32_B32:
return V_MBCNT_U32_B32(false, inst);
default:
LogMissingOpcode(inst);
}
@ -910,4 +916,29 @@ void Translator::V_FFBL_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FindILsb(src0));
}
void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
const IR::U32 src0{GetSrc(inst.src[0])};
const IR::U32 src1{GetSrc(inst.src[1])};
const IR::U32 lane_id = ir.LaneId();
const auto [warp_half, mask_shift] = [&]() -> std::pair<IR::U32, IR::U32> {
if (profile.subgroup_size == 32) {
const IR::U32 warp_half = ir.BitwiseAnd(ir.WarpId(), ir.Imm32(1));
return std::make_pair(warp_half, lane_id);
}
const IR::U32 warp_half = ir.ShiftRightLogical(lane_id, ir.Imm32(5));
const IR::U32 mask_shift = ir.BitwiseAnd(lane_id, ir.Imm32(0x1F));
return std::make_pair(warp_half, mask_shift);
}();
const IR::U32 thread_mask = ir.ISub(ir.ShiftLeftLogical(ir.Imm32(1), mask_shift), ir.Imm32(1));
const IR::U1 is_odd_warp = ir.INotEqual(warp_half, ir.Imm32(0));
const IR::U32 mask = IR::U32{ir.Select(is_odd_warp,
is_low ? ir.Imm32(~0U) : thread_mask,
is_low ? thread_mask : ir.Imm32(0))};
const IR::U32 masked_value = ir.BitwiseAnd(src0, mask);
const IR::U32 result = ir.IAdd(src1, ir.BitCount(masked_value));
SetDst(inst.dst[0], result);
}
} // namespace Shader::Gcn

View file

@ -373,6 +373,10 @@ U32 IREmitter::LaneId() {
return Inst<U32>(Opcode::LaneId);
}
/// Emits a WarpId instruction and returns its 32-bit unsigned result.
U32 IREmitter::WarpId() {
    const U32 warp_id = Inst<U32>(Opcode::WarpId);
    return warp_id;
}
/// Emits a QuadShuffle instruction taking `value` and `index` as operands and returns
/// its 32-bit unsigned result.
U32 IREmitter::QuadShuffle(const U32& value, const U32& index) {
    const U32 shuffled = Inst<U32>(Opcode::QuadShuffle, value, index);
    return shuffled;
}

View file

@ -95,6 +95,7 @@ public:
BufferInstInfo info);
[[nodiscard]] U32 LaneId();
[[nodiscard]] U32 WarpId();
[[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index);
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);

View file

@ -326,4 +326,5 @@ OPCODE(ImageAtomicExchange32, U32, Opaq
// Warp operations
OPCODE(LaneId, U32, )
OPCODE(WarpId, U32, )
OPCODE(QuadShuffle, U32, U32, U32 )

View file

@ -9,6 +9,7 @@ namespace Shader {
struct Profile {
u32 supported_spirv{0x00010000};
u32 subgroup_size{};
bool unified_descriptor_binding{};
bool support_descriptor_aliasing{};
bool support_int8{};

View file

@ -28,7 +28,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
}
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
std::span<const u32> token, const Info&& info) {
std::span<const u32> token, const Info&& info, const Profile& profile) {
// Ensure first instruction is expected.
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
@ -49,7 +49,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
// Structurize control flow graph and create program.
program.info = std::move(info);
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info);
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info, profile);
program.blocks = GenerateBlocks(program.syntax_list);
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());

View file

@ -9,8 +9,11 @@
namespace Shader {
struct Profile;
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
ObjectPool<IR::Block>& block_pool,
std::span<const u32> code, const Info&& info);
std::span<const u32> code, const Info&& info,
const Profile& profile);
} // namespace Shader

View file

@ -167,7 +167,10 @@ bool Instance::CreateDevice() {
const vk::StructureChain properties_chain =
physical_device.getProperties2<vk::PhysicalDeviceProperties2,
vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>();
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT,
vk::PhysicalDeviceVulkan11Properties>();
subgroup_size = properties_chain.get<vk::PhysicalDeviceVulkan11Properties>().subgroupSize;
LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", subgroup_size);
features = feature_chain.get().features;
if (available_extensions.empty()) {

View file

@ -188,6 +188,11 @@ public:
return properties.limits.nonCoherentAtomSize;
}
/// Returns the subgroup size recorded for the selected physical device.
u32 SubgroupSize() const {
    const u32 size = subgroup_size;
    return size;
}
/// Returns the maximum supported elements in a texel buffer
u32 MaxTexelBufferElements() const {
return properties.limits.maxTexelBufferElements;
@ -249,6 +254,7 @@ private:
bool workgroup_memory_explicit_layout{};
bool color_write_en{};
u64 min_imported_host_pointer_alignment{};
u32 subgroup_size{};
bool tooling_info{};
bool debug_utils_supported{};
bool has_nsight_graphics{};

View file

@ -109,6 +109,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
profile = Shader::Profile{
.supported_spirv = 0x00010600U,
.subgroup_size = instance.SubgroupSize(),
.support_explicit_workgroup_layout = true,
};
}
@ -268,7 +269,8 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
info.pgm_base = pgm->Address<uintptr_t>();
info.pgm_hash = hash;
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info),
profile);
// Compile IR to SPIR-V
auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);
@ -308,7 +310,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
Shader::Info info =
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
info.pgm_base = cs_pgm.Address<uintptr_t>();
auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info),
profile);
// Compile IR to SPIR-V
u32 binding{};