mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-04-21 03:54:45 +00:00
vector_alu: Proper V_MBCNT_U32_B32
This commit is contained in:
parent
d5383c8c34
commit
5e35a30607
18 changed files with 91 additions and 24 deletions
|
@ -390,6 +390,7 @@ Id EmitImageAtomicXor32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
|||
Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value);
|
||||
|
||||
Id EmitLaneId(EmitContext& ctx);
|
||||
Id EmitWarpId(EmitContext& ctx);
|
||||
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -10,6 +10,10 @@ Id SubgroupScope(EmitContext& ctx) {
|
|||
return ctx.ConstU32(static_cast<u32>(spv::Scope::Subgroup));
|
||||
}
|
||||
|
||||
Id EmitWarpId(EmitContext& ctx) {
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.subgroup_id);
|
||||
}
|
||||
|
||||
Id EmitLaneId(EmitContext& ctx) {
|
||||
return ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id);
|
||||
}
|
||||
|
|
|
@ -127,6 +127,7 @@ Id GetAttributeType(EmitContext& ctx, AmdGpu::NumberFormat fmt) {
|
|||
case AmdGpu::NumberFormat::Float:
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
case AmdGpu::NumberFormat::Snorm:
|
||||
case AmdGpu::NumberFormat::SnormNz:
|
||||
return ctx.F32[4];
|
||||
case AmdGpu::NumberFormat::Sint:
|
||||
return ctx.S32[4];
|
||||
|
@ -147,6 +148,7 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f
|
|||
case AmdGpu::NumberFormat::Float:
|
||||
case AmdGpu::NumberFormat::Unorm:
|
||||
case AmdGpu::NumberFormat::Snorm:
|
||||
case AmdGpu::NumberFormat::SnormNz:
|
||||
return {id, input_f32, F32[1], 4};
|
||||
case AmdGpu::NumberFormat::Uint:
|
||||
return {id, input_u32, U32[1], 4};
|
||||
|
@ -223,11 +225,10 @@ void EmitContext::DefineInputs(const Info& info) {
|
|||
break;
|
||||
}
|
||||
case Stage::Fragment:
|
||||
if (info.uses_group_quad) {
|
||||
subgroup_local_invocation_id = DefineVariable(
|
||||
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
|
||||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||
}
|
||||
subgroup_id = DefineVariable(U32[1], spv::BuiltIn::SubgroupId, spv::StorageClass::Input);
|
||||
subgroup_local_invocation_id = DefineVariable(
|
||||
U32[1], spv::BuiltIn::SubgroupLocalInvocationId, spv::StorageClass::Input);
|
||||
Decorate(subgroup_local_invocation_id, spv::Decoration::Flat);
|
||||
frag_coord = DefineVariable(F32[4], spv::BuiltIn::FragCoord, spv::StorageClass::Input);
|
||||
frag_depth = DefineVariable(F32[1], spv::BuiltIn::FragDepth, spv::StorageClass::Output);
|
||||
front_facing = DefineVariable(U1[1], spv::BuiltIn::FrontFacing, spv::StorageClass::Input);
|
||||
|
|
|
@ -180,6 +180,7 @@ public:
|
|||
|
||||
Id workgroup_id{};
|
||||
Id local_invocation_id{};
|
||||
Id subgroup_id{};
|
||||
Id subgroup_local_invocation_id{};
|
||||
Id image_u32{};
|
||||
|
||||
|
|
|
@ -600,13 +600,13 @@ public:
|
|||
TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_,
|
||||
ObjectPool<Statement>& stmt_pool_, Statement& root_stmt,
|
||||
IR::AbstractSyntaxList& syntax_list_, std::span<const GcnInst> inst_list_,
|
||||
Info& info_)
|
||||
Info& info_, const Profile& profile_)
|
||||
: stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_},
|
||||
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_} {
|
||||
syntax_list{syntax_list_}, inst_list{inst_list_}, info{info_}, profile{profile_} {
|
||||
Visit(root_stmt, nullptr, nullptr);
|
||||
|
||||
IR::Block& first_block{*syntax_list.front().data.block};
|
||||
Translator{&first_block, info}.EmitPrologue();
|
||||
Translator{&first_block, info, profile}.EmitPrologue();
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -635,7 +635,7 @@ private:
|
|||
const u32 start = stmt.block->begin_index;
|
||||
const u32 size = stmt.block->end_index - start + 1;
|
||||
Translate(current_block, stmt.block->begin, inst_list.subspan(start, size),
|
||||
info);
|
||||
info, profile);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -815,16 +815,17 @@ private:
|
|||
const Block dummy_flow_block{.is_dummy = true};
|
||||
std::span<const GcnInst> inst_list;
|
||||
Info& info;
|
||||
const Profile& profile;
|
||||
};
|
||||
} // Anonymous namespace
|
||||
|
||||
IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
||||
CFG& cfg, Info& info) {
|
||||
CFG& cfg, Info& info, const Profile& profile) {
|
||||
ObjectPool<Statement> stmt_pool{64};
|
||||
GotoPass goto_pass{cfg, stmt_pool};
|
||||
Statement& root{goto_pass.RootStatement()};
|
||||
IR::AbstractSyntaxList syntax_list;
|
||||
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info};
|
||||
TranslatePass{inst_pool, block_pool, stmt_pool, root, syntax_list, cfg.inst_list, info, profile};
|
||||
ASSERT_MSG(!info.translation_failed, "Shader translation has failed");
|
||||
return syntax_list;
|
||||
}
|
||||
|
|
|
@ -11,12 +11,13 @@
|
|||
|
||||
namespace Shader {
|
||||
struct Info;
|
||||
struct Profile;
|
||||
}
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
[[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool,
|
||||
ObjectPool<IR::Block>& block_pool, CFG& cfg,
|
||||
Info& info);
|
||||
Info& info, const Profile& profile);
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -16,8 +16,8 @@
|
|||
|
||||
namespace Shader::Gcn {
|
||||
|
||||
Translator::Translator(IR::Block* block_, Info& info_)
|
||||
: ir{*block_, block_->begin()}, info{info_} {}
|
||||
Translator::Translator(IR::Block* block_, Info& info_, const Profile& profile_)
|
||||
: ir{*block_, block_->begin()}, info{info_}, profile{profile_} {}
|
||||
|
||||
void Translator::EmitPrologue() {
|
||||
ir.Prologue();
|
||||
|
@ -487,11 +487,12 @@ void Translator::LogMissingOpcode(const GcnInst& inst) {
|
|||
info.translation_failed = true;
|
||||
}
|
||||
|
||||
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list, Info& info) {
|
||||
void Translate(IR::Block* block, u32 pc, std::span<const GcnInst> inst_list,
|
||||
Info& info, const Profile& profile) {
|
||||
if (inst_list.empty()) {
|
||||
return;
|
||||
}
|
||||
Translator translator{block, info};
|
||||
Translator translator{block, info, profile};
|
||||
for (const auto& inst : inst_list) {
|
||||
pc += inst.length;
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
namespace Shader {
|
||||
struct Info;
|
||||
struct Profile;
|
||||
}
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
@ -53,7 +54,7 @@ enum class NegateMode : u32 {
|
|||
|
||||
class Translator {
|
||||
public:
|
||||
explicit Translator(IR::Block* block_, Info& info);
|
||||
explicit Translator(IR::Block* block_, Info& info, const Profile& profile);
|
||||
|
||||
// Instruction categories
|
||||
void EmitPrologue();
|
||||
|
@ -176,6 +177,7 @@ public:
|
|||
void V_CVT_FLR_I32_F32(const GcnInst& inst);
|
||||
void V_CMP_CLASS_F32(const GcnInst& inst);
|
||||
void V_FFBL_B32(const GcnInst& inst);
|
||||
void V_MBCNT_U32_B32(bool is_low, const GcnInst& inst);
|
||||
|
||||
// Vector Memory
|
||||
void BUFFER_LOAD_FORMAT(u32 num_dwords, bool is_typed, bool is_format, const GcnInst& inst);
|
||||
|
@ -216,9 +218,11 @@ private:
|
|||
private:
|
||||
IR::IREmitter ir;
|
||||
Info& info;
|
||||
const Profile& profile;
|
||||
bool opcode_missing = false;
|
||||
};
|
||||
|
||||
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list, Info& info);
|
||||
void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_list,
|
||||
Info& info, const Profile& profile);
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/profile.h"
|
||||
#include "shader_recompiler/frontend/translate/translate.h"
|
||||
|
||||
namespace Shader::Gcn {
|
||||
|
@ -292,6 +293,11 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
|
|||
return V_CMP_U32(ConditionOp::GE, false, true, inst);
|
||||
case Opcode::V_CMPX_TRU_U32:
|
||||
return V_CMP_U32(ConditionOp::TRU, false, true, inst);
|
||||
|
||||
case Opcode::V_MBCNT_LO_U32_B32:
|
||||
return V_MBCNT_U32_B32(true, inst);
|
||||
case Opcode::V_MBCNT_HI_U32_B32:
|
||||
return V_MBCNT_U32_B32(false, inst);
|
||||
default:
|
||||
LogMissingOpcode(inst);
|
||||
}
|
||||
|
@ -910,4 +916,29 @@ void Translator::V_FFBL_B32(const GcnInst& inst) {
|
|||
SetDst(inst.dst[0], ir.FindILsb(src0));
|
||||
}
|
||||
|
||||
void Translator::V_MBCNT_U32_B32(bool is_low, const GcnInst& inst) {
|
||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
const IR::U32 lane_id = ir.LaneId();
|
||||
|
||||
const auto [warp_half, mask_shift] = [&]() -> std::pair<IR::U32, IR::U32> {
|
||||
if (profile.subgroup_size == 32) {
|
||||
const IR::U32 warp_half = ir.BitwiseAnd(ir.WarpId(), ir.Imm32(1));
|
||||
return std::make_pair(warp_half, lane_id);
|
||||
}
|
||||
const IR::U32 warp_half = ir.ShiftRightLogical(lane_id, ir.Imm32(5));
|
||||
const IR::U32 mask_shift = ir.BitwiseAnd(lane_id, ir.Imm32(0x1F));
|
||||
return std::make_pair(warp_half, mask_shift);
|
||||
}();
|
||||
|
||||
const IR::U32 thread_mask = ir.ISub(ir.ShiftLeftLogical(ir.Imm32(1), mask_shift), ir.Imm32(1));
|
||||
const IR::U1 is_odd_warp = ir.INotEqual(warp_half, ir.Imm32(0));
|
||||
const IR::U32 mask = IR::U32{ir.Select(is_odd_warp,
|
||||
is_low ? ir.Imm32(~0U) : thread_mask,
|
||||
is_low ? thread_mask : ir.Imm32(0))};
|
||||
const IR::U32 masked_value = ir.BitwiseAnd(src0, mask);
|
||||
const IR::U32 result = ir.IAdd(src1, ir.BitCount(masked_value));
|
||||
SetDst(inst.dst[0], result);
|
||||
}
|
||||
|
||||
} // namespace Shader::Gcn
|
||||
|
|
|
@ -373,6 +373,10 @@ U32 IREmitter::LaneId() {
|
|||
return Inst<U32>(Opcode::LaneId);
|
||||
}
|
||||
|
||||
U32 IREmitter::WarpId() {
|
||||
return Inst<U32>(Opcode::WarpId);
|
||||
}
|
||||
|
||||
U32 IREmitter::QuadShuffle(const U32& value, const U32& index) {
|
||||
return Inst<U32>(Opcode::QuadShuffle, value, index);
|
||||
}
|
||||
|
|
|
@ -95,6 +95,7 @@ public:
|
|||
BufferInstInfo info);
|
||||
|
||||
[[nodiscard]] U32 LaneId();
|
||||
[[nodiscard]] U32 WarpId();
|
||||
[[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index);
|
||||
|
||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
|
||||
|
|
|
@ -326,4 +326,5 @@ OPCODE(ImageAtomicExchange32, U32, Opaq
|
|||
|
||||
// Warp operations
|
||||
OPCODE(LaneId, U32, )
|
||||
OPCODE(WarpId, U32, )
|
||||
OPCODE(QuadShuffle, U32, U32, U32 )
|
||||
|
|
|
@ -9,6 +9,7 @@ namespace Shader {
|
|||
|
||||
struct Profile {
|
||||
u32 supported_spirv{0x00010000};
|
||||
u32 subgroup_size{};
|
||||
bool unified_descriptor_binding{};
|
||||
bool support_descriptor_aliasing{};
|
||||
bool support_int8{};
|
||||
|
|
|
@ -28,7 +28,7 @@ IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
|
|||
}
|
||||
|
||||
IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
|
||||
std::span<const u32> token, const Info&& info) {
|
||||
std::span<const u32> token, const Info&& info, const Profile& profile) {
|
||||
// Ensure first instruction is expected.
|
||||
constexpr u32 token_mov_vcchi = 0xBEEB03FF;
|
||||
ASSERT_MSG(token[0] == token_mov_vcchi, "First instruction is not s_mov_b32 vcc_hi, #imm");
|
||||
|
@ -49,7 +49,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
|
||||
// Structurize control flow graph and create program.
|
||||
program.info = std::move(info);
|
||||
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info);
|
||||
program.syntax_list = Shader::Gcn::BuildASL(inst_pool, block_pool, cfg, program.info, profile);
|
||||
program.blocks = GenerateBlocks(program.syntax_list);
|
||||
program.post_order_blocks = Shader::IR::PostOrder(program.syntax_list.front());
|
||||
|
||||
|
|
|
@ -9,8 +9,11 @@
|
|||
|
||||
namespace Shader {
|
||||
|
||||
struct Profile;
|
||||
|
||||
[[nodiscard]] IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool,
|
||||
ObjectPool<IR::Block>& block_pool,
|
||||
std::span<const u32> code, const Info&& info);
|
||||
std::span<const u32> code, const Info&& info,
|
||||
const Profile& profile);
|
||||
|
||||
} // namespace Shader
|
||||
|
|
|
@ -167,7 +167,10 @@ bool Instance::CreateDevice() {
|
|||
const vk::StructureChain properties_chain =
|
||||
physical_device.getProperties2<vk::PhysicalDeviceProperties2,
|
||||
vk::PhysicalDevicePortabilitySubsetPropertiesKHR,
|
||||
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT>();
|
||||
vk::PhysicalDeviceExternalMemoryHostPropertiesEXT,
|
||||
vk::PhysicalDeviceVulkan11Properties>();
|
||||
subgroup_size = properties_chain.get<vk::PhysicalDeviceVulkan11Properties>().subgroupSize;
|
||||
LOG_INFO(Render_Vulkan, "Physical device subgroup size {}", subgroup_size);
|
||||
|
||||
features = feature_chain.get().features;
|
||||
if (available_extensions.empty()) {
|
||||
|
|
|
@ -188,6 +188,11 @@ public:
|
|||
return properties.limits.nonCoherentAtomSize;
|
||||
}
|
||||
|
||||
/// Returns the subgroup size of the selected physical device.
|
||||
u32 SubgroupSize() const {
|
||||
return subgroup_size;
|
||||
}
|
||||
|
||||
/// Returns the maximum supported elements in a texel buffer
|
||||
u32 MaxTexelBufferElements() const {
|
||||
return properties.limits.maxTexelBufferElements;
|
||||
|
@ -249,6 +254,7 @@ private:
|
|||
bool workgroup_memory_explicit_layout{};
|
||||
bool color_write_en{};
|
||||
u64 min_imported_host_pointer_alignment{};
|
||||
u32 subgroup_size{};
|
||||
bool tooling_info{};
|
||||
bool debug_utils_supported{};
|
||||
bool has_nsight_graphics{};
|
||||
|
|
|
@ -109,6 +109,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
|
|||
pipeline_cache = instance.GetDevice().createPipelineCacheUnique({});
|
||||
profile = Shader::Profile{
|
||||
.supported_spirv = 0x00010600U,
|
||||
.subgroup_size = instance.SubgroupSize(),
|
||||
.support_explicit_workgroup_layout = true,
|
||||
};
|
||||
}
|
||||
|
@ -268,7 +269,8 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
|||
Shader::Info info = MakeShaderInfo(stage, pgm->user_data, regs);
|
||||
info.pgm_base = pgm->Address<uintptr_t>();
|
||||
info.pgm_hash = hash;
|
||||
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
|
||||
programs[i] = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info),
|
||||
profile);
|
||||
|
||||
// Compile IR to SPIR-V
|
||||
auto spv_code = Shader::Backend::SPIRV::EmitSPIRV(profile, programs[i], binding);
|
||||
|
@ -308,7 +310,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline() {
|
|||
Shader::Info info =
|
||||
MakeShaderInfo(Shader::Stage::Compute, cs_pgm.user_data, liverpool->regs);
|
||||
info.pgm_base = cs_pgm.Address<uintptr_t>();
|
||||
auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info));
|
||||
auto program = Shader::TranslateProgram(inst_pool, block_pool, code, std::move(info),
|
||||
profile);
|
||||
|
||||
// Compile IR to SPIR-V
|
||||
u32 binding{};
|
||||
|
|
Loading…
Add table
Reference in a new issue