Shader recompiler: apply the sharp's dst_sel swizzle to formatted buffer accesses.

Adds SwizzleVector(), which builds a 4-component vector by picking, for each output
component i, the source chosen by sharp.GetSwizzle(i): constant 0, constant 1, or texel
component 0-3. PatchTextureBufferInstruction applies it to Arg(2) of StoreBufferFormatF32,
and to the result of any other opcode that reaches the new code.

NOTE(review): Zero/One are emitted as float immediates (0.f / 1.f); confirm this is
acceptable for integer formats.
NOTE(review): stores reuse the same mapping as loads; confirm whether stores need the
inverse mapping for swizzles that are not self-inverse.

diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index 89c5c78a0..60782bb0e 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -137,6 +137,35 @@ bool IsImageInstruction(const IR::Inst& inst) {
     }
 }
 
+IR::Value SwizzleVector(IR::IREmitter& ir, auto sharp, IR::Value texel) {
+    boost::container::static_vector<IR::Value, 4> comps;
+    for (u32 i = 0; i < 4; i++) {
+        switch (sharp.GetSwizzle(i)) { // source selected for output component i
+        case AmdGpu::CompSwizzle::Zero:
+            comps.emplace_back(ir.Imm32(0.f));
+            break;
+        case AmdGpu::CompSwizzle::One:
+            comps.emplace_back(ir.Imm32(1.f));
+            break;
+        case AmdGpu::CompSwizzle::Red:
+            comps.emplace_back(ir.CompositeExtract(texel, 0));
+            break;
+        case AmdGpu::CompSwizzle::Green:
+            comps.emplace_back(ir.CompositeExtract(texel, 1));
+            break;
+        case AmdGpu::CompSwizzle::Blue:
+            comps.emplace_back(ir.CompositeExtract(texel, 2));
+            break;
+        case AmdGpu::CompSwizzle::Alpha:
+            comps.emplace_back(ir.CompositeExtract(texel, 3));
+            break;
+        default:
+            UNREACHABLE();
+        }
+    }
+    return ir.CompositeConstruct(comps[0], comps[1], comps[2], comps[3]);
+}
+
 class Descriptors {
 public:
     explicit Descriptors(Info& info_)
@@ -388,6 +417,15 @@ void PatchTextureBufferInstruction(IR::Block& block, IR::Inst& inst, Info& info,
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
     inst.SetArg(0, ir.Imm32(binding));
     ASSERT(!buffer.swizzle_enable && !buffer.add_tid_enable);
+
+    // Apply dst_sel swizzle on formatted buffer instructions (stores: Arg(2); loads: the result)
+    if (inst.GetOpcode() == IR::Opcode::StoreBufferFormatF32) {
+        inst.SetArg(2, SwizzleVector(ir, buffer, inst.Arg(2)));
+    } else {
+        const auto inst_info = inst.Flags<IR::BufferInstInfo>();
+        const auto texel = ir.LoadBufferFormat(inst.Arg(0), inst.Arg(1), inst_info);
+        inst.ReplaceUsesWith(SwizzleVector(ir, buffer, texel));
+    }
 }
 
 IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value& t,
@@ -711,6 +749,11 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
     }();
     inst.SetArg(1, coords);
 
+    // Swizzle the stored value in the shader unless CanSwizzleWithFormat() covers this image
+    if (inst.GetOpcode() == IR::Opcode::ImageWrite && !image.CanSwizzleWithFormat()) {
+        inst.SetArg(2, SwizzleVector(ir, image, inst.Arg(2)));
+    }
+
     if (inst_info.has_lod) {
         ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch);
         ASSERT(image.GetType() != AmdGpu::ImageType::Color2DMsaa &&
diff --git a/src/shader_recompiler/specialization.h b/src/shader_recompiler/specialization.h
index 2a3bd62f4..205ca9c75 100644
--- a/src/shader_recompiler/specialization.h
+++ b/src/shader_recompiler/specialization.h
@@ -31,6 +31,7 @@ struct BufferSpecialization {
 
 struct TextureBufferSpecialization {
     bool is_integer = false;
+    u32 dst_select = 0; // packed dst_sel of the bound buffer sharp
 
     auto operator<=>(const TextureBufferSpecialization&) const = default;
 };
@@ -38,8 +39,14 @@ struct TextureBufferSpecialization {
 struct ImageSpecialization {
     AmdGpu::ImageType type = AmdGpu::ImageType::Color2D;
     bool is_integer = false;
+    u32 dst_select = 0; // stays 0 unless a storage image needs a shader-side swizzle
 
-    auto operator<=>(const ImageSpecialization&) const = default;
+    // A zero dst_select on this side matches any value in `other`, which makes the comparison
+    // asymmetric. NOTE(review): confirm the operand order used by callers.
+    bool operator==(const ImageSpecialization& other) const {
+        return type == other.type && is_integer == other.is_integer &&
+               (dst_select == 0 || dst_select == other.dst_select);
+    }
 };
 
 struct FMaskSpecialization {
@@ -96,11 +103,15 @@ struct StageSpecialization {
         ForEachSharp(binding, tex_buffers, info->texture_buffers,
                      [](auto& spec, const auto& desc, AmdGpu::Buffer sharp) {
                          spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
+                         spec.dst_select = sharp.DstSelect();
                      });
         ForEachSharp(binding, images, info->images,
                      [](auto& spec, const auto& desc, AmdGpu::Image sharp) {
                          spec.type = sharp.GetBoundType();
                          spec.is_integer = AmdGpu::IsInteger(sharp.GetNumberFmt());
+                         if (desc.is_storage && !sharp.CanSwizzleWithFormat()) {
+                             spec.dst_select = sharp.DstSelect();
+                         }
                      });
         ForEachSharp(binding, fmasks, info->fmasks,
                      [](auto& spec, const auto& desc, AmdGpu::Image sharp) {
diff --git a/src/video_core/amdgpu/resource.h b/src/video_core/amdgpu/resource.h
index ba87425f2..9b10e0d60 100644
--- a/src/video_core/amdgpu/resource.h
+++ b/src/video_core/amdgpu/resource.h
@@ -52,6 +52,11 @@ struct Buffer {
         return std::memcmp(this, &other, sizeof(Buffer)) == 0;
     }
 
+    // Packs dst_sel_{x,y,z,w} into one value, 3 bits apart (x at bit 0, w at bit 9).
+    u32 DstSelect() const {
+        return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9);
+    }
+
     CompSwizzle GetSwizzle(u32 comp) const noexcept {
         const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
         return static_cast<CompSwizzle>(select[comp]);
@@ -204,6 +209,21 @@ struct Image {
         return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9);
     }
 
+    // True only for the packed dst_sel 0b111100101110 (x=6, y=5, z=4, w=7) on Format8_8_8_8.
+    bool CanSwizzleWithFormat() const {
+        // BGRA
+        if (DstSelect() == 0b111100101110 && GetDataFmt() == DataFormat::Format8_8_8_8) {
+            return true;
+        }
+        return false;
+    }
+
+    // Selector for component `comp` (0 = x ... 3 = w); `comp` must be below 4.
+    CompSwizzle GetSwizzle(u32 comp) const noexcept {
+        const std::array select{dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w};
+        return static_cast<CompSwizzle>(select[comp]);
+    }
+
     static char SelectComp(u32 sel) {
         switch (sel) {
         case 0:
diff --git a/src/video_core/texture_cache/image_view.cpp b/src/video_core/texture_cache/image_view.cpp
index 12ad201d1..a2d65fbca 100644
--- a/src/video_core/texture_cache/image_view.cpp
+++ b/src/video_core/texture_cache/image_view.cpp
@@ -124,12 +124,7 @@ ImageViewInfo::ImageViewInfo(const AmdGpu::Image& image, const Shader::ImageReso
     const u32 num_comps = AmdGpu::NumComponents(image.GetDataFmt());
     const u32 dst_sel = image.DstSelect();
     if (is_storage && !IsIdentityMapping(dst_sel, num_comps)) {
-        if (auto new_format = TrySwizzleFormat(format, dst_sel); new_format != format) {
-            format = new_format;
-            return;
-        }
-        LOG_ERROR(Render_Vulkan, "Storage image (num_comps = {}) requires swizzling {}", num_comps,
-                  image.DstSelectName());
+        format = TrySwizzleFormat(format, dst_sel);
     }
 }
 