shader_recompiler: Constant propagation pass for cmp_class_f32

2025-04-21 03:54:45 +00:00 · 2024-07-30 00:48:41 +03:00 · 2024-07-30 00:48:41 +03:00 · 45db60dfd4
commit 45db60dfd4
parent 7044cbcc99
8 changed files with 56 additions and 21 deletions
--- a/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_floating_point.cpp
@ -385,4 +385,8 @@ Id EmitFPIsInf64(EmitContext& ctx, Id value) {
    return ctx.OpIsInf(ctx.U1[1], value);
 }

+void EmitFPCmpClass32(EmitContext&) { // No SPIR-V lowering exists for FPCmpClass32; reaching this emitter is treated as a bug.
+    UNREACHABLE(); // NOTE(review): presumably FoldCmpClass always rewrites the opcode before emission - confirm pass order.
+}
+
 } // namespace Shader::Backend::SPIRV
--- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h
@ -42,6 +42,7 @@ void EmitSetVcc(EmitContext& ctx);
 void EmitSetSccLo(EmitContext& ctx);
 void EmitSetVccLo(EmitContext& ctx);
 void EmitSetVccHi(EmitContext& ctx);
+void EmitFPCmpClass32(EmitContext& ctx);
 void EmitPrologue(EmitContext& ctx);
 void EmitEpilogue(EmitContext& ctx);
 void EmitDiscard(EmitContext& ctx);
--- a/src/shader_recompiler/frontend/translate/vector_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp
@ -857,36 +857,27 @@ void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
 }

 void Translator::V_CMP_CLASS_F32(const GcnInst& inst) { // Tests src0 against the float-class mask in src1 and stores the result in VCC.
-    constexpr u32 SIGNALING_NAN = 1 << 0;
-    constexpr u32 QUIET_NAN = 1 << 1;
-    constexpr u32 NEGATIVE_INFINITY = 1 << 2;
-    constexpr u32 NEGATIVE_NORMAL = 1 << 3;
-    constexpr u32 NEGATIVE_DENORM = 1 << 4;
-    constexpr u32 NEGATIVE_ZERO = 1 << 5;
-    constexpr u32 POSITIVE_ZERO = 1 << 6;
-    constexpr u32 POSITIVE_DENORM = 1 << 7;
-    constexpr u32 POSITIVE_NORMAL = 1 << 8;
-    constexpr u32 POSITIVE_INFINITY = 1 << 9;
-
    const IR::F32F64 src0{GetSrc(inst.src[0])};
    const IR::U32 src1{GetSrc(inst.src[1])};
+    IR::U1 value;
    if (src1.IsImmediate()) {
-        const u32 class_mask = src1.U32();
-        IR::U1 value;
-        if ((class_mask & (SIGNALING_NAN | QUIET_NAN)) == (SIGNALING_NAN | QUIET_NAN)) {
+        const auto class_mask = static_cast<IR::FloatClassFunc>(src1.U32());
+        if ((class_mask & IR::FloatClassFunc::NaN) == IR::FloatClassFunc::NaN) {
            value = ir.FPIsNan(src0);
-        } else if ((class_mask & (POSITIVE_INFINITY | NEGATIVE_INFINITY)) ==
-                   (POSITIVE_INFINITY | NEGATIVE_INFINITY)) {
+        } else if ((class_mask & IR::FloatClassFunc::Infinity) == IR::FloatClassFunc::Infinity) {
            value = ir.FPIsInf(src0);
        } else {
            UNREACHABLE();
        }
-        if (inst.dst[1].field == OperandField::VccLo) {
-            return ir.SetVcc(value);
-        } else {
-            UNREACHABLE();
-        }
    } else {
+        // The class mask is not an immediate at translation time, so emit a generic
+        value = ir.FPCmpClass32(src0, src1); // FPCmpClass32 and let the constant propagation pass (FoldCmpClass) resolve it.
+    }
+
+    switch (inst.dst[1].field) {
+    case OperandField::VccLo:
+        return ir.SetVcc(value);
+    default:
        UNREACHABLE();
    }
 }
--- a/src/shader_recompiler/ir/ir_emitter.cpp
+++ b/src/shader_recompiler/ir/ir_emitter.cpp
@ -876,6 +876,10 @@ U1 IREmitter::FPIsInf(const F32F64& value) {
    }
 }

+U1 IREmitter::FPCmpClass32(const F32& value, const U32& op) { // Emits an FPCmpClass32 instruction; `op` carries the class mask.
+    return Inst<U1>(Opcode::FPCmpClass32, value, op);
+}
+
 U1 IREmitter::FPOrdered(const F32F64& lhs, const F32F64& rhs) {
    if (lhs.Type() != rhs.Type()) {
        UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
--- a/src/shader_recompiler/ir/ir_emitter.h
+++ b/src/shader_recompiler/ir/ir_emitter.h
@ -150,6 +150,7 @@ public:
    [[nodiscard]] U1 FPGreaterThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
    [[nodiscard]] U1 FPIsNan(const F32F64& value);
    [[nodiscard]] U1 FPIsInf(const F32F64& value);
+    [[nodiscard]] U1 FPCmpClass32(const F32& value, const U32& op);
    [[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs);
    [[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs);
    [[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, bool is_legacy = false);
--- a/src/shader_recompiler/ir/opcodes.inc
+++ b/src/shader_recompiler/ir/opcodes.inc
@ -219,6 +219,7 @@ OPCODE(FPIsNan32,                                           U1,             F32,
 OPCODE(FPIsNan64,                                           U1,             F64,                                                                            )
 OPCODE(FPIsInf32,                                           U1,             F32,                                                                            )
 OPCODE(FPIsInf64,                                           U1,             F64,                                                                            )
+OPCODE(FPCmpClass32,                                        U1,             F32,            U32                                                             )

 // Integer operations
 OPCODE(IAdd32,                                              U32,            U32,            U32,                                                            )
--- a/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp
+++ b/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp
@ -238,6 +238,18 @@ void FoldBooleanConvert(IR::Inst& inst) {
    }
 }

+void FoldCmpClass(IR::Inst& inst) { // Rewrites an FPCmpClass32 instruction into FPIsNan32 or FPIsInf32 based on its class mask.
+    ASSERT_MSG(inst.Arg(1).IsImmediate(), "Unable to resolve compare operation"); // The mask (arg 1) must be a constant by now.
+    const auto class_mask = static_cast<IR::FloatClassFunc>(inst.Arg(1).U32());
+    if ((class_mask & IR::FloatClassFunc::NaN) == IR::FloatClassFunc::NaN) { // Both NaN bits are set in the mask.
+        inst.ReplaceOpcode(IR::Opcode::FPIsNan32);
+    } else if ((class_mask & IR::FloatClassFunc::Infinity) == IR::FloatClassFunc::Infinity) { // Both infinity bits are set.
+        inst.ReplaceOpcode(IR::Opcode::FPIsInf32);
+    } else {
+        UNREACHABLE(); // Any other class mask combination is not handled.
+    }
+}
+
 void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
    switch (inst.GetOpcode()) {
    case IR::Opcode::IAdd32:
@ -251,6 +263,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
    case IR::Opcode::IMul32:
        FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
        return;
+    case IR::Opcode::FPCmpClass32:
+        FoldCmpClass(inst);
+        return;
    case IR::Opcode::ShiftRightArithmetic32:
        FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); });
        return;
--- a/src/shader_recompiler/ir/reg.h
+++ b/src/shader_recompiler/ir/reg.h
@ -5,6 +5,7 @@

 #include "common/assert.h"
 #include "common/bit_field.h"
+#include "common/enum.h"
 #include "common/types.h"
 #include "video_core/amdgpu/pixel_format.h"

@ -24,6 +25,23 @@ enum class FpDenormMode : u32 {
    InOutAllow = 3,
 };

+enum class FloatClassFunc : u32 { // Bit flags naming floating-point value classes; the cmp_class mask operand is cast to this type.
+    SignalingNan = 1 << 0,
+    QuietNan = 1 << 1,
+    NegativeInfinity = 1 << 2,
+    NegativeNormal = 1 << 3,
+    NegativeDenorm = 1 << 4,
+    NegativeZero = 1 << 5,
+    PositiveZero = 1 << 6,
+    PositiveDenorm = 1 << 7,
+    PositiveNormal = 1 << 8,
+    PositiveInfinity = 1 << 9,
+
+    NaN = SignalingNan | QuietNan, // Union of both NaN bits.
+    Infinity = PositiveInfinity | NegativeInfinity, // Union of both infinity bits.
+};
+DECLARE_ENUM_FLAG_OPERATORS(FloatClassFunc)
+
 union Mode {
    BitField<0, 4, FpRoundMode> fp_round;
    BitField<4, 2, FpDenormMode> fp_denorm_single;