mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-04-21 03:54:45 +00:00
shader_recompiler: Constant propagation pass for cmp_class_f32
This commit is contained in:
parent
7044cbcc99
commit
45db60dfd4
8 changed files with 56 additions and 21 deletions
|
@ -385,4 +385,8 @@ Id EmitFPIsInf64(EmitContext& ctx, Id value) {
|
|||
return ctx.OpIsInf(ctx.U1[1], value);
|
||||
}
|
||||
|
||||
// SPIR-V backend handler for the FPCmpClass32 opcode. It emits nothing and
// unconditionally hits UNREACHABLE().
// NOTE(review): presumably the opcode is expected to have been rewritten into
// FPIsNan32/FPIsInf32 by an earlier pass before the backend runs — confirm.
void EmitFPCmpClass32(EmitContext&) {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
|
|
@ -42,6 +42,7 @@ void EmitSetVcc(EmitContext& ctx);
|
|||
void EmitSetSccLo(EmitContext& ctx);
|
||||
void EmitSetVccLo(EmitContext& ctx);
|
||||
void EmitSetVccHi(EmitContext& ctx);
|
||||
// Backend handler for the FPCmpClass32 opcode; takes only the emit context.
void EmitFPCmpClass32(EmitContext& ctx);
|
||||
void EmitPrologue(EmitContext& ctx);
|
||||
void EmitEpilogue(EmitContext& ctx);
|
||||
void EmitDiscard(EmitContext& ctx);
|
||||
|
|
|
@ -857,36 +857,27 @@ void Translator::V_CVT_FLR_I32_F32(const GcnInst& inst) {
|
|||
}
|
||||
|
||||
// Translates V_CMP_CLASS_F32: tests src0 against the class mask held in src1 and
// stores the boolean result with ir.SetVcc().
// NOTE(review): this span reads like a rendered diff hunk with removed and added
// lines interleaved (two `class_mask` definitions, two `IR::U1 value;`
// declarations, duplicated `else if` heads), so it is not a single compilable
// body as shown — confirm against the real source file before relying on it.
void Translator::V_CMP_CLASS_F32(const GcnInst& inst) {
|
||||
// Local spellings of the individual class-mask bits (bits 0-9).
constexpr u32 SIGNALING_NAN = 1 << 0;
|
||||
constexpr u32 QUIET_NAN = 1 << 1;
|
||||
constexpr u32 NEGATIVE_INFINITY = 1 << 2;
|
||||
constexpr u32 NEGATIVE_NORMAL = 1 << 3;
|
||||
constexpr u32 NEGATIVE_DENORM = 1 << 4;
|
||||
constexpr u32 NEGATIVE_ZERO = 1 << 5;
|
||||
constexpr u32 POSITIVE_ZERO = 1 << 6;
|
||||
constexpr u32 POSITIVE_DENORM = 1 << 7;
|
||||
constexpr u32 POSITIVE_NORMAL = 1 << 8;
|
||||
constexpr u32 POSITIVE_INFINITY = 1 << 9;
|
||||
|
||||
const IR::F32F64 src0{GetSrc(inst.src[0])};
|
||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||
IR::U1 value;
|
||||
// Immediate mask: the matching IR test is selected right here.
if (src1.IsImmediate()) {
|
||||
const u32 class_mask = src1.U32();
|
||||
IR::U1 value;
|
||||
// Only a mask containing both NaN bits or both infinity bits is handled;
// any other mask falls through to UNREACHABLE(). The NaN test is checked first.
if ((class_mask & (SIGNALING_NAN | QUIET_NAN)) == (SIGNALING_NAN | QUIET_NAN)) {
|
||||
const auto class_mask = static_cast<IR::FloatClassFunc>(src1.U32());
|
||||
if ((class_mask & IR::FloatClassFunc::NaN) == IR::FloatClassFunc::NaN) {
|
||||
value = ir.FPIsNan(src0);
|
||||
} else if ((class_mask & (POSITIVE_INFINITY | NEGATIVE_INFINITY)) ==
|
||||
(POSITIVE_INFINITY | NEGATIVE_INFINITY)) {
|
||||
} else if ((class_mask & IR::FloatClassFunc::Infinity) == IR::FloatClassFunc::Infinity) {
|
||||
value = ir.FPIsInf(src0);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
if (inst.dst[1].field == OperandField::VccLo) {
|
||||
return ir.SetVcc(value);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
} else {
|
||||
// We don't know the type yet, delay its resolution.
|
||||
value = ir.FPCmpClass32(src0, src1);
|
||||
}
|
||||
|
||||
// Only VccLo is accepted as the destination field; anything else is UNREACHABLE().
switch (inst.dst[1].field) {
|
||||
case OperandField::VccLo:
|
||||
return ir.SetVcc(value);
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -876,6 +876,10 @@ U1 IREmitter::FPIsInf(const F32F64& value) {
|
|||
}
|
||||
}
|
||||
|
||||
// Emits an FPCmpClass32 instruction with operands `value` and `op` and returns
// its U1 result. No folding or validation happens here.
// NOTE(review): `op` appears to be the float-class bit mask (it is cast to
// IR::FloatClassFunc where this opcode is folded) — confirm.
U1 IREmitter::FPCmpClass32(const F32& value, const U32& op) {
|
||||
return Inst<U1>(Opcode::FPCmpClass32, value, op);
|
||||
}
|
||||
|
||||
U1 IREmitter::FPOrdered(const F32F64& lhs, const F32F64& rhs) {
|
||||
if (lhs.Type() != rhs.Type()) {
|
||||
UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
|
||||
|
|
|
@ -150,6 +150,7 @@ public:
|
|||
[[nodiscard]] U1 FPGreaterThan(const F32F64& lhs, const F32F64& rhs, bool ordered = true);
|
||||
[[nodiscard]] U1 FPIsNan(const F32F64& value);
|
||||
[[nodiscard]] U1 FPIsInf(const F32F64& value);
|
||||
// Emits an FPCmpClass32 instruction on `value` with operand `op`; yields a U1.
[[nodiscard]] U1 FPCmpClass32(const F32& value, const U32& op);
|
||||
[[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs);
|
||||
[[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs);
|
||||
[[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, bool is_legacy = false);
|
||||
|
|
|
@ -219,6 +219,7 @@ OPCODE(FPIsNan32, U1, F32,
|
|||
OPCODE(FPIsNan64, U1, F64, )
|
||||
OPCODE(FPIsInf32, U1, F32, )
|
||||
OPCODE(FPIsInf64, U1, F64, )
|
||||
// Class compare: F32 value and U32 operand in, U1 out.
OPCODE(FPCmpClass32, U1, F32, U32 )
|
||||
|
||||
// Integer operations
|
||||
OPCODE(IAdd32, U32, U32, U32, )
|
||||
|
|
|
@ -238,6 +238,18 @@ void FoldBooleanConvert(IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
// Rewrites a class-compare instruction into a concrete test, based on its mask
// argument Arg(1), which must already be an immediate (asserted below):
//  - mask contains both NaN bits      -> opcode becomes FPIsNan32
//  - mask contains both infinity bits -> opcode becomes FPIsInf32
//  - any other mask                   -> UNREACHABLE()
// The NaN check wins when both conditions hold. Only the opcode is replaced in
// this function; the instruction's arguments are not modified here.
void FoldCmpClass(IR::Inst& inst) {
|
||||
ASSERT_MSG(inst.Arg(1).IsImmediate(), "Unable to resolve compare operation");
|
||||
const auto class_mask = static_cast<IR::FloatClassFunc>(inst.Arg(1).U32());
|
||||
if ((class_mask & IR::FloatClassFunc::NaN) == IR::FloatClassFunc::NaN) {
|
||||
inst.ReplaceOpcode(IR::Opcode::FPIsNan32);
|
||||
} else if ((class_mask & IR::FloatClassFunc::Infinity) == IR::FloatClassFunc::Infinity) {
|
||||
inst.ReplaceOpcode(IR::Opcode::FPIsInf32);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
|
||||
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||
switch (inst.GetOpcode()) {
|
||||
case IR::Opcode::IAdd32:
|
||||
|
@ -251,6 +263,9 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
|||
case IR::Opcode::IMul32:
|
||||
FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a * b; });
|
||||
return;
|
||||
case IR::Opcode::FPCmpClass32:
|
||||
FoldCmpClass(inst);
|
||||
return;
|
||||
case IR::Opcode::ShiftRightArithmetic32:
|
||||
FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return static_cast<u32>(a >> b); });
|
||||
return;
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include "common/assert.h"
|
||||
#include "common/bit_field.h"
|
||||
#include "common/enum.h"
|
||||
#include "common/types.h"
|
||||
#include "video_core/amdgpu/pixel_format.h"
|
||||
|
||||
|
@ -24,6 +25,23 @@ enum class FpDenormMode : u32 {
|
|||
InOutAllow = 3,
|
||||
};
|
||||
|
||||
// Bit flags naming floating-point value classes, one bit per class (bits 0-9),
// plus two combined masks. Values of this type are produced by casting a u32
// class mask.
enum class FloatClassFunc : u32 {
|
||||
SignalingNan = 1 << 0,
|
||||
QuietNan = 1 << 1,
|
||||
NegativeInfinity = 1 << 2,
|
||||
NegativeNormal = 1 << 3,
|
||||
NegativeDenorm = 1 << 4,
|
||||
NegativeZero = 1 << 5,
|
||||
PositiveZero = 1 << 6,
|
||||
PositiveDenorm = 1 << 7,
|
||||
PositiveNormal = 1 << 8,
|
||||
PositiveInfinity = 1 << 9,
|
||||
|
||||
// Combined masks: either kind of NaN, and either sign of infinity.
NaN = SignalingNan | QuietNan,
|
||||
Infinity = PositiveInfinity | NegativeInfinity,
|
||||
};
|
||||
// NOTE(review): presumably supplies the bitwise operators used on this enum
// (e.g. `class_mask & IR::FloatClassFunc::NaN`); the macro is defined outside
// this view — confirm.
DECLARE_ENUM_FLAG_OPERATORS(FloatClassFunc)
|
||||
|
||||
union Mode {
|
||||
BitField<0, 4, FpRoundMode> fp_round;
|
||||
BitField<4, 2, FpDenormMode> fp_denorm_single;
|
||||
|
|
Loading…
Add table
Reference in a new issue