Implemented Legacy Max/Min using NMax/NMin

This commit is contained in:
Nokk 2024-07-08 10:08:42 +10:00
parent c279e9e6f4
commit 86f403aaf9
7 changed files with 44 additions and 11 deletions

View file

@ -51,7 +51,11 @@ Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c) {
return Decorate(ctx, inst, ctx.OpFma(ctx.F64[1], a, b, c));
}
Id EmitFPMax32(EmitContext& ctx, Id a, Id b) {
// Emits the F32 max of a and b: OpNMax when is_legacy is set, OpFMax otherwise.
// NOTE(review): presumably NMax is chosen for its NaN handling — confirm against
// the GLSL.std.450 definitions of NMax and FMax.
Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy) {
    return is_legacy ? ctx.OpNMax(ctx.F32[1], a, b) : ctx.OpFMax(ctx.F32[1], a, b);
}
@ -59,7 +63,12 @@ Id EmitFPMax64(EmitContext& ctx, Id a, Id b) {
return ctx.OpFMax(ctx.F64[1], a, b);
}
Id EmitFPMin32(EmitContext& ctx, Id a, Id b) {
// Emits the F32 min of a and b: OpNMin when is_legacy is set, OpFMin otherwise.
// NOTE(review): presumably NMin is chosen for its NaN handling — confirm against
// the GLSL.std.450 definitions of NMin and FMin.
Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy) {
    return is_legacy ? ctx.OpNMin(ctx.F32[1], a, b) : ctx.OpFMin(ctx.F32[1], a, b);
}

View file

@ -165,9 +165,9 @@ Id EmitFPSub32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPFma16(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
Id EmitFPFma32(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
Id EmitFPFma64(EmitContext& ctx, IR::Inst* inst, Id a, Id b, Id c);
Id EmitFPMax32(EmitContext& ctx, Id a, Id b);
// is_legacy (default false) switches the emitted operation from OpFMax to OpNMax.
Id EmitFPMax32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
Id EmitFPMax64(EmitContext& ctx, Id a, Id b);
Id EmitFPMin32(EmitContext& ctx, Id a, Id b);
// is_legacy (default false) switches the emitted operation from OpFMin to OpNMin.
Id EmitFPMin32(EmitContext& ctx, Id a, Id b, bool is_legacy = false);
Id EmitFPMin64(EmitContext& ctx, Id a, Id b);
Id EmitFPMul16(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
Id EmitFPMul32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);

View file

@ -625,6 +625,9 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
case Opcode::V_MIN3_F32:
translator.V_MIN3_F32(inst);
break;
case Opcode::V_MIN_LEGACY_F32:
translator.V_MIN_LEGACY_F32(inst);
break;
case Opcode::V_MADMK_F32:
translator.V_MADMK_F32(inst);
break;
@ -876,7 +879,7 @@ void Translate(IR::Block* block, u32 block_base, std::span<const GcnInst> inst_l
translator.V_MAD_F32(inst);
break;
case Opcode::V_MAX_LEGACY_F32:
translator.V_MAX_F32(inst);
translator.V_MAX_LEGACY_F32(inst);
break;
case Opcode::V_RSQ_LEGACY_F32:
case Opcode::V_RSQ_CLAMP_F32:

View file

@ -113,6 +113,7 @@ public:
void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst);
void V_MAX_F32(const GcnInst& inst);
void V_MAX_U32(bool is_signed, const GcnInst& inst);
// Handles Opcode::V_MAX_LEGACY_F32: writes ir.FPMax(src0, src1, /*is_legacy=*/true) to dst[0].
void V_MAX_LEGACY_F32(const GcnInst& inst);
void V_RSQ_F32(const GcnInst& inst);
void V_SIN_F32(const GcnInst& inst);
void V_LOG_F32(const GcnInst& inst);
@ -120,6 +121,7 @@ public:
void V_SQRT_F32(const GcnInst& inst);
void V_MIN_F32(const GcnInst& inst);
void V_MIN3_F32(const GcnInst& inst);
// Handles Opcode::V_MIN_LEGACY_F32: writes ir.FPMin(src0, src1, /*is_legacy=*/true) to dst[0].
void V_MIN_LEGACY_F32(const GcnInst& inst);
void V_MADMK_F32(const GcnInst& inst);
void V_CUBEMA_F32(const GcnInst& inst);
void V_CUBESC_F32(const GcnInst& inst);

View file

@ -215,6 +215,12 @@ void Translator::V_MAX_U32(bool is_signed, const GcnInst& inst) {
SetDst(inst.dst[0], ir.IMax(src0, src1, is_signed));
}
void Translator::V_MAX_LEGACY_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
SetDst(inst.dst[0], ir.FPMax(src0, src1, true));
}
void Translator::V_RSQ_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
SetDst(inst.dst[0], ir.FPRecipSqrt(src0));
@ -253,6 +259,12 @@ void Translator::V_MIN3_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPMin(src0, ir.FPMin(src1, src2)));
}
void Translator::V_MIN_LEGACY_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};
SetDst(inst.dst[0], ir.FPMin(src0, src1, true));
}
void Translator::V_MADMK_F32(const GcnInst& inst) {
const IR::F32 src0{GetSrc(inst.src[0], true)};
const IR::F32 src1{GetSrc(inst.src[1], true)};

View file

@ -865,28 +865,35 @@ U1 IREmitter::FPUnordered(const F32F64& lhs, const F32F64& rhs) {
return LogicalOr(FPIsNan(lhs), FPIsNan(rhs));
}
F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs) {
// Builds a floating-point max instruction from two operands of the same type.
//   lhs, rhs   operands; a type mismatch is reported through UNREACHABLE_MSG.
//   is_legacy  passed as an extra argument to the FPMax32 instruction; combining
//              it with F64 operands is reported through UNREACHABLE_MSG.
// Returns the FPMax32 or FPMax64 instruction result, selected by lhs.Type().
F32F64 IREmitter::FPMax(const F32F64& lhs, const F32F64& rhs, bool is_legacy) {
if (lhs.Type() != rhs.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
switch (lhs.Type()) {
case Type::F32:
// NOTE(review): the next two lines look like the removed/added pair of the diff;
// only the second one forwards is_legacy — confirm which one the file keeps.
return Inst<F32>(Opcode::FPMax32, lhs, rhs);
return Inst<F32>(Opcode::FPMax32, lhs, rhs, is_legacy);
case Type::F64:
// The legacy flag is only forwarded on the F32 path above.
if (is_legacy) {
UNREACHABLE_MSG("F64 cannot be used with LEGACY ops");
}
return Inst<F64>(Opcode::FPMax64, lhs, rhs);
default:
ThrowInvalidType(lhs.Type());
}
}
F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs) {
F32F64 IREmitter::FPMin(const F32F64& lhs, const F32F64& rhs, bool is_legacy) {
if (lhs.Type() != rhs.Type()) {
UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
switch (lhs.Type()) {
case Type::F32:
return Inst<F32>(Opcode::FPMin32, lhs, rhs);
return Inst<F32>(Opcode::FPMin32, lhs, rhs, is_legacy);
case Type::F64:
if (is_legacy) {
UNREACHABLE_MSG("F64 cannot be used with LEGACY ops");
}
return Inst<F64>(Opcode::FPMin64, lhs, rhs);
default:
ThrowInvalidType(lhs.Type());

View file

@ -149,8 +149,8 @@ public:
[[nodiscard]] U1 FPIsInf(const F32F64& value);
[[nodiscard]] U1 FPOrdered(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] U1 FPUnordered(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs);
[[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs);
// is_legacy (default false) is forwarded to the FPMax32 instruction; passing true
// with F64 operands hits UNREACHABLE_MSG.
[[nodiscard]] F32F64 FPMax(const F32F64& lhs, const F32F64& rhs, bool is_legacy = false);
// is_legacy (default false) is forwarded to the FPMin32 instruction; passing true
// with F64 operands hits UNREACHABLE_MSG.
[[nodiscard]] F32F64 FPMin(const F32F64& lhs, const F32F64& rhs, bool is_legacy = false);
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
[[nodiscard]] Value IAddCary(const U32& a, const U32& b);