From df257087d1c1f6189d47844ad6b9f2d42b298a3f Mon Sep 17 00:00:00 2001 From: Vinicius Rangel Date: Thu, 25 Jul 2024 20:33:59 -0300 Subject: [PATCH] shader recompiler: add V_MAD_U64_U32 vcc output - add V_MAD_U64_U32 vcc output - ILessThan for 64-bits --- .../backend/spirv/emit_spirv_instructions.h | 6 ++++-- .../backend/spirv/emit_spirv_integer.cpp | 12 ++++++++++-- .../frontend/translate/vector_alu.cpp | 12 ++++++++---- src/shader_recompiler/ir/ir_emitter.cpp | 14 ++++++++++++-- src/shader_recompiler/ir/ir_emitter.h | 2 +- src/shader_recompiler/ir/opcodes.inc | 6 ++++-- .../ir/passes/constant_propogation_pass.cpp | 10 ++++++++-- 7 files changed, 47 insertions(+), 15 deletions(-) diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h index bb533e9cf..80dd66b16 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h +++ b/src/shader_recompiler/backend/spirv/emit_spirv_instructions.h @@ -288,8 +288,10 @@ Id EmitSMax32(EmitContext& ctx, Id a, Id b); Id EmitUMax32(EmitContext& ctx, Id a, Id b); Id EmitSClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max); -Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs); -Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs); +Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs); Id EmitIEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitSLessThanEqual(EmitContext& ctx, Id lhs, Id rhs); Id EmitULessThanEqual(EmitContext& ctx, Id lhs, Id rhs); diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp index e238a693e..019ceb01b 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_integer.cpp @@ -242,11 +242,19 @@ Id EmitUClamp32(EmitContext& ctx, IR::Inst* inst, Id value, Id min, Id max) { return result; } -Id EmitSLessThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSLessThan32(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpSLessThan(ctx.U1[1], lhs, rhs); } -Id EmitULessThan(EmitContext& ctx, Id lhs, Id rhs) { +Id EmitSLessThan64(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpSLessThan(ctx.U1[1], lhs, rhs); +} + +Id EmitULessThan32(EmitContext& ctx, Id lhs, Id rhs) { + return ctx.OpULessThan(ctx.U1[1], lhs, rhs); +} + +Id EmitULessThan64(EmitContext& ctx, Id lhs, Id rhs) { return ctx.OpULessThan(ctx.U1[1], lhs, rhs); } diff --git a/src/shader_recompiler/frontend/translate/vector_alu.cpp b/src/shader_recompiler/frontend/translate/vector_alu.cpp index 6e73c980f..629b3b15f 100644 --- a/src/shader_recompiler/frontend/translate/vector_alu.cpp +++ b/src/shader_recompiler/frontend/translate/vector_alu.cpp @@ -325,11 +325,15 @@ void Translator::V_MAD_U64_U32(const GcnInst& inst) { const auto src1 = GetSrc(inst.src[1]); const auto src2 = GetSrc64(inst.src[2]); - IR::U64 result; - result = ir.IMul(src0, src1); - result = ir.IAdd(ir.UConvert(64, result), src2); + IR::U64 mul_result = ir.UConvert(64, ir.IMul(src0, src1)); + IR::U64 sum_result = ir.IAdd(mul_result, src2); - SetDst64(inst.dst[0], result); + SetDst64(inst.dst[0], sum_result); + + IR::U1 less_src0 = ir.ILessThan(sum_result, mul_result, false); + IR::U1 less_src1 = ir.ILessThan(sum_result, src2, false); + IR::U1 did_overflow = ir.LogicalOr(less_src0, less_src1); + ir.SetVcc(did_overflow); } void Translator::V_CMP_U32(ConditionOp op, bool is_signed, bool set_exec, const GcnInst& inst) { diff --git a/src/shader_recompiler/ir/ir_emitter.cpp b/src/shader_recompiler/ir/ir_emitter.cpp index d203bd616..0d81b3003 100644 --- a/src/shader_recompiler/ir/ir_emitter.cpp +++ b/src/shader_recompiler/ir/ir_emitter.cpp @@ -1115,8 +1115,18 @@ U32 IREmitter::UClamp(const U32& value, const U32& min, const U32& max) { return Inst(Opcode::UClamp32, value, min, max); } -U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) { - return Inst(is_signed ? Opcode::SLessThan : Opcode::ULessThan, lhs, rhs); +U1 IREmitter::ILessThan(const U32U64& lhs, const U32U64& rhs, bool is_signed) { + if (lhs.Type() != rhs.Type()) { + UNREACHABLE_MSG("Mismatching types {} and {}", lhs.Type(), rhs.Type()); + } + switch (lhs.Type()) { + case Type::U32: + return Inst(is_signed ? Opcode::SLessThan32 : Opcode::ULessThan32, lhs, rhs); + case Type::U64: + return Inst(is_signed ? Opcode::SLessThan64 : Opcode::ULessThan64, lhs, rhs); + default: + ThrowInvalidType(lhs.Type()); + } } U1 IREmitter::IEqual(const U32U64& lhs, const U32U64& rhs) { diff --git a/src/shader_recompiler/ir/ir_emitter.h b/src/shader_recompiler/ir/ir_emitter.h index 423f7d59d..7ee4e8240 100644 --- a/src/shader_recompiler/ir/ir_emitter.h +++ b/src/shader_recompiler/ir/ir_emitter.h @@ -188,7 +188,7 @@ public: [[nodiscard]] U32 SClamp(const U32& value, const U32& min, const U32& max); [[nodiscard]] U32 UClamp(const U32& value, const U32& min, const U32& max); - [[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed); + [[nodiscard]] U1 ILessThan(const U32U64& lhs, const U32U64& rhs, bool is_signed); [[nodiscard]] U1 IEqual(const U32U64& lhs, const U32U64& rhs); [[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed); [[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed); diff --git a/src/shader_recompiler/ir/opcodes.inc b/src/shader_recompiler/ir/opcodes.inc index eadb6e7b5..293d69d2e 100644 --- a/src/shader_recompiler/ir/opcodes.inc +++ b/src/shader_recompiler/ir/opcodes.inc @@ -260,8 +260,10 @@ OPCODE(SMax32, U32, U32, OPCODE(UMax32, U32, U32, U32, ) OPCODE(SClamp32, U32, U32, U32, U32, ) OPCODE(UClamp32, U32, U32, U32, U32, ) -OPCODE(SLessThan, U1, U32, U32, ) -OPCODE(ULessThan, U1, U32, U32, ) +OPCODE(SLessThan32, U1, U32, U32, ) +OPCODE(SLessThan64, U1, U64, U64, ) +OPCODE(ULessThan32, U1, U32, U32, ) +OPCODE(ULessThan64, U1, U64, U64, ) OPCODE(IEqual, U1, U32, U32, ) OPCODE(SLessThanEqual, U1, U32, U32, ) OPCODE(ULessThanEqual, U1, U32, U32, ) diff --git a/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp b/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp index 7cd896fbd..e6fdc680d 100644 --- a/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp +++ b/src/shader_recompiler/ir/passes/constant_propogation_pass.cpp @@ -281,12 +281,18 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) { return FoldLogicalOr(inst); case IR::Opcode::LogicalNot: return FoldLogicalNot(inst); - case IR::Opcode::SLessThan: + case IR::Opcode::SLessThan32: FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a < b; }); return; - case IR::Opcode::ULessThan: + case IR::Opcode::SLessThan64: + FoldWhenAllImmediates(inst, [](s64 a, s64 b) { return a < b; }); + return; + case IR::Opcode::ULessThan32: FoldWhenAllImmediates(inst, [](u32 a, u32 b) { return a < b; }); return; + case IR::Opcode::ULessThan64: + FoldWhenAllImmediates(inst, [](u64 a, u64 b) { return a < b; }); + return; case IR::Opcode::SLessThanEqual: FoldWhenAllImmediates(inst, [](s32 a, s32 b) { return a <= b; }); return;