From bbe271eec6735632a35aca7a87f38ad9800ab15c Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 25 May 2024 08:57:35 +0200 Subject: [PATCH 01/32] JitArm64: Refactor CR bit manipulation code This brings JitArm64 more in line with Jit64, and makes the next commit easier to implement. No functional change. --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 7 +- .../JitArm64/JitArm64_SystemRegisters.cpp | 279 +++++++++--------- 2 files changed, 147 insertions(+), 139 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 0c1ea0d647..6cfbdb6a47 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -355,8 +355,13 @@ protected: Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG); void WriteBLRExit(Arm64Gen::ARM64Reg dest); - Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); + void GetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg out, bool negate = false); + void SetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg in); + void ClearCRFieldBit(int field, int bit); + void SetCRFieldBit(int field, int bit); void FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg); + Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); + void UpdateFPExceptionSummary(Arm64Gen::ARM64Reg fpscr); void UpdateRoundingMode(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index df4ea4931a..666103467c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -20,6 +20,142 @@ using namespace Arm64Gen; +void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out, bool negate) +{ + ARM64Reg CR = gpr.CR(field); + ARM64Reg WCR = EncodeRegTo32(CR); + + switch (bit) + { + case PowerPC::CR_SO_BIT: // check bit 59 set + UBFX(out, CR, 
PowerPC::CR_EMU_SO_BIT, 1); + if (negate) + EOR(out, out, LogicalImm(1, GPRSize::B64)); + break; + + case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0 + CMP(WCR, ARM64Reg::WZR); + CSET(out, negate ? CC_NEQ : CC_EQ); + break; + + case PowerPC::CR_GT_BIT: // check val > 0 + CMP(CR, ARM64Reg::ZR); + CSET(out, negate ? CC_LE : CC_GT); + break; + + case PowerPC::CR_LT_BIT: // check bit 62 set + UBFX(out, CR, PowerPC::CR_EMU_LT_BIT, 1); + if (negate) + EOR(out, out, LogicalImm(1, GPRSize::B64)); + break; + + default: + ASSERT_MSG(DYNA_REC, false, "Invalid CR bit"); + } +} + +void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in) +{ + gpr.BindCRToRegister(field, true); + ARM64Reg CR = gpr.CR(field); + + if (bit != PowerPC::CR_GT_BIT) + FixGTBeforeSettingCRFieldBit(CR); + + switch (bit) + { + case PowerPC::CR_SO_BIT: // set bit 59 to input + BFI(CR, in, PowerPC::CR_EMU_SO_BIT, 1); + break; + + case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input + AND(CR, CR, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); + EOR(in, in, LogicalImm(1, GPRSize::B64)); + ORR(CR, CR, in); + break; + + case PowerPC::CR_GT_BIT: // set bit 63 to !input + EOR(in, in, LogicalImm(1, GPRSize::B64)); + BFI(CR, in, 63, 1); + break; + + case PowerPC::CR_LT_BIT: // set bit 62 to input + BFI(CR, in, PowerPC::CR_EMU_LT_BIT, 1); + break; + } + + ORR(CR, CR, LogicalImm(1ULL << 32, GPRSize::B64)); +} + +void JitArm64::ClearCRFieldBit(int field, int bit) +{ + gpr.BindCRToRegister(field, true); + ARM64Reg XA = gpr.CR(field); + + switch (bit) + { + case PowerPC::CR_SO_BIT: + AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_SO_BIT), GPRSize::B64)); + break; + + case PowerPC::CR_EQ_BIT: + FixGTBeforeSettingCRFieldBit(XA); + ORR(XA, XA, LogicalImm(1, GPRSize::B64)); + break; + + case PowerPC::CR_GT_BIT: + ORR(XA, XA, LogicalImm(u64(1) << 63, GPRSize::B64)); + break; + + case PowerPC::CR_LT_BIT: + AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_LT_BIT), GPRSize::B64)); + break; + } +} + 
+void JitArm64::SetCRFieldBit(int field, int bit) +{ + gpr.BindCRToRegister(field, true); + ARM64Reg XA = gpr.CR(field); + + if (bit != PowerPC::CR_GT_BIT) + FixGTBeforeSettingCRFieldBit(XA); + + switch (bit) + { + case PowerPC::CR_SO_BIT: + ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_SO_BIT, GPRSize::B64)); + break; + + case PowerPC::CR_EQ_BIT: + AND(XA, XA, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); + break; + + case PowerPC::CR_GT_BIT: + AND(XA, XA, LogicalImm(~(u64(1) << 63), GPRSize::B64)); + break; + + case PowerPC::CR_LT_BIT: + ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_LT_BIT, GPRSize::B64)); + break; + } + + ORR(XA, XA, LogicalImm(u64(1) << 32, GPRSize::B64)); +} + +void JitArm64::FixGTBeforeSettingCRFieldBit(ARM64Reg reg) +{ + // GT is considered unset if the internal representation is <= 0, or in other words, + // if the internal representation either has bit 63 set or has all bits set to zero. + // If all bits are zero and we set some bit that's unrelated to GT, we need to set bit 63 so GT + // doesn't accidentally become considered set. Gross but necessary; this can break actual games. + auto WA = gpr.GetScopedReg(); + ARM64Reg XA = EncodeRegTo64(WA); + ORR(XA, reg, LogicalImm(1ULL << 63, GPRSize::B64)); + CMP(reg, ARM64Reg::ZR); + CSEL(reg, reg, XA, CC_NEQ); +} + FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) { ARM64Reg XA = gpr.CR(field); @@ -42,19 +178,6 @@ FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) } } -void JitArm64::FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg) -{ - // GT is considered unset if the internal representation is <= 0, or in other words, - // if the internal representation either has bit 63 set or has all bits set to zero. - // If all bits are zero and we set some bit that's unrelated to GT, we need to set bit 63 so GT - // doesn't accidentally become considered set. Gross but necessary; this can break actual games. 
- auto WA = gpr.GetScopedReg(); - ARM64Reg XA = EncodeRegTo64(WA); - ORR(XA, reg, LogicalImm(1ULL << 63, GPRSize::B64)); - CMP(reg, ARM64Reg::ZR); - CSEL(reg, reg, XA, CC_NEQ); -} - void JitArm64::UpdateFPExceptionSummary(ARM64Reg fpscr) { auto WA = gpr.GetScopedReg(); @@ -471,67 +594,14 @@ void JitArm64::crXXX(UGeckoInstruction inst) // Special case: crclr if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 193) { - // Clear CR field bit - int field = inst.CRBD >> 2; - int bit = 3 - (inst.CRBD & 3); - - gpr.BindCRToRegister(field, true); - ARM64Reg XA = gpr.CR(field); - switch (bit) - { - case PowerPC::CR_SO_BIT: - AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_SO_BIT), GPRSize::B64)); - break; - - case PowerPC::CR_EQ_BIT: - FixGTBeforeSettingCRFieldBit(XA); - ORR(XA, XA, LogicalImm(1, GPRSize::B64)); - break; - - case PowerPC::CR_GT_BIT: - ORR(XA, XA, LogicalImm(u64(1) << 63, GPRSize::B64)); - break; - - case PowerPC::CR_LT_BIT: - AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_LT_BIT), GPRSize::B64)); - break; - } + ClearCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); return; } // Special case: crset if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 289) { - // SetCRFieldBit - int field = inst.CRBD >> 2; - int bit = 3 - (inst.CRBD & 3); - - gpr.BindCRToRegister(field, true); - ARM64Reg XA = gpr.CR(field); - - if (bit != PowerPC::CR_GT_BIT) - FixGTBeforeSettingCRFieldBit(XA); - - switch (bit) - { - case PowerPC::CR_SO_BIT: - ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_SO_BIT, GPRSize::B64)); - break; - - case PowerPC::CR_EQ_BIT: - AND(XA, XA, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); - break; - - case PowerPC::CR_GT_BIT: - AND(XA, XA, LogicalImm(~(u64(1) << 63), GPRSize::B64)); - break; - - case PowerPC::CR_LT_BIT: - ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_LT_BIT, GPRSize::B64)); - break; - } - - ORR(XA, XA, LogicalImm(u64(1) << 32, GPRSize::B64)); + SetCRFieldBit(inst.CRBD >> 2, 3 - 
(inst.CRBD & 3)); return; } @@ -547,44 +617,8 @@ void JitArm64::crXXX(UGeckoInstruction inst) bool negateB = inst.SUBOP10 == 129 || inst.SUBOP10 == 417 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; - // GetCRFieldBit - for (int i = 0; i < 2; i++) - { - int field = i ? inst.CRBB >> 2 : inst.CRBA >> 2; - int bit = i ? 3 - (inst.CRBB & 3) : 3 - (inst.CRBA & 3); - ARM64Reg out = i ? XB : XA; - bool negate = i ? negateB : negateA; - - ARM64Reg XC = gpr.CR(field); - ARM64Reg WC = EncodeRegTo32(XC); - switch (bit) - { - case PowerPC::CR_SO_BIT: // check bit 59 set - UBFX(out, XC, PowerPC::CR_EMU_SO_BIT, 1); - if (negate) - EOR(out, out, LogicalImm(1, GPRSize::B64)); - break; - - case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0 - CMP(WC, ARM64Reg::WZR); - CSET(out, negate ? CC_NEQ : CC_EQ); - break; - - case PowerPC::CR_GT_BIT: // check val > 0 - CMP(XC, ARM64Reg::ZR); - CSET(out, negate ? CC_LE : CC_GT); - break; - - case PowerPC::CR_LT_BIT: // check bit 62 set - UBFX(out, XC, PowerPC::CR_EMU_LT_BIT, 1); - if (negate) - EOR(out, out, LogicalImm(1, GPRSize::B64)); - break; - - default: - ASSERT_MSG(DYNA_REC, false, "Invalid CR bit"); - } - } + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, negateA); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, negateB); // Compute combined bit switch (inst.SUBOP10) @@ -609,38 +643,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) } // Store result bit in CRBD - int field = inst.CRBD >> 2; - int bit = 3 - (inst.CRBD & 3); - - gpr.BindCRToRegister(field, true); - ARM64Reg CR = gpr.CR(field); - - if (bit != PowerPC::CR_GT_BIT) - FixGTBeforeSettingCRFieldBit(CR); - - switch (bit) - { - case PowerPC::CR_SO_BIT: // set bit 59 to input - BFI(CR, XA, PowerPC::CR_EMU_SO_BIT, 1); - break; - - case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input - AND(CR, CR, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); - EOR(XA, XA, LogicalImm(1, GPRSize::B64)); - ORR(CR, CR, XA); - break; - - case PowerPC::CR_GT_BIT: // set 
bit 63 to !input - EOR(XA, XA, LogicalImm(1, GPRSize::B64)); - BFI(CR, XA, 63, 1); - break; - - case PowerPC::CR_LT_BIT: // set bit 62 to input - BFI(CR, XA, PowerPC::CR_EMU_LT_BIT, 1); - break; - } - - ORR(CR, CR, LogicalImm(1ULL << 32, GPRSize::B64)); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); } void JitArm64::mfcr(UGeckoInstruction inst) From 7fddd39d97d529bb8e2f551ea01392e3650b58ef Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 25 May 2024 16:23:37 +0200 Subject: [PATCH 02/32] JitArm64: Port some crXXX optimizations from Jit64 --- .../JitArm64/JitArm64_SystemRegisters.cpp | 47 ++++++++++++++----- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 666103467c..7f9cf8fbd4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -591,18 +591,43 @@ void JitArm64::crXXX(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); - // Special case: crclr - if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 193) + if (inst.CRBA == inst.CRBB) { - ClearCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); - return; - } - - // Special case: crset - if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 289) - { - SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); - return; + switch (inst.SUBOP10) + { + // crclr + case 129: // crandc: A && ~B => 0 + case 193: // crxor: A ^ B => 0 + { + ClearCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); + return; + } + // crset + case 289: // creqv: ~(A ^ B) => 1 + case 417: // crorc: A || ~B => 1 + { + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); + return; + } + case 257: // crand: A && B => A + case 449: // cror: A || B => A + { + auto WA = gpr.GetScopedReg(); + ARM64Reg XA = EncodeRegTo64(WA); + GetCRFieldBit(inst.CRBA >> 2, 
3 - (inst.CRBA & 3), XA, false); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); + return; + } + case 33: // crnor: ~(A || B) => ~A + case 225: // crnand: ~(A && B) => ~A + { + auto WA = gpr.GetScopedReg(); + ARM64Reg XA = EncodeRegTo64(WA); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, true); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); + return; + } + } } auto WA = gpr.GetScopedReg(); From 9246bcad556995a971a65151cdff7d5e9341493e Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 25 May 2024 17:17:08 +0200 Subject: [PATCH 03/32] JitArm64: Add negate parameter to SetCRFieldBit Unlike on x64, inverting EQ or GT in SetCRFieldBit saves us one instruction. Also unlike on x64, inverting SO or LT in GetCRFieldBit requires an extra instruction (just like in SetCRFieldBit). Due to this, replacing an invert in GetCRFieldBit with an invert in SetCRFieldBit when possible is either equally good or better - never worse. --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 2 +- .../JitArm64/JitArm64_SystemRegisters.cpp | 41 +++++++++++-------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 6cfbdb6a47..07b3f95187 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -356,7 +356,7 @@ protected: void WriteBLRExit(Arm64Gen::ARM64Reg dest); void GetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg out, bool negate = false); - void SetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg in); + void SetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg in, bool negate = false); void ClearCRFieldBit(int field, int bit); void SetCRFieldBit(int field, int bit); void FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 7f9cf8fbd4..7be7740d61 100644 --- 
a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -54,7 +54,7 @@ void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out, bool negate) } } -void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in) +void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in, bool negate) { gpr.BindCRToRegister(field, true); ARM64Reg CR = gpr.CR(field); @@ -66,21 +66,27 @@ void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in) { case PowerPC::CR_SO_BIT: // set bit 59 to input BFI(CR, in, PowerPC::CR_EMU_SO_BIT, 1); + if (negate) + EOR(CR, CR, LogicalImm(1ULL << PowerPC::CR_EMU_SO_BIT, GPRSize::B64)); break; case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input AND(CR, CR, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); - EOR(in, in, LogicalImm(1, GPRSize::B64)); ORR(CR, CR, in); + if (!negate) + EOR(CR, CR, LogicalImm(1ULL << 0, GPRSize::B64)); break; case PowerPC::CR_GT_BIT: // set bit 63 to !input - EOR(in, in, LogicalImm(1, GPRSize::B64)); BFI(CR, in, 63, 1); + if (!negate) + EOR(CR, CR, LogicalImm(1ULL << 63, GPRSize::B64)); break; case PowerPC::CR_LT_BIT: // set bit 62 to input BFI(CR, in, PowerPC::CR_EMU_LT_BIT, 1); + if (negate) + EOR(CR, CR, LogicalImm(1ULL << PowerPC::CR_EMU_LT_BIT, GPRSize::B64)); break; } @@ -615,7 +621,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); - SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, false); return; } case 33: // crnor: ~(A || B) => ~A @@ -623,43 +629,42 @@ void JitArm64::crXXX(UGeckoInstruction inst) { auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, true); - SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA 
& 3), XA, false); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, true); return; } } } + // crandc or crorc + const bool negate_b = inst.SUBOP10 == 129 || inst.SUBOP10 == 417; + // crnor or crnand or creqv + const bool negate_result = inst.SUBOP10 == 33 || inst.SUBOP10 == 225 || inst.SUBOP10 == 289; + auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); { auto WB = gpr.GetScopedReg(); ARM64Reg XB = EncodeRegTo64(WB); - // creqv or crnand or crnor - bool negateA = inst.SUBOP10 == 289 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; - // crandc or crorc or crnand or crnor - bool negateB = - inst.SUBOP10 == 129 || inst.SUBOP10 == 417 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; - - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, negateA); - GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, negateB); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, negate_b); // Compute combined bit switch (inst.SUBOP10) { - case 33: // crnor: ~(A || B) == (~A && ~B) case 129: // crandc: A && ~B + case 225: // crnand: ~(A && B) case 257: // crand: A && B AND(XA, XA, XB); break; case 193: // crxor: A ^ B - case 289: // creqv: ~(A ^ B) = ~A ^ B + case 289: // creqv: ~(A ^ B) EOR(XA, XA, XB); break; - case 225: // crnand: ~(A && B) == (~A || ~B) + case 33: // crnor: ~(A || B) case 417: // crorc: A || ~B case 449: // cror: A || B ORR(XA, XA, XB); @@ -668,7 +673,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) } // Store result bit in CRBD - SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, negate_result); } void JitArm64::mfcr(UGeckoInstruction inst) From 71e97665192ef45487262fcdd4df2286208e61cd Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 25 May 2024 17:35:42 +0200 Subject: [PATCH 04/32] JitArm64: Use BIC/EON/ORN in crXXX This lets us save an instruction in certain scenarios. 
--- .../JitArm64/JitArm64_SystemRegisters.cpp | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 7be7740d61..91a048a353 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -636,10 +636,8 @@ void JitArm64::crXXX(UGeckoInstruction inst) } } - // crandc or crorc - const bool negate_b = inst.SUBOP10 == 129 || inst.SUBOP10 == 417; - // crnor or crnand or creqv - const bool negate_result = inst.SUBOP10 == 33 || inst.SUBOP10 == 225 || inst.SUBOP10 == 289; + // crnor or crnand + const bool negate_result = inst.SUBOP10 == 33 || inst.SUBOP10 == 225; auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); @@ -648,27 +646,36 @@ void JitArm64::crXXX(UGeckoInstruction inst) ARM64Reg XB = EncodeRegTo64(WB); GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); - GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, negate_b); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, false); // Compute combined bit switch (inst.SUBOP10) { - case 129: // crandc: A && ~B case 225: // crnand: ~(A && B) case 257: // crand: A && B AND(XA, XA, XB); break; + case 129: // crandc: A && ~B + BIC(XA, XA, XB); + break; + case 193: // crxor: A ^ B - case 289: // creqv: ~(A ^ B) EOR(XA, XA, XB); break; + case 289: // creqv: ~(A ^ B) = A ^ ~B + EON(XA, XA, XB); + break; + case 33: // crnor: ~(A || B) - case 417: // crorc: A || ~B case 449: // cror: A || B ORR(XA, XA, XB); break; + + case 417: // crorc: A || ~B + ORN(XA, XA, XB); + break; } } From 980a7263131b18f851d83a6ac72b4abe3bed831b Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 2 Nov 2024 10:33:04 +0100 Subject: [PATCH 05/32] JitArm64: Drop GetCRFieldBit's negate parameter No caller is using it anymore. 
--- Source/Core/Core/PowerPC/JitArm64/Jit.h | 2 +- .../JitArm64/JitArm64_SystemRegisters.cpp | 18 +++++++----------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 07b3f95187..3f9c471e11 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -355,7 +355,7 @@ protected: Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG); void WriteBLRExit(Arm64Gen::ARM64Reg dest); - void GetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg out, bool negate = false); + void GetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg out); void SetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg in, bool negate = false); void ClearCRFieldBit(int field, int bit); void SetCRFieldBit(int field, int bit); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 91a048a353..a0084953b9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -20,7 +20,7 @@ using namespace Arm64Gen; -void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out, bool negate) +void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out) { ARM64Reg CR = gpr.CR(field); ARM64Reg WCR = EncodeRegTo32(CR); @@ -29,24 +29,20 @@ void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out, bool negate) { case PowerPC::CR_SO_BIT: // check bit 59 set UBFX(out, CR, PowerPC::CR_EMU_SO_BIT, 1); - if (negate) - EOR(out, out, LogicalImm(1, GPRSize::B64)); break; case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0 CMP(WCR, ARM64Reg::WZR); - CSET(out, negate ? CC_NEQ : CC_EQ); + CSET(out, CC_EQ); break; case PowerPC::CR_GT_BIT: // check val > 0 CMP(CR, ARM64Reg::ZR); - CSET(out, negate ? 
CC_LE : CC_GT); + CSET(out, CC_GT); break; case PowerPC::CR_LT_BIT: // check bit 62 set UBFX(out, CR, PowerPC::CR_EMU_LT_BIT, 1); - if (negate) - EOR(out, out, LogicalImm(1, GPRSize::B64)); break; default: @@ -620,7 +616,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) { auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA); SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, false); return; } @@ -629,7 +625,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) { auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA); SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, true); return; } @@ -645,8 +641,8 @@ void JitArm64::crXXX(UGeckoInstruction inst) auto WB = gpr.GetScopedReg(); ARM64Reg XB = EncodeRegTo64(WB); - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); - GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, false); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB); // Compute combined bit switch (inst.SUBOP10) From 120a28e9d486a4fc42dbe9527847f055f2dcc049 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 25 May 2024 08:57:35 +0200 Subject: [PATCH 06/32] JitArm64: Refactor CR bit manipulation code This brings JitArm64 more in line with Jit64, and makes the next commit easier to implement. No functional change. 
--- Source/Core/Core/PowerPC/JitArm64/Jit.h | 7 +- .../JitArm64/JitArm64_SystemRegisters.cpp | 279 +++++++++--------- 2 files changed, 147 insertions(+), 139 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 0c1ea0d647..6cfbdb6a47 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -355,8 +355,13 @@ protected: Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG); void WriteBLRExit(Arm64Gen::ARM64Reg dest); - Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); + void GetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg out, bool negate = false); + void SetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg in); + void ClearCRFieldBit(int field, int bit); + void SetCRFieldBit(int field, int bit); void FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg); + Arm64Gen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); + void UpdateFPExceptionSummary(Arm64Gen::ARM64Reg fpscr); void UpdateRoundingMode(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index df4ea4931a..666103467c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -20,6 +20,142 @@ using namespace Arm64Gen; +void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out, bool negate) +{ + ARM64Reg CR = gpr.CR(field); + ARM64Reg WCR = EncodeRegTo32(CR); + + switch (bit) + { + case PowerPC::CR_SO_BIT: // check bit 59 set + UBFX(out, CR, PowerPC::CR_EMU_SO_BIT, 1); + if (negate) + EOR(out, out, LogicalImm(1, GPRSize::B64)); + break; + + case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0 + CMP(WCR, ARM64Reg::WZR); + CSET(out, negate ? 
CC_NEQ : CC_EQ); + break; + + case PowerPC::CR_GT_BIT: // check val > 0 + CMP(CR, ARM64Reg::ZR); + CSET(out, negate ? CC_LE : CC_GT); + break; + + case PowerPC::CR_LT_BIT: // check bit 62 set + UBFX(out, CR, PowerPC::CR_EMU_LT_BIT, 1); + if (negate) + EOR(out, out, LogicalImm(1, GPRSize::B64)); + break; + + default: + ASSERT_MSG(DYNA_REC, false, "Invalid CR bit"); + } +} + +void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in) +{ + gpr.BindCRToRegister(field, true); + ARM64Reg CR = gpr.CR(field); + + if (bit != PowerPC::CR_GT_BIT) + FixGTBeforeSettingCRFieldBit(CR); + + switch (bit) + { + case PowerPC::CR_SO_BIT: // set bit 59 to input + BFI(CR, in, PowerPC::CR_EMU_SO_BIT, 1); + break; + + case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input + AND(CR, CR, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); + EOR(in, in, LogicalImm(1, GPRSize::B64)); + ORR(CR, CR, in); + break; + + case PowerPC::CR_GT_BIT: // set bit 63 to !input + EOR(in, in, LogicalImm(1, GPRSize::B64)); + BFI(CR, in, 63, 1); + break; + + case PowerPC::CR_LT_BIT: // set bit 62 to input + BFI(CR, in, PowerPC::CR_EMU_LT_BIT, 1); + break; + } + + ORR(CR, CR, LogicalImm(1ULL << 32, GPRSize::B64)); +} + +void JitArm64::ClearCRFieldBit(int field, int bit) +{ + gpr.BindCRToRegister(field, true); + ARM64Reg XA = gpr.CR(field); + + switch (bit) + { + case PowerPC::CR_SO_BIT: + AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_SO_BIT), GPRSize::B64)); + break; + + case PowerPC::CR_EQ_BIT: + FixGTBeforeSettingCRFieldBit(XA); + ORR(XA, XA, LogicalImm(1, GPRSize::B64)); + break; + + case PowerPC::CR_GT_BIT: + ORR(XA, XA, LogicalImm(u64(1) << 63, GPRSize::B64)); + break; + + case PowerPC::CR_LT_BIT: + AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_LT_BIT), GPRSize::B64)); + break; + } +} + +void JitArm64::SetCRFieldBit(int field, int bit) +{ + gpr.BindCRToRegister(field, true); + ARM64Reg XA = gpr.CR(field); + + if (bit != PowerPC::CR_GT_BIT) + FixGTBeforeSettingCRFieldBit(XA); + + 
switch (bit) + { + case PowerPC::CR_SO_BIT: + ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_SO_BIT, GPRSize::B64)); + break; + + case PowerPC::CR_EQ_BIT: + AND(XA, XA, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); + break; + + case PowerPC::CR_GT_BIT: + AND(XA, XA, LogicalImm(~(u64(1) << 63), GPRSize::B64)); + break; + + case PowerPC::CR_LT_BIT: + ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_LT_BIT, GPRSize::B64)); + break; + } + + ORR(XA, XA, LogicalImm(u64(1) << 32, GPRSize::B64)); +} + +void JitArm64::FixGTBeforeSettingCRFieldBit(ARM64Reg reg) +{ + // GT is considered unset if the internal representation is <= 0, or in other words, + // if the internal representation either has bit 63 set or has all bits set to zero. + // If all bits are zero and we set some bit that's unrelated to GT, we need to set bit 63 so GT + // doesn't accidentally become considered set. Gross but necessary; this can break actual games. + auto WA = gpr.GetScopedReg(); + ARM64Reg XA = EncodeRegTo64(WA); + ORR(XA, reg, LogicalImm(1ULL << 63, GPRSize::B64)); + CMP(reg, ARM64Reg::ZR); + CSEL(reg, reg, XA, CC_NEQ); +} + FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) { ARM64Reg XA = gpr.CR(field); @@ -42,19 +178,6 @@ FixupBranch JitArm64::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) } } -void JitArm64::FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg) -{ - // GT is considered unset if the internal representation is <= 0, or in other words, - // if the internal representation either has bit 63 set or has all bits set to zero. - // If all bits are zero and we set some bit that's unrelated to GT, we need to set bit 63 so GT - // doesn't accidentally become considered set. Gross but necessary; this can break actual games. 
- auto WA = gpr.GetScopedReg(); - ARM64Reg XA = EncodeRegTo64(WA); - ORR(XA, reg, LogicalImm(1ULL << 63, GPRSize::B64)); - CMP(reg, ARM64Reg::ZR); - CSEL(reg, reg, XA, CC_NEQ); -} - void JitArm64::UpdateFPExceptionSummary(ARM64Reg fpscr) { auto WA = gpr.GetScopedReg(); @@ -471,67 +594,14 @@ void JitArm64::crXXX(UGeckoInstruction inst) // Special case: crclr if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 193) { - // Clear CR field bit - int field = inst.CRBD >> 2; - int bit = 3 - (inst.CRBD & 3); - - gpr.BindCRToRegister(field, true); - ARM64Reg XA = gpr.CR(field); - switch (bit) - { - case PowerPC::CR_SO_BIT: - AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_SO_BIT), GPRSize::B64)); - break; - - case PowerPC::CR_EQ_BIT: - FixGTBeforeSettingCRFieldBit(XA); - ORR(XA, XA, LogicalImm(1, GPRSize::B64)); - break; - - case PowerPC::CR_GT_BIT: - ORR(XA, XA, LogicalImm(u64(1) << 63, GPRSize::B64)); - break; - - case PowerPC::CR_LT_BIT: - AND(XA, XA, LogicalImm(~(u64(1) << PowerPC::CR_EMU_LT_BIT), GPRSize::B64)); - break; - } + ClearCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); return; } // Special case: crset if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 289) { - // SetCRFieldBit - int field = inst.CRBD >> 2; - int bit = 3 - (inst.CRBD & 3); - - gpr.BindCRToRegister(field, true); - ARM64Reg XA = gpr.CR(field); - - if (bit != PowerPC::CR_GT_BIT) - FixGTBeforeSettingCRFieldBit(XA); - - switch (bit) - { - case PowerPC::CR_SO_BIT: - ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_SO_BIT, GPRSize::B64)); - break; - - case PowerPC::CR_EQ_BIT: - AND(XA, XA, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); - break; - - case PowerPC::CR_GT_BIT: - AND(XA, XA, LogicalImm(~(u64(1) << 63), GPRSize::B64)); - break; - - case PowerPC::CR_LT_BIT: - ORR(XA, XA, LogicalImm(u64(1) << PowerPC::CR_EMU_LT_BIT, GPRSize::B64)); - break; - } - - ORR(XA, XA, LogicalImm(u64(1) << 32, GPRSize::B64)); + SetCRFieldBit(inst.CRBD >> 2, 3 - 
(inst.CRBD & 3)); return; } @@ -547,44 +617,8 @@ void JitArm64::crXXX(UGeckoInstruction inst) bool negateB = inst.SUBOP10 == 129 || inst.SUBOP10 == 417 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; - // GetCRFieldBit - for (int i = 0; i < 2; i++) - { - int field = i ? inst.CRBB >> 2 : inst.CRBA >> 2; - int bit = i ? 3 - (inst.CRBB & 3) : 3 - (inst.CRBA & 3); - ARM64Reg out = i ? XB : XA; - bool negate = i ? negateB : negateA; - - ARM64Reg XC = gpr.CR(field); - ARM64Reg WC = EncodeRegTo32(XC); - switch (bit) - { - case PowerPC::CR_SO_BIT: // check bit 59 set - UBFX(out, XC, PowerPC::CR_EMU_SO_BIT, 1); - if (negate) - EOR(out, out, LogicalImm(1, GPRSize::B64)); - break; - - case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0 - CMP(WC, ARM64Reg::WZR); - CSET(out, negate ? CC_NEQ : CC_EQ); - break; - - case PowerPC::CR_GT_BIT: // check val > 0 - CMP(XC, ARM64Reg::ZR); - CSET(out, negate ? CC_LE : CC_GT); - break; - - case PowerPC::CR_LT_BIT: // check bit 62 set - UBFX(out, XC, PowerPC::CR_EMU_LT_BIT, 1); - if (negate) - EOR(out, out, LogicalImm(1, GPRSize::B64)); - break; - - default: - ASSERT_MSG(DYNA_REC, false, "Invalid CR bit"); - } - } + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, negateA); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, negateB); // Compute combined bit switch (inst.SUBOP10) @@ -609,38 +643,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) } // Store result bit in CRBD - int field = inst.CRBD >> 2; - int bit = 3 - (inst.CRBD & 3); - - gpr.BindCRToRegister(field, true); - ARM64Reg CR = gpr.CR(field); - - if (bit != PowerPC::CR_GT_BIT) - FixGTBeforeSettingCRFieldBit(CR); - - switch (bit) - { - case PowerPC::CR_SO_BIT: // set bit 59 to input - BFI(CR, XA, PowerPC::CR_EMU_SO_BIT, 1); - break; - - case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input - AND(CR, CR, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); - EOR(XA, XA, LogicalImm(1, GPRSize::B64)); - ORR(CR, CR, XA); - break; - - case PowerPC::CR_GT_BIT: // set 
bit 63 to !input - EOR(XA, XA, LogicalImm(1, GPRSize::B64)); - BFI(CR, XA, 63, 1); - break; - - case PowerPC::CR_LT_BIT: // set bit 62 to input - BFI(CR, XA, PowerPC::CR_EMU_LT_BIT, 1); - break; - } - - ORR(CR, CR, LogicalImm(1ULL << 32, GPRSize::B64)); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); } void JitArm64::mfcr(UGeckoInstruction inst) From 77a57393337c64e171631af8208a5f8c49f09b94 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 25 May 2024 16:23:37 +0200 Subject: [PATCH 07/32] JitArm64: Port some crXXX optimizations from Jit64 --- .../JitArm64/JitArm64_SystemRegisters.cpp | 47 ++++++++++++++----- 1 file changed, 36 insertions(+), 11 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 666103467c..7f9cf8fbd4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -591,18 +591,43 @@ void JitArm64::crXXX(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); - // Special case: crclr - if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 193) + if (inst.CRBA == inst.CRBB) { - ClearCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); - return; - } - - // Special case: crset - if (inst.CRBA == inst.CRBB && inst.CRBA == inst.CRBD && inst.SUBOP10 == 289) - { - SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); - return; + switch (inst.SUBOP10) + { + // crclr + case 129: // crandc: A && ~B => 0 + case 193: // crxor: A ^ B => 0 + { + ClearCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); + return; + } + // crset + case 289: // creqv: ~(A ^ B) => 1 + case 417: // crorc: A || ~B => 1 + { + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3)); + return; + } + case 257: // crand: A && B => A + case 449: // cror: A || B => A + { + auto WA = gpr.GetScopedReg(); + ARM64Reg XA = EncodeRegTo64(WA); + GetCRFieldBit(inst.CRBA >> 2, 
3 - (inst.CRBA & 3), XA, false); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); + return; + } + case 33: // crnor: ~(A || B) => ~A + case 225: // crnand: ~(A && B) => ~A + { + auto WA = gpr.GetScopedReg(); + ARM64Reg XA = EncodeRegTo64(WA); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, true); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); + return; + } + } } auto WA = gpr.GetScopedReg(); From 123aa3b9ec5ac8e22b0d205a8f85237ff7f2f62c Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 25 May 2024 17:17:08 +0200 Subject: [PATCH 08/32] JitArm64: Add negate parameter to SetCRFieldBit Unlike on x64, inverting EQ or GT in SetCRFieldBit saves us one instruction. Also unlike on x64, inverting SO or LT in GetCRFieldBit requires an extra instruction (just like in SetCRFieldBit). Due to this, replacing an invert in GetCRFieldBit with an invert in SetCRFieldBit when possible is either equally good or better - never worse. --- Source/Core/Core/PowerPC/JitArm64/Jit.h | 2 +- .../JitArm64/JitArm64_SystemRegisters.cpp | 41 +++++++++++-------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 6cfbdb6a47..07b3f95187 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -356,7 +356,7 @@ protected: void WriteBLRExit(Arm64Gen::ARM64Reg dest); void GetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg out, bool negate = false); - void SetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg in); + void SetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg in, bool negate = false); void ClearCRFieldBit(int field, int bit); void SetCRFieldBit(int field, int bit); void FixGTBeforeSettingCRFieldBit(Arm64Gen::ARM64Reg reg); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 7f9cf8fbd4..7be7740d61 100644 --- 
a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -54,7 +54,7 @@ void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out, bool negate) } } -void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in) +void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in, bool negate) { gpr.BindCRToRegister(field, true); ARM64Reg CR = gpr.CR(field); @@ -66,21 +66,27 @@ void JitArm64::SetCRFieldBit(int field, int bit, ARM64Reg in) { case PowerPC::CR_SO_BIT: // set bit 59 to input BFI(CR, in, PowerPC::CR_EMU_SO_BIT, 1); + if (negate) + EOR(CR, CR, LogicalImm(1ULL << PowerPC::CR_EMU_SO_BIT, GPRSize::B64)); break; case PowerPC::CR_EQ_BIT: // clear low 32 bits, set bit 0 to !input AND(CR, CR, LogicalImm(0xFFFF'FFFF'0000'0000, GPRSize::B64)); - EOR(in, in, LogicalImm(1, GPRSize::B64)); ORR(CR, CR, in); + if (!negate) + EOR(CR, CR, LogicalImm(1ULL << 0, GPRSize::B64)); break; case PowerPC::CR_GT_BIT: // set bit 63 to !input - EOR(in, in, LogicalImm(1, GPRSize::B64)); BFI(CR, in, 63, 1); + if (!negate) + EOR(CR, CR, LogicalImm(1ULL << 63, GPRSize::B64)); break; case PowerPC::CR_LT_BIT: // set bit 62 to input BFI(CR, in, PowerPC::CR_EMU_LT_BIT, 1); + if (negate) + EOR(CR, CR, LogicalImm(1ULL << PowerPC::CR_EMU_LT_BIT, GPRSize::B64)); break; } @@ -615,7 +621,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); - SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, false); return; } case 33: // crnor: ~(A || B) => ~A @@ -623,43 +629,42 @@ void JitArm64::crXXX(UGeckoInstruction inst) { auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, true); - SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA 
& 3), XA, false); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, true); return; } } } + // crandc or crorc + const bool negate_b = inst.SUBOP10 == 129 || inst.SUBOP10 == 417; + // crnor or crnand or creqv + const bool negate_result = inst.SUBOP10 == 33 || inst.SUBOP10 == 225 || inst.SUBOP10 == 289; + auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); { auto WB = gpr.GetScopedReg(); ARM64Reg XB = EncodeRegTo64(WB); - // creqv or crnand or crnor - bool negateA = inst.SUBOP10 == 289 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; - // crandc or crorc or crnand or crnor - bool negateB = - inst.SUBOP10 == 129 || inst.SUBOP10 == 417 || inst.SUBOP10 == 225 || inst.SUBOP10 == 33; - - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, negateA); - GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, negateB); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, negate_b); // Compute combined bit switch (inst.SUBOP10) { - case 33: // crnor: ~(A || B) == (~A && ~B) case 129: // crandc: A && ~B + case 225: // crnand: ~(A && B) case 257: // crand: A && B AND(XA, XA, XB); break; case 193: // crxor: A ^ B - case 289: // creqv: ~(A ^ B) = ~A ^ B + case 289: // creqv: ~(A ^ B) EOR(XA, XA, XB); break; - case 225: // crnand: ~(A && B) == (~A || ~B) + case 33: // crnor: ~(A || B) case 417: // crorc: A || ~B case 449: // cror: A || B ORR(XA, XA, XB); @@ -668,7 +673,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) } // Store result bit in CRBD - SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA); + SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, negate_result); } void JitArm64::mfcr(UGeckoInstruction inst) From 6ecd292db4621fec1f3cecbfa397c70a159d2952 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 25 May 2024 17:35:42 +0200 Subject: [PATCH 09/32] JitArm64: Use BIC/EON/ORN in crXXX This lets us save an instruction in certain scenarios. 
--- .../JitArm64/JitArm64_SystemRegisters.cpp | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 7be7740d61..91a048a353 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -636,10 +636,8 @@ void JitArm64::crXXX(UGeckoInstruction inst) } } - // crandc or crorc - const bool negate_b = inst.SUBOP10 == 129 || inst.SUBOP10 == 417; - // crnor or crnand or creqv - const bool negate_result = inst.SUBOP10 == 33 || inst.SUBOP10 == 225 || inst.SUBOP10 == 289; + // crnor or crnand + const bool negate_result = inst.SUBOP10 == 33 || inst.SUBOP10 == 225; auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); @@ -648,27 +646,36 @@ void JitArm64::crXXX(UGeckoInstruction inst) ARM64Reg XB = EncodeRegTo64(WB); GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); - GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, negate_b); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, false); // Compute combined bit switch (inst.SUBOP10) { - case 129: // crandc: A && ~B case 225: // crnand: ~(A && B) case 257: // crand: A && B AND(XA, XA, XB); break; + case 129: // crandc: A && ~B + BIC(XA, XA, XB); + break; + case 193: // crxor: A ^ B - case 289: // creqv: ~(A ^ B) EOR(XA, XA, XB); break; + case 289: // creqv: ~(A ^ B) = A ^ ~B + EON(XA, XA, XB); + break; + case 33: // crnor: ~(A || B) - case 417: // crorc: A || ~B case 449: // cror: A || B ORR(XA, XA, XB); break; + + case 417: // crorc: A || ~B + ORN(XA, XA, XB); + break; } } From 30bcb2b8ddb13ca2c0ce4ec4dd5a3554b3dd9c89 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 2 Nov 2024 10:33:04 +0100 Subject: [PATCH 10/32] JitArm64: Drop GetCRFieldBit's negate parameter No caller is using it anymore. 
--- Source/Core/Core/PowerPC/JitArm64/Jit.h | 2 +- .../JitArm64/JitArm64_SystemRegisters.cpp | 18 +++++++----------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 07b3f95187..3f9c471e11 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -355,7 +355,7 @@ protected: Arm64Gen::ARM64Reg exit_address_after_return_reg = Arm64Gen::ARM64Reg::INVALID_REG); void WriteBLRExit(Arm64Gen::ARM64Reg dest); - void GetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg out, bool negate = false); + void GetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg out); void SetCRFieldBit(int field, int bit, Arm64Gen::ARM64Reg in, bool negate = false); void ClearCRFieldBit(int field, int bit); void SetCRFieldBit(int field, int bit); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 91a048a353..a0084953b9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -20,7 +20,7 @@ using namespace Arm64Gen; -void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out, bool negate) +void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out) { ARM64Reg CR = gpr.CR(field); ARM64Reg WCR = EncodeRegTo32(CR); @@ -29,24 +29,20 @@ void JitArm64::GetCRFieldBit(int field, int bit, ARM64Reg out, bool negate) { case PowerPC::CR_SO_BIT: // check bit 59 set UBFX(out, CR, PowerPC::CR_EMU_SO_BIT, 1); - if (negate) - EOR(out, out, LogicalImm(1, GPRSize::B64)); break; case PowerPC::CR_EQ_BIT: // check bits 31-0 == 0 CMP(WCR, ARM64Reg::WZR); - CSET(out, negate ? CC_NEQ : CC_EQ); + CSET(out, CC_EQ); break; case PowerPC::CR_GT_BIT: // check val > 0 CMP(CR, ARM64Reg::ZR); - CSET(out, negate ? 
CC_LE : CC_GT); + CSET(out, CC_GT); break; case PowerPC::CR_LT_BIT: // check bit 62 set UBFX(out, CR, PowerPC::CR_EMU_LT_BIT, 1); - if (negate) - EOR(out, out, LogicalImm(1, GPRSize::B64)); break; default: @@ -620,7 +616,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) { auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA); SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, false); return; } @@ -629,7 +625,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) { auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA); SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), XA, true); return; } @@ -645,8 +641,8 @@ void JitArm64::crXXX(UGeckoInstruction inst) auto WB = gpr.GetScopedReg(); ARM64Reg XB = EncodeRegTo64(WB); - GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA, false); - GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB, false); + GetCRFieldBit(inst.CRBA >> 2, 3 - (inst.CRBA & 3), XA); + GetCRFieldBit(inst.CRBB >> 2, 3 - (inst.CRBB & 3), XB); // Compute combined bit switch (inst.SUBOP10) From 3eaa3f546beac4b7d8aeb733ea27698d0df78d8e Mon Sep 17 00:00:00 2001 From: Tabitha Date: Fri, 3 Jan 2025 21:51:43 -0500 Subject: [PATCH 11/32] Update RM8E01.ini --- Data/Sys/GameSettings/RM8E01.ini | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Data/Sys/GameSettings/RM8E01.ini b/Data/Sys/GameSettings/RM8E01.ini index b551dcf286..536cf20891 100644 --- a/Data/Sys/GameSettings/RM8E01.ini +++ b/Data/Sys/GameSettings/RM8E01.ini @@ -209,6 +209,9 @@ $QOL - Increased Text Display [Tabitha] $QOL - Invert IR Stick for GameCube Mod [gamemasterplc] 04106A24 83DE6404 04106A28 60000000 +04106AA4 C0428004 +04106A74 C04282F0 + $QOL - Remove Explanations [Rain] 82200001 80228760 From 
d048e6a373e7c7658e92994f095e0207a4d0fcd4 Mon Sep 17 00:00:00 2001 From: TryTwo Date: Mon, 30 Dec 2024 12:38:33 -0700 Subject: [PATCH 12/32] Frame Dumping: Change lossless codec from FFV1 to Ut Video. Ut Video is faster and more compatible with editing programs, but produces larger files. --- Source/Core/Core/Config/GraphicsSettings.cpp | 2 +- Source/Core/Core/Config/GraphicsSettings.h | 2 +- .../Config/Graphics/AdvancedWidget.cpp | 20 ++++++++++--------- .../Config/Graphics/AdvancedWidget.h | 2 +- Source/Core/VideoCommon/FrameDumpFFMpeg.cpp | 2 +- Source/Core/VideoCommon/VideoConfig.cpp | 2 +- Source/Core/VideoCommon/VideoConfig.h | 2 +- 7 files changed, 17 insertions(+), 15 deletions(-) diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index 663e74e412..61394319e2 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -64,7 +64,7 @@ const Info GFX_CACHE_HIRES_TEXTURES{{System::GFX, "Settings", "CacheHiresT const Info GFX_DUMP_EFB_TARGET{{System::GFX, "Settings", "DumpEFBTarget"}, false}; const Info GFX_DUMP_XFB_TARGET{{System::GFX, "Settings", "DumpXFBTarget"}, false}; const Info GFX_DUMP_FRAMES_AS_IMAGES{{System::GFX, "Settings", "DumpFramesAsImages"}, false}; -const Info GFX_USE_FFV1{{System::GFX, "Settings", "UseFFV1"}, false}; +const Info GFX_USE_LOSSLESS{{System::GFX, "Settings", "UseLossless"}, false}; const Info GFX_DUMP_FORMAT{{System::GFX, "Settings", "DumpFormat"}, "avi"}; const Info GFX_DUMP_CODEC{{System::GFX, "Settings", "DumpCodec"}, ""}; const Info GFX_DUMP_PIXEL_FORMAT{{System::GFX, "Settings", "DumpPixelFormat"}, ""}; diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h index 58c8be7588..126f970152 100644 --- a/Source/Core/Core/Config/GraphicsSettings.h +++ b/Source/Core/Core/Config/GraphicsSettings.h @@ -63,7 +63,7 @@ extern const Info GFX_CACHE_HIRES_TEXTURES; extern const Info 
GFX_DUMP_EFB_TARGET; extern const Info GFX_DUMP_XFB_TARGET; extern const Info GFX_DUMP_FRAMES_AS_IMAGES; -extern const Info GFX_USE_FFV1; +extern const Info GFX_USE_LOSSLESS; extern const Info GFX_DUMP_FORMAT; extern const Info GFX_DUMP_CODEC; extern const Info GFX_DUMP_PIXEL_FORMAT; diff --git a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp index c64c8fca40..5471a1bcb5 100644 --- a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp +++ b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.cpp @@ -185,11 +185,13 @@ void AdvancedWidget::CreateWidgets() dump_layout->addWidget(m_frame_dumps_resolution_type, 0, 1); #if defined(HAVE_FFMPEG) - m_dump_use_ffv1 = - new ConfigBool(tr("Use Lossless Codec (FFV1)"), Config::GFX_USE_FFV1, m_game_layer); + m_dump_use_lossless = + new ConfigBool(tr("Use Lossless Codec (Ut Video)"), Config::GFX_USE_LOSSLESS, m_game_layer); + m_dump_bitrate = new ConfigInteger(0, 1000000, Config::GFX_BITRATE_KBPS, m_game_layer, 1000); - m_dump_bitrate->setEnabled(!m_dump_use_ffv1->isChecked()); - dump_layout->addWidget(m_dump_use_ffv1, 1, 0); + m_dump_bitrate->setEnabled(!m_dump_use_lossless->isChecked()); + + dump_layout->addWidget(m_dump_use_lossless, 1, 0); dump_layout->addWidget(new QLabel(tr("Bitrate (kbps):")), 2, 0); dump_layout->addWidget(m_dump_bitrate, 2, 1); #endif @@ -261,9 +263,8 @@ void AdvancedWidget::ConnectWidgets() }); connect(m_enable_graphics_mods, &QCheckBox::toggled, this, [this](bool checked) { emit Settings::Instance().EnableGfxModsChanged(checked); }); - #if defined(HAVE_FFMPEG) - connect(m_dump_use_ffv1, &QCheckBox::toggled, this, + connect(m_dump_use_lossless, &QCheckBox::toggled, this, [this](bool checked) { m_dump_bitrate->setEnabled(!checked); }); #endif } @@ -391,8 +392,9 @@ void AdvancedWidget::AddDescriptions() "possible input for external editing software.

If unsure, leave " "this at \"Aspect Ratio Corrected Internal Resolution\"."); #if defined(HAVE_FFMPEG) - static const char TR_USE_FFV1_DESCRIPTION[] = - QT_TR_NOOP("Encodes frame dumps using the FFV1 codec.

If " + static const char TR_USE_LOSSLESS_DESCRIPTION[] = + QT_TR_NOOP("Encodes frame dumps using the Ut Video codec. If this option is unchecked, a " + "lossy Xvid codec will be used.

If " "unsure, leave this unchecked."); #endif static const char TR_PNG_COMPRESSION_LEVEL_DESCRIPTION[] = @@ -483,7 +485,7 @@ void AdvancedWidget::AddDescriptions() m_enable_graphics_mods->SetDescription(tr(TR_LOAD_GRAPHICS_MODS_DESCRIPTION)); m_frame_dumps_resolution_type->SetDescription(tr(TR_FRAME_DUMPS_RESOLUTION_TYPE_DESCRIPTION)); #ifdef HAVE_FFMPEG - m_dump_use_ffv1->SetDescription(tr(TR_USE_FFV1_DESCRIPTION)); + m_dump_use_lossless->SetDescription(tr(TR_USE_LOSSLESS_DESCRIPTION)); #endif m_png_compression_level->SetDescription(tr(TR_PNG_COMPRESSION_LEVEL_DESCRIPTION)); m_enable_cropping->SetDescription(tr(TR_CROPPING_DESCRIPTION)); diff --git a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.h b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.h index e3b3688571..d6cd8e975e 100644 --- a/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.h +++ b/Source/Core/DolphinQt/Config/Graphics/AdvancedWidget.h @@ -60,7 +60,7 @@ private: ConfigBool* m_dump_base_textures; // Frame dumping - ConfigBool* m_dump_use_ffv1; + ConfigBool* m_dump_use_lossless; ConfigChoice* m_frame_dumps_resolution_type; ConfigInteger* m_dump_bitrate; ConfigInteger* m_png_compression_level; diff --git a/Source/Core/VideoCommon/FrameDumpFFMpeg.cpp b/Source/Core/VideoCommon/FrameDumpFFMpeg.cpp index 760ae6f7e0..d698af8cd5 100644 --- a/Source/Core/VideoCommon/FrameDumpFFMpeg.cpp +++ b/Source/Core/VideoCommon/FrameDumpFFMpeg.cpp @@ -217,7 +217,7 @@ bool FFMpegFrameDump::CreateVideoFile() return false; } - const std::string& codec_name = g_Config.bUseFFV1 ? "ffv1" : g_Config.sDumpCodec; + const std::string& codec_name = g_Config.bUseLossless ? 
"utvideo" : g_Config.sDumpCodec; AVCodecID codec_id = output_format->video_codec; diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index 7287610d6c..95974cd798 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -123,7 +123,7 @@ void VideoConfig::Refresh() bDumpEFBTarget = Config::Get(Config::GFX_DUMP_EFB_TARGET); bDumpXFBTarget = Config::Get(Config::GFX_DUMP_XFB_TARGET); bDumpFramesAsImages = Config::Get(Config::GFX_DUMP_FRAMES_AS_IMAGES); - bUseFFV1 = Config::Get(Config::GFX_USE_FFV1); + bUseLossless = Config::Get(Config::GFX_USE_LOSSLESS); sDumpFormat = Config::Get(Config::GFX_DUMP_FORMAT); sDumpCodec = Config::Get(Config::GFX_DUMP_CODEC); sDumpPixelFormat = Config::Get(Config::GFX_DUMP_PIXEL_FORMAT); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index 915ec687ec..ec9bcb586c 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -194,7 +194,7 @@ struct VideoConfig final bool bDumpEFBTarget = false; bool bDumpXFBTarget = false; bool bDumpFramesAsImages = false; - bool bUseFFV1 = false; + bool bUseLossless = false; std::string sDumpCodec; std::string sDumpPixelFormat; std::string sDumpEncoder; From 485a84e047437e5ef6d12f8a8cb5b4e4de698569 Mon Sep 17 00:00:00 2001 From: Tabitha Date: Sun, 5 Jan 2025 08:35:55 -0500 Subject: [PATCH 13/32] fix shop mod --- Data/Sys/GameSettings/GP5E01.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Data/Sys/GameSettings/GP5E01.ini b/Data/Sys/GameSettings/GP5E01.ini index 58f4d298e2..5dfebec818 100644 --- a/Data/Sys/GameSettings/GP5E01.ini +++ b/Data/Sys/GameSettings/GP5E01.ini @@ -239,9 +239,9 @@ C20C5B48 00000001 3A000000 00000000 C20C2708 00000001 38600000 00000000 -C20A9E08 00000004 +C20A9E08 00000005 3C60817F 6063FFFC -80830000 2C040001 +A0830000 2C040001 41820008 1FDE0003 3C800000 90830000 60000000 00000000 From 
4d0e93de740d314587112629e4f4220c44266bff Mon Sep 17 00:00:00 2001 From: Tabitha Date: Sun, 5 Jan 2025 16:31:29 -0500 Subject: [PATCH 14/32] Update GP6E01.ini --- Data/Sys/GameSettings/GP6E01.ini | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/Data/Sys/GameSettings/GP6E01.ini b/Data/Sys/GameSettings/GP6E01.ini index b1dd51bb44..ea530ad744 100644 --- a/Data/Sys/GameSettings/GP6E01.ini +++ b/Data/Sys/GameSettings/GP6E01.ini @@ -1462,25 +1462,28 @@ $Minigame Deletion - Cash Flow [Tabitha] 002305A4 000000FF 00230CD4 000000FF -$Minigame Deletion - Cashapult ➜ Jump The Gun [Tabitha] +$Minigame Deletion - Cashapult [Tabitha] 00231316 000000FF -$Minigame Deletion - Money Belts ➜ Throw Me a Bone [Tabitha] +$Minigame Deletion - Money Belts [Tabitha] 00230966 000000FF -$Minigame Deletion - Pitifall ➜ Cog Jog [Tabitha] +$Minigame Deletion - Pitifall [Tabitha] 00231962 000000FF -$Minigame Deletion - Trap Ease Artist ➜ What Goes Up... [Tabitha] +$Minigame Deletion - Pop Star [Tabitha] +00230CCA 000000FF + +$Minigame Deletion - Trap Ease Artist [Tabitha] 0023086E 000000FF -$Minigame Deletion - Trick or Tree ➜ Boo'd Off the Stage [Tabitha] +$Minigame Deletion - Trick or Tree [Tabitha] 00231CC6 000000FF -$Minigame Deletion - Tricky Tires ➜ Cannonball Fun [Tabitha] +$Minigame Deletion - Tricky Tires [Tabitha] 00230396 000000FF -$Minigame Deletion - Wrasslin' Rapids ➜ Hyper Sniper [Tabitha] +$Minigame Deletion - Wrasslin' Rapids [Tabitha] 00231DBE 000000FF $Orbs - Disable ALL Orbs [gamemasterplc] From ec4316c265f2d89ba03738b7024f2ee83cfd94e0 Mon Sep 17 00:00:00 2001 From: Tabitha Date: Sun, 5 Jan 2025 16:33:07 -0500 Subject: [PATCH 15/32] Update GP7E01.ini --- Data/Sys/GameSettings/GP7E01.ini | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Data/Sys/GameSettings/GP7E01.ini b/Data/Sys/GameSettings/GP7E01.ini index d856d627aa..8b466e2825 100644 --- a/Data/Sys/GameSettings/GP7E01.ini +++ 
b/Data/Sys/GameSettings/GP7E01.ini @@ -1459,28 +1459,28 @@ $Minigame - Vine Country - DK is Faster [gamemasterplc] 044E5EE4 3803FFFE E2000001 80008000 -$Minigame Deletion - Bridge Work ➜ Mad Props +$Minigame Deletion - Bridge Works [Tabitha] 0025B722 000000FF -$Minigame Deletion - Cointagious ➜ Take Me Ohm +$Minigame Deletion - Cointagious [Tabitha] 00259FE2 000000FF -$Minigame Deletion - Fish & Cheeps ➜ Weight for It +$Minigame Deletion - Fish & Cheeps [Tabitha] 0025BB02 000000FF -$Minigame Deletion - Flashfright ➜ Think Tank +$Minigame Deletion - Flashfright [Tabitha] 0025AA0E 000000FF -$Minigame Deletion - Gimme a Sign ➜ Royal Rumpus +$Minigame Deletion - Gimme a Sign [Tabitha] 0025B6A6 000000FF -$Minigame Deletion - Light Speed ➜ Camp Ukiki +$Minigame Deletion - Light Speed [Tabitha] 0025BA0A 000000FF -$Minigame Deletion - Spin Doctor ➜ Hip Hop Drop +$Minigame Deletion - Spin Doctor [Tabitha] 0025B79E 000000FF -$Minigame Deletion - Warp Pipe Dreams ➜ Apes of Wrath +$Minigame Deletion - Warp Pipe Dreams [Tabitha] 0025B3BE 000000FF $Music - Disables Music [Ralf] From 0355d4fd468a384bffe17b6842895b04e330ff5a Mon Sep 17 00:00:00 2001 From: OatmealDome Date: Wed, 4 Dec 2024 22:53:39 -0500 Subject: [PATCH 16/32] DolphinQt: Use TARGET_BUNDLE_DIR generator expression instead of specifying the bundle path ourselves --- Source/Core/DolphinQt/CMakeLists.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Source/Core/DolphinQt/CMakeLists.txt b/Source/Core/DolphinQt/CMakeLists.txt index 186e70221b..5fe4c636a5 100644 --- a/Source/Core/DolphinQt/CMakeLists.txt +++ b/Source/Core/DolphinQt/CMakeLists.txt @@ -572,7 +572,6 @@ endif() if(APPLE) include(BundleUtilities) - set(BUNDLE_PATH ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/DolphinQt.app) # Ask for an application bundle. 
set_target_properties(dolphin-mpn PROPERTIES @@ -648,7 +647,7 @@ if(APPLE) COMMAND "${CMAKE_SOURCE_DIR}/Tools/mac-codesign.sh" "-e" "${CMAKE_CURRENT_SOURCE_DIR}/DolphinEmu$<$:Debug>.entitlements" "${MACOS_CODE_SIGNING_IDENTITY}" - "${BUNDLE_PATH}" + "$" ) endif() else() From 24fc34fd83ec7b8a1b0c09266e2a8b16eae762a9 Mon Sep 17 00:00:00 2001 From: OatmealDome Date: Wed, 4 Dec 2024 23:25:26 -0500 Subject: [PATCH 17/32] ScmRevGen: Don't generate Info.plist files directly Some generators (like Unix Makefiles and Xcode) copy an app's Info.plist at configure time. This causes a problem when we need to generate the Info.plist at build time, like how we currently do it with ScmRevGen. Instead of generating the Info.plist directly in ScmRevGen, provide an Info.plist without any version information to CMake at configure time, have ScmRevGen generate a separate plist file with the version information at build time, and then merge the two together to create the final Info.plist. --- CMake/DolphinInjectVersionInfo.cmake | 24 ++++++++++++++++++++++++ CMake/ScmRevGen.cmake | 3 +-- CMakeLists.txt | 11 +++-------- Source/Core/DolphinQt/CMakeLists.txt | 5 ++++- Source/Core/DolphinQt/Info.plist.in | 6 ------ Source/Core/MacUpdater/CMakeLists.txt | 5 ++++- Source/Core/MacUpdater/Info.plist.in | 4 ---- Source/Core/VersionInfo.plist.in | 13 +++++++++++++ 8 files changed, 49 insertions(+), 22 deletions(-) create mode 100644 CMake/DolphinInjectVersionInfo.cmake create mode 100644 Source/Core/VersionInfo.plist.in diff --git a/CMake/DolphinInjectVersionInfo.cmake b/CMake/DolphinInjectVersionInfo.cmake new file mode 100644 index 0000000000..94d9b43873 --- /dev/null +++ b/CMake/DolphinInjectVersionInfo.cmake @@ -0,0 +1,24 @@ +function(dolphin_inject_version_info target) + set(INFO_PLIST_PATH "$/Contents/Info.plist") + add_custom_command(TARGET ${target} + POST_BUILD + + COMMAND /usr/libexec/PlistBuddy -c + "Delete :CFBundleShortVersionString" + "${INFO_PLIST_PATH}" + || true + + COMMAND 
/usr/libexec/PlistBuddy -c + "Delete :CFBundleLongVersionString" + "${INFO_PLIST_PATH}" + || true + + COMMAND /usr/libexec/PlistBuddy -c + "Delete :CFBundleVersion" + "${INFO_PLIST_PATH}" + || true + + COMMAND /usr/libexec/PlistBuddy -c + "Merge '${CMAKE_BINARY_DIR}/Source/Core/VersionInfo.plist'" + "${INFO_PLIST_PATH}") +endfunction() diff --git a/CMake/ScmRevGen.cmake b/CMake/ScmRevGen.cmake index 492b4e432f..5f8b353cbc 100644 --- a/CMake/ScmRevGen.cmake +++ b/CMake/ScmRevGen.cmake @@ -65,6 +65,5 @@ endfunction() configure_source_file("Source/Core/Common/scmrev.h") if(APPLE) - configure_source_file("Source/Core/DolphinQt/Info.plist") - configure_source_file("Source/Core/MacUpdater/Info.plist") + configure_source_file("Source/Core/VersionInfo.plist") endif() diff --git a/CMakeLists.txt b/CMakeLists.txt index 71bd74b6cb..2ae80bb65c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -783,14 +783,9 @@ if (NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/Source/Core/Common/scmrev.h) endif() if(APPLE) - file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/Source/Core/DolphinQt) - if (NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/Source/Core/DolphinQt/Info.plist) - file(TOUCH ${CMAKE_CURRENT_BINARY_DIR}/Source/Core/DolphinQt/Info.plist) - endif() - - file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/Source/Core/MacUpdater) - if (NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/Source/Core/MacUpdater/Info.plist) - file(TOUCH ${CMAKE_CURRENT_BINARY_DIR}/Source/Core/MacUpdater/Info.plist) + file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/Source/Core) + if (NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/Source/Core/VersionInfo.plist) + file(TOUCH ${CMAKE_CURRENT_BINARY_DIR}/Source/Core/VersionInfo.plist) endif() endif() diff --git a/Source/Core/DolphinQt/CMakeLists.txt b/Source/Core/DolphinQt/CMakeLists.txt index 5fe4c636a5..f4816003dc 100644 --- a/Source/Core/DolphinQt/CMakeLists.txt +++ b/Source/Core/DolphinQt/CMakeLists.txt @@ -576,7 +576,7 @@ if(APPLE) # Ask for an application bundle. 
set_target_properties(dolphin-mpn PROPERTIES MACOSX_BUNDLE true - MACOSX_BUNDLE_INFO_PLIST "${CMAKE_CURRENT_BINARY_DIR}/Info.plist" + MACOSX_BUNDLE_INFO_PLIST "${CMAKE_CURRENT_SOURCE_DIR}/Info.plist.in" XCODE_ATTRIBUTE_CODE_SIGN_IDENTITY "" OUTPUT_NAME DolphinQt ) @@ -613,6 +613,9 @@ if(APPLE) source_group("Resources" FILES "${CMAKE_SOURCE_DIR}/Data/${res}") endforeach() + include(DolphinInjectVersionInfo) + dolphin_inject_version_info(dolphin-emu) + # Copy MoltenVK into the bundle if(ENABLE_VULKAN) if(USE_BUNDLED_MOLTENVK) diff --git a/Source/Core/DolphinQt/Info.plist.in b/Source/Core/DolphinQt/Info.plist.in index 2a3d17291e..ddd2ed4182 100644 --- a/Source/Core/DolphinQt/Info.plist.in +++ b/Source/Core/DolphinQt/Info.plist.in @@ -39,12 +39,6 @@ English CFBundlePackageType APPL - CFBundleShortVersionString - ${DOLPHIN_WC_DESCRIBE} - CFBundleLongVersionString - ${DOLPHIN_WC_REVISION} - CFBundleVersion - ${DOLPHIN_VERSION_MAJOR}.${DOLPHIN_VERSION_MINOR} NSHumanReadableCopyright Licensed under GPL version 2 or later (GPLv2+) LSApplicationCategoryType diff --git a/Source/Core/MacUpdater/CMakeLists.txt b/Source/Core/MacUpdater/CMakeLists.txt index d72dd46cd2..79a9bfe52f 100644 --- a/Source/Core/MacUpdater/CMakeLists.txt +++ b/Source/Core/MacUpdater/CMakeLists.txt @@ -16,7 +16,7 @@ add_dependencies(MacUpdater dolphin_scmrev) set_target_properties(MacUpdater PROPERTIES MACOSX_BUNDLE true - MACOSX_BUNDLE_INFO_PLIST "${CMAKE_CURRENT_BINARY_DIR}/Info.plist" + MACOSX_BUNDLE_INFO_PLIST "${CMAKE_CURRENT_SOURCE_DIR}/Info.plist.in" OUTPUT_NAME "Dolphin Updater") target_compile_options(MacUpdater PRIVATE -x objective-c++) @@ -53,6 +53,9 @@ foreach(sb ${STORYBOARDS}) COMMENT "Compiling Storyboard ${sb}...") endforeach() +include(DolphinInjectVersionInfo) +dolphin_inject_version_info(MacUpdater) + if(NOT SKIP_POSTPROCESS_BUNDLE) # Update library references to make the bundle portable include(DolphinPostprocessBundle) diff --git a/Source/Core/MacUpdater/Info.plist.in 
b/Source/Core/MacUpdater/Info.plist.in index 574843c28d..22872c858f 100644 --- a/Source/Core/MacUpdater/Info.plist.in +++ b/Source/Core/MacUpdater/Info.plist.in @@ -16,10 +16,6 @@ Dolphin Updater CFBundlePackageType APPL - CFBundleShortVersionString - ${DOLPHIN_WC_DESCRIBE} - CFBundleVersion - ${DOLPHIN_VERSION_MAJOR}.${DOLPHIN_VERSION_MINOR} LSMinimumSystemVersion ${CMAKE_OSX_DEPLOYMENT_TARGET} NSHumanReadableCopyright diff --git a/Source/Core/VersionInfo.plist.in b/Source/Core/VersionInfo.plist.in new file mode 100644 index 0000000000..38043ba2de --- /dev/null +++ b/Source/Core/VersionInfo.plist.in @@ -0,0 +1,13 @@ + + + + + + CFBundleShortVersionString + ${DOLPHIN_WC_DESCRIBE} + CFBundleLongVersionString + ${DOLPHIN_WC_REVISION} + CFBundleVersion + ${DOLPHIN_VERSION_MAJOR}.${DOLPHIN_VERSION_MINOR} + + From 4572b075cdaf55d3edeba1cdc045d4444454d982 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Thu, 24 Oct 2024 23:05:36 +0200 Subject: [PATCH 18/32] JitArm64_LoadStore: Small dcbx optimization MUL and SUB can be combined in one instruction. Before: 0x1b1a7c01 mul w1, w0, w26 0x4b010318 sub w24, w24, w1 After: 0x1b1ae018 msub w24, w0, w26, w24 --- Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index ebcb8142b7..12c35bc43c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -817,9 +817,8 @@ void JitArm64::dcbx(UGeckoInstruction inst) STR(IndexType::Unsigned, loop_counter, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR)); // downcount -= (WA * reg_cycle_count) - MUL(WB, WA, reg_cycle_count); + MSUB(reg_downcount, WA, reg_cycle_count, reg_downcount); // ^ Note that this cannot overflow because it's limited by (downcount/cycle_count). 
- SUB(reg_downcount, reg_downcount, WB); STR(IndexType::Unsigned, reg_downcount, PPC_REG, PPCSTATE_OFF(downcount)); SetJumpTarget(downcount_is_zero_or_negative); From 671fe29ebe4686649544dd43dc902e64fc374d9e Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Fri, 27 Dec 2024 09:47:54 +0100 Subject: [PATCH 19/32] JitBase: Add HasConstantCarry helper --- Source/Core/Core/PowerPC/JitCommon/JitBase.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index fa2fdd167f..50c840a2b9 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -178,6 +178,14 @@ protected: void CleanUpAfterStackFault(); bool CanMergeNextInstructions(int count) const; + bool HasConstantCarry() const + { +#ifdef _M_ARM_64 + return js.carryFlag == CarryFlag::ConstantTrue || js.carryFlag == CarryFlag::ConstantFalse; +#else + return false; +#endif + } bool ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op); From ce503e69c030c897f40cc139d81d4a406c557356 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Fri, 27 Dec 2024 09:48:58 +0100 Subject: [PATCH 20/32] JitBase: Improve const-correctness --- Source/Core/Core/PowerPC/JitCommon/JitBase.cpp | 4 ++-- Source/Core/Core/PowerPC/JitCommon/JitBase.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp index 033ca756cd..d2de2895a7 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.cpp @@ -110,7 +110,7 @@ JitBase::~JitBase() CPUThreadConfigCallback::RemoveConfigChangedCallback(m_registered_config_callback_id); } -bool JitBase::DoesConfigNeedRefresh() +bool JitBase::DoesConfigNeedRefresh() const { return std::ranges::any_of(JIT_SETTINGS, 
[this](const auto& pair) { return this->*pair.first != Config::Get(*pair.second); @@ -276,7 +276,7 @@ bool JitBase::CanMergeNextInstructions(int count) const return true; } -bool JitBase::ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op) +bool JitBase::ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op) const { if (jo.fp_exceptions) return (op->opinfo->flags & FL_FLOAT_EXCEPTION) != 0; diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 50c840a2b9..4ac81c4049 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -167,7 +167,7 @@ protected: static const std::array*>, 23> JIT_SETTINGS; - bool DoesConfigNeedRefresh(); + bool DoesConfigNeedRefresh() const; void RefreshConfig(); void InitFastmemArena(); @@ -187,7 +187,7 @@ protected: #endif } - bool ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op); + bool ShouldHandleFPExceptionForInstruction(const PPCAnalyst::CodeOp* op) const; public: explicit JitBase(Core::System& system); From 033bb1a5938b8cf2c9c9466eeba8d1aef0d42936 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 28 Dec 2024 18:12:13 +0100 Subject: [PATCH 21/32] JitArm64_Integer: Refactor subfex --- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 76f771abe5..41ad4290d0 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1216,40 +1216,44 @@ void JitArm64::subfex(UGeckoInstruction inst) if (gpr.IsImm(a) && (mex || gpr.IsImm(b))) { - u32 i = gpr.GetImm(a), j = mex ? -1 : gpr.GetImm(b); - - gpr.BindToRegister(d, false); + const u32 i = gpr.GetImm(a); + const u32 j = mex ? 
-1 : gpr.GetImm(b); + const u32 imm = ~i + j; + const bool is_all_ones = imm == 0xFFFFFFFF; switch (js.carryFlag) { case CarryFlag::InPPCState: { + gpr.BindToRegister(d, false); + ARM64Reg RD = gpr.R(d); auto WA = gpr.GetScopedReg(); LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); - ADDI2R(gpr.R(d), WA, ~i + j, gpr.R(d)); + ADDI2R(RD, WA, imm, RD); break; } case CarryFlag::InHostCarry: { - auto WA = gpr.GetScopedReg(); - MOVI2R(WA, ~i + j); - ADC(gpr.R(d), WA, ARM64Reg::WZR); + gpr.BindToRegister(d, false); + ARM64Reg RD = gpr.R(d); + MOVI2R(RD, imm); + ADC(RD, RD, ARM64Reg::WZR); break; } case CarryFlag::ConstantTrue: { - gpr.SetImmediate(d, ~i + j + 1); + gpr.SetImmediate(d, imm + 1); break; } case CarryFlag::ConstantFalse: { - gpr.SetImmediate(d, ~i + j); + gpr.SetImmediate(d, imm); break; } } const bool must_have_carry = Interpreter::Helper_Carry(~i, j); - const bool might_have_carry = (~i + j) == 0xFFFFFFFF; + const bool might_have_carry = is_all_ones; if (must_have_carry) { From 34c85e4865d25e67c309d400e9f57bb1a98168d2 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 28 Dec 2024 18:15:44 +0100 Subject: [PATCH 22/32] JitArm64_Integer: subfex - Optimize InPPCState case for 0 When the immediate is zero, we can load the carry flag from memory directly to the destination register. Before: 0x394bd3b8 ldrb w24, [x29, #0x2f4] 0x2a1803f9 mov w25, w24 After: 0x394bd3b9 ldrb w25, [x29, #0x2f4] --- .../Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 41ad4290d0..3b6aef8788 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1219,6 +1219,7 @@ void JitArm64::subfex(UGeckoInstruction inst) const u32 i = gpr.GetImm(a); const u32 j = mex ? 
-1 : gpr.GetImm(b); const u32 imm = ~i + j; + const bool is_zero = imm == 0; const bool is_all_ones = imm == 0xFFFFFFFF; switch (js.carryFlag) @@ -1227,9 +1228,16 @@ void JitArm64::subfex(UGeckoInstruction inst) { gpr.BindToRegister(d, false); ARM64Reg RD = gpr.R(d); - auto WA = gpr.GetScopedReg(); - LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); - ADDI2R(RD, WA, imm, RD); + if (is_zero) + { + LDRB(IndexType::Unsigned, RD, PPC_REG, PPCSTATE_OFF(xer_ca)); + } + else + { + auto WA = gpr.GetScopedReg(); + LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); + ADDI2R(RD, WA, imm, RD); + } break; } case CarryFlag::InHostCarry: From 4869ef64c101e09dce5d571a9ce9f2af2fa63bc1 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 28 Dec 2024 20:18:09 +0100 Subject: [PATCH 23/32] JitArm64_Integer: subfex - Optimize InHostCarry case for -1 The result is either -1 or 0 depending on the state of the carry flag. This can be done with a csetm instruction. Before: 0x1280001a mov w26, #-0x1 ; =-1 0x1a1f035a adc w26, w26, wzr After: 0x5a9f23fa csetm w26, lo --- .../Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 3b6aef8788..ed59599cff 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1244,8 +1244,18 @@ void JitArm64::subfex(UGeckoInstruction inst) { gpr.BindToRegister(d, false); ARM64Reg RD = gpr.R(d); - MOVI2R(RD, imm); - ADC(RD, RD, ARM64Reg::WZR); + if (is_all_ones) + { + // RD = -1 + carry = carry ? 0 : -1 + // CSETM sets the destination to -1 if the condition is true, 0 + // otherwise. Hence, the condition must be carry clear. 
+ CSETM(RD, CC_CC); + } + else + { + MOVI2R(RD, imm); + ADC(RD, RD, ARM64Reg::WZR); + } break; } case CarryFlag::ConstantTrue: From 9269d53c92dd37794db981dd3eaa16f16c33ac54 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 28 Dec 2024 20:57:55 +0100 Subject: [PATCH 24/32] JitArm64_Integer: subfzex - Constant folding When both the input register and the carry flag are constants, the result can be precomputed. Before: 0x52800016 mov w22, #0x0 ; =0 0x2a3603f6 mvn w22, w22 After: --- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 70 +++++++++++-------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index ed59599cff..b2193faf36 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1359,39 +1359,49 @@ void JitArm64::subfzex(UGeckoInstruction inst) int a = inst.RA, d = inst.RD; - gpr.BindToRegister(d, d == a); + if (gpr.IsImm(a) && HasConstantCarry()) + { + const u32 imm = ~gpr.GetImm(a); + const u32 carry = js.carryFlag == CarryFlag::ConstantTrue; + gpr.SetImmediate(d, imm + carry); + ComputeCarry(Interpreter::Helper_Carry(imm, carry)); + } + else + { + gpr.BindToRegister(d, d == a); - switch (js.carryFlag) - { - case CarryFlag::InPPCState: - { + switch (js.carryFlag) { - auto WA = gpr.GetScopedReg(); - LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); - MVN(gpr.R(d), gpr.R(a)); - CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(d), WA); + case CarryFlag::InPPCState: + { + { + auto WA = gpr.GetScopedReg(); + LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); + MVN(gpr.R(d), gpr.R(a)); + CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(d), WA); + } + ComputeCarry(); + break; + } + case CarryFlag::InHostCarry: + { + CARRY_IF_NEEDED(SBC, SBCS, gpr.R(d), ARM64Reg::WZR, gpr.R(a)); + ComputeCarry(); + break; + } + case 
CarryFlag::ConstantTrue: + { + CARRY_IF_NEEDED(NEG, NEGS, gpr.R(d), gpr.R(a)); + ComputeCarry(); + break; + } + case CarryFlag::ConstantFalse: + { + MVN(gpr.R(d), gpr.R(a)); + ComputeCarry(false); + break; + } } - ComputeCarry(); - break; - } - case CarryFlag::InHostCarry: - { - CARRY_IF_NEEDED(SBC, SBCS, gpr.R(d), ARM64Reg::WZR, gpr.R(a)); - ComputeCarry(); - break; - } - case CarryFlag::ConstantTrue: - { - CARRY_IF_NEEDED(NEG, NEGS, gpr.R(d), gpr.R(a)); - ComputeCarry(); - break; - } - case CarryFlag::ConstantFalse: - { - MVN(gpr.R(d), gpr.R(a)); - ComputeCarry(false); - break; - } } if (inst.Rc) From 77d62b217eec2189e78f0cc6985401f613ab7918 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 28 Dec 2024 21:41:48 +0100 Subject: [PATCH 25/32] JitArm64_Integer: addex - Optimize InPPCState case for 0 Same optimization we did for subfex. Skip loading the carry flag into a temporary register first when we're dealing with zero. Before: 0x394bd3b8 ldrb w24, [x29, #0x2f4] 0x2a1803f9 mov w25, w24 After: 0x394bd3b9 ldrb w25, [x29, #0x2f4] --- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index b2193faf36..eb742ac1dd 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1468,40 +1468,51 @@ void JitArm64::addex(UGeckoInstruction inst) if (gpr.IsImm(a) && (mex || gpr.IsImm(b))) { - u32 i = gpr.GetImm(a), j = mex ? -1 : gpr.GetImm(b); - - gpr.BindToRegister(d, false); + const u32 i = gpr.GetImm(a), j = mex ? 
-1 : gpr.GetImm(b); + const u32 imm = i + j; + const bool is_zero = imm == 0; + const bool is_all_ones = imm == 0xFFFFFFFF; switch (js.carryFlag) { case CarryFlag::InPPCState: { - auto WA = gpr.GetScopedReg(); - LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); - ADDI2R(gpr.R(d), WA, i + j, gpr.R(d)); + gpr.BindToRegister(d, false); + ARM64Reg RD = gpr.R(d); + if (is_zero) + { + LDRB(IndexType::Unsigned, RD, PPC_REG, PPCSTATE_OFF(xer_ca)); + } + else + { + auto WA = gpr.GetScopedReg(); + LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); + ADDI2R(RD, WA, imm, RD); + } break; } case CarryFlag::InHostCarry: { + gpr.BindToRegister(d, false); ARM64Reg RD = gpr.R(d); - MOVI2R(RD, i + j); + MOVI2R(RD, imm); ADC(RD, RD, ARM64Reg::WZR); break; } case CarryFlag::ConstantTrue: { - gpr.SetImmediate(d, i + j + 1); + gpr.SetImmediate(d, imm + 1); break; } case CarryFlag::ConstantFalse: { - gpr.SetImmediate(d, i + j); + gpr.SetImmediate(d, imm); break; } } const bool must_have_carry = Interpreter::Helper_Carry(i, j); - const bool might_have_carry = (i + j) == 0xFFFFFFFF; + const bool might_have_carry = is_all_ones; if (must_have_carry) { From 4bc03c635e9722f221d3af7f5cc2d057d5647ab1 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 28 Dec 2024 21:53:28 +0100 Subject: [PATCH 26/32] JitArm64_Integer: addex - Optimize InHostCarry for 0 Similar to what we did for subfex, but for 0. 
Before: 0x5280001b mov w27, #0x0 ; =0 0x1a1f037b adc w27, w27, wzr After: 0x1a9f37fb cset w27, hs --- .../Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index eb742ac1dd..54c86ba1f1 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1495,8 +1495,16 @@ void JitArm64::addex(UGeckoInstruction inst) { gpr.BindToRegister(d, false); ARM64Reg RD = gpr.R(d); - MOVI2R(RD, imm); - ADC(RD, RD, ARM64Reg::WZR); + if (is_zero) + { + // RD = 0 + carry = carry ? 1 : 0 + CSET(RD, CC_CS); + } + else + { + MOVI2R(RD, imm); + ADC(RD, RD, ARM64Reg::WZR); + } break; } case CarryFlag::ConstantTrue: From c54bd258f9c48260a9ad4c5090433c794812663d Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 28 Dec 2024 22:12:46 +0100 Subject: [PATCH 27/32] JitArm64_Integer: addex - Optimize InHostCarry for -1 Same thing we did for subfex. Before: 0x1280001a mov w26, #-0x1 ; =-1 0x1a1f035a adc w26, w26, wzr After: 0x5a9f23fa csetm w26, lo --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 54c86ba1f1..a6820fdfd2 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1500,6 +1500,13 @@ void JitArm64::addex(UGeckoInstruction inst) // RD = 0 + carry = carry ? 1 : 0 CSET(RD, CC_CS); } + else if (is_all_ones) + { + // RD = -1 + carry = carry ? 0 : -1 + // Note that CSETM sets the destination to -1 if the condition is true, + // and 0 otherwise. Hence, the condition must be carry clear. 
+ CSETM(RD, CC_CC); + } else { MOVI2R(RD, imm); From 46d4409b668cbc14583a8439c2cecd94953aabf5 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 28 Dec 2024 23:07:33 +0100 Subject: [PATCH 28/32] JitArm64_Integer: addzex - Optimize ConstantFalse and ConstantTrue When the input register and carry flags are known, we can always precompute the result. We still materialize the immediate when the condition register needs to be updated, but this seems to be a general problem. I might look into that one day, but for now this'll do. - ConstantFalse Before: 0x52800119 mov w25, #0x8 ; =8 0x2a1903fa mov w26, w25 After: N/A - ConstantTrue Before: 0x52800119 mov w25, #0x8 ; =8 0x1100073a add w26, w25, #0x1 After: N/A --- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 97 ++++++++++++------- 1 file changed, 62 insertions(+), 35 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index a6820fdfd2..c3ebc252e4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -10,6 +10,7 @@ #include "Common/BitUtils.h" #include "Common/CommonTypes.h" #include "Common/MathUtil.h" +#include "Common/Unreachable.h" #include "Core/Core.h" #include "Core/CoreTiming.h" @@ -1128,47 +1129,73 @@ void JitArm64::addzex(UGeckoInstruction inst) int a = inst.RA, d = inst.RD; - switch (js.carryFlag) + if (gpr.IsImm(a) && HasConstantCarry()) { - case CarryFlag::InPPCState: - { - const bool allocate_reg = d == a; - gpr.BindToRegister(d, allocate_reg); + const u32 imm = gpr.GetImm(a); + const bool is_all_ones = imm == 0xFFFFFFFF; + switch (js.carryFlag) { - auto WA = allocate_reg ? 
gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(gpr.R(d)); - LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); - CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), WA); - } - - ComputeCarry(); - break; - } - case CarryFlag::InHostCarry: - { - gpr.BindToRegister(d, d == a); - CARRY_IF_NEEDED(ADC, ADCS, gpr.R(d), gpr.R(a), ARM64Reg::WZR); - ComputeCarry(); - break; - } - case CarryFlag::ConstantTrue: - { - gpr.BindToRegister(d, d == a); - CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), 1); - ComputeCarry(); - break; - } - case CarryFlag::ConstantFalse: - { - if (d != a) + case CarryFlag::ConstantTrue: { - gpr.BindToRegister(d, false); - MOV(gpr.R(d), gpr.R(a)); + gpr.SetImmediate(d, imm + 1); + ComputeCarry(is_all_ones); + break; + } + case CarryFlag::ConstantFalse: + { + gpr.SetImmediate(d, imm); + ComputeCarry(false); + break; + } + default: + Common::Unreachable(); } - - ComputeCarry(false); - break; } + else + { + switch (js.carryFlag) + { + case CarryFlag::InPPCState: + { + const bool allocate_reg = d == a; + gpr.BindToRegister(d, allocate_reg); + + { + auto WA = allocate_reg ? 
gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(gpr.R(d)); + LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); + CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), WA); + } + + ComputeCarry(); + break; + } + case CarryFlag::InHostCarry: + { + gpr.BindToRegister(d, d == a); + CARRY_IF_NEEDED(ADC, ADCS, gpr.R(d), gpr.R(a), ARM64Reg::WZR); + ComputeCarry(); + break; + } + case CarryFlag::ConstantTrue: + { + gpr.BindToRegister(d, d == a); + CARRY_IF_NEEDED(ADD, ADDS, gpr.R(d), gpr.R(a), 1); + ComputeCarry(); + break; + } + case CarryFlag::ConstantFalse: + { + if (d != a) + { + gpr.BindToRegister(d, false); + MOV(gpr.R(d), gpr.R(a)); + } + + ComputeCarry(false); + break; + } + } } if (inst.Rc) From 24fadb873ad5a33362e760815372a3bd20fb93f4 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 28 Dec 2024 23:20:17 +0100 Subject: [PATCH 29/32] JitArm64_Integer: addzex - Optimize InPPCState case for 0 Before: 0x52800019 mov w25, #0x0 ; =0 0x394bd3b8 ldrb w24, [x29, #0x2f4] 0x2b180339 adds w25, w25, w24 After: 0x394bd3b9 ldrb w25, [x29, #0x2f4] --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index c3ebc252e4..0b21cc2d9d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1129,13 +1129,21 @@ void JitArm64::addzex(UGeckoInstruction inst) int a = inst.RA, d = inst.RD; - if (gpr.IsImm(a) && HasConstantCarry()) + if (gpr.IsImm(a) && + (HasConstantCarry() || (js.carryFlag == CarryFlag::InPPCState && gpr.GetImm(a) == 0))) { const u32 imm = gpr.GetImm(a); const bool is_all_ones = imm == 0xFFFFFFFF; switch (js.carryFlag) { + case CarryFlag::InPPCState: + { + gpr.BindToRegister(d, false); + LDRB(IndexType::Unsigned, gpr.R(d), PPC_REG, PPCSTATE_OFF(xer_ca)); + 
ComputeCarry(false); + break; + } case CarryFlag::ConstantTrue: { gpr.SetImmediate(d, imm + 1); From cc5628fdfbb0596480b5006038910f86d158cc03 Mon Sep 17 00:00:00 2001 From: Sintendo <3380580+Sintendo@users.noreply.github.com> Date: Sat, 28 Dec 2024 23:26:55 +0100 Subject: [PATCH 30/32] JitArm64_Integer: addzex - Optimize InHostCarry case for 0 Before: 0x5280000d mov w13, #0x0 ; =0 0x1a1f01ae adc w14, w13, wzr After: 0x1a9f37ee cset w14, hs --- .../Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 0b21cc2d9d..4d92714fd9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -10,7 +10,6 @@ #include "Common/BitUtils.h" #include "Common/CommonTypes.h" #include "Common/MathUtil.h" -#include "Common/Unreachable.h" #include "Core/Core.h" #include "Core/CoreTiming.h" @@ -1129,8 +1128,7 @@ void JitArm64::addzex(UGeckoInstruction inst) int a = inst.RA, d = inst.RD; - if (gpr.IsImm(a) && - (HasConstantCarry() || (js.carryFlag == CarryFlag::InPPCState && gpr.GetImm(a) == 0))) + if (gpr.IsImm(a) && (gpr.GetImm(a) == 0 || HasConstantCarry())) { const u32 imm = gpr.GetImm(a); const bool is_all_ones = imm == 0xFFFFFFFF; @@ -1144,6 +1142,13 @@ void JitArm64::addzex(UGeckoInstruction inst) ComputeCarry(false); break; } + case CarryFlag::InHostCarry: + { + gpr.BindToRegister(d, false); + CSET(gpr.R(d), CCFlags::CC_CS); + ComputeCarry(false); + break; + } case CarryFlag::ConstantTrue: { gpr.SetImmediate(d, imm + 1); @@ -1156,8 +1161,6 @@ void JitArm64::addzex(UGeckoInstruction inst) ComputeCarry(false); break; } - default: - Common::Unreachable(); } } else From 7fb997c95c509eb5a6ca415b4d7dede7d7dd405c Mon Sep 17 00:00:00 2001 From: CasualPokePlayer <50538166+CasualPokePlayer@users.noreply.github.com> Date: Mon, 30 
Dec 2024 07:24:04 -0800 Subject: [PATCH 31/32] Add SYSCONF country code to DTM Recently there were some issues in TASVideos trying to sync a Donkey Kong Country Returns TAS. It eventually was synced by directly using the config from the TAS author. The exact setting which caused the desync was narrowed down to being in SYSCONF, with the country code. The TAS author lives in the US, so the country code matched the US country code, while the person attempting to sync the TAS did not live in the US. Adding SYSCONF country code to the DTM should avoid this being an issue for future Dolphin versions. --- Source/Core/Core/ConfigLoaders/MovieConfigLoader.cpp | 2 ++ Source/Core/Core/Movie.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Source/Core/Core/ConfigLoaders/MovieConfigLoader.cpp b/Source/Core/Core/ConfigLoaders/MovieConfigLoader.cpp index 752b562a27..37071de27b 100644 --- a/Source/Core/Core/ConfigLoaders/MovieConfigLoader.cpp +++ b/Source/Core/Core/ConfigLoaders/MovieConfigLoader.cpp @@ -41,6 +41,7 @@ static void LoadFromDTM(Config::Layer* config_layer, Movie::DTMHeader* dtm) else config_layer->Set(Config::MAIN_GC_LANGUAGE, static_cast(dtm->language)); config_layer->Set(Config::SYSCONF_WIDESCREEN, dtm->bWidescreen); + config_layer->Set(Config::SYSCONF_COUNTRY, dtm->countryCode); config_layer->Set(Config::GFX_HACK_EFB_ACCESS_ENABLE, dtm->bEFBAccessEnable); config_layer->Set(Config::GFX_HACK_SKIP_EFB_COPY_TO_RAM, dtm->bSkipEFBCopyToRam); @@ -69,6 +70,7 @@ void SaveToDTM(Movie::DTMHeader* dtm) else dtm->language = Config::Get(Config::MAIN_GC_LANGUAGE); dtm->bWidescreen = Config::Get(Config::SYSCONF_WIDESCREEN); + dtm->countryCode = Config::Get(Config::SYSCONF_COUNTRY); dtm->bEFBAccessEnable = Config::Get(Config::GFX_HACK_EFB_ACCESS_ENABLE); dtm->bSkipEFBCopyToRam = Config::Get(Config::GFX_HACK_SKIP_EFB_COPY_TO_RAM); diff --git a/Source/Core/Core/Movie.h b/Source/Core/Core/Movie.h index 32d160a39c..314123cf51 100644 --- a/Source/Core/Core/Movie.h
+++ b/Source/Core/Core/Movie.h @@ -132,7 +132,8 @@ struct DTMHeader bool bUseFMA; u8 GBAControllers; // GBA Controllers plugged in (the bits are ports 1-4) bool bWidescreen; // true indicates SYSCONF aspect ratio is 16:9, false for 4:3 - std::array reserved; // Padding for any new config options + u8 countryCode; // SYSCONF country code + std::array reserved; // Padding for any new config options std::array discChange; // Name of iso file to switch to, for two disc games. std::array revision; // Git hash u32 DSPiromHash; From fed9d28ca8385f2ea9a4567e9d4dddb2676d1b44 Mon Sep 17 00:00:00 2001 From: Tabitha Date: Mon, 6 Jan 2025 20:20:04 -0500 Subject: [PATCH 32/32] Update GP5E01.ini --- Data/Sys/GameSettings/GP5E01.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Data/Sys/GameSettings/GP5E01.ini b/Data/Sys/GameSettings/GP5E01.ini index 5dfebec818..4ce5e40bab 100644 --- a/Data/Sys/GameSettings/GP5E01.ini +++ b/Data/Sys/GameSettings/GP5E01.ini @@ -949,7 +949,7 @@ B09C002E 3B7B0002 83810010 83A10014 83C10018 83E1001C 38210010 00000000 -C204BF38 00000008 +C20A85F8 00000008 3FA08024 63BDD5F8 3C600001 60630025 A8AD8BA8 2C050000