From 2996fa017f775ea3c336d89d2998ba0ea0b97a14 Mon Sep 17 00:00:00 2001 From: camdenorrb Date: Sun, 15 Dec 2024 17:36:44 -0600 Subject: [PATCH 1/4] JitArm64_Integer: Optimize subfic for -1 Another one backported from x86. Not sure why I didn't do this in #12891 already. - Without carry Before: 0x1280001b mov w27, #-0x1 ; =-1 0x6b1a037b subs w27, w27, w26 After: 0x2a3a03fb mvn w27, w26 - With carry Before: 0x12800017 mov w23, #-0x1 ; =-1 0x6b1b02f7 subs w23, w23, w27 0x1a9f37f6 cset w22, hs 0x390bd3b6 strb w22, [x29, #0x2f4] After: 0x2a3b03f7 mvn w23, w27 --- .../PowerPC/JitArm64/JitArm64_Integer.cpp | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 8b00447ffd..76f771abe5 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -1394,23 +1394,34 @@ void JitArm64::subfic(UGeckoInstruction inst) else { const bool will_read = d == a; - const bool is_zero = imm == 0; gpr.BindToRegister(d, will_read); - - // d = imm - a ARM64Reg RD = gpr.R(d); + + if (imm == -1) { - Arm64GPRCache::ScopedARM64Reg WA(ARM64Reg::WZR); - if (!is_zero) + // d = -1 - a = ~a + MVN(RD, gpr.R(a)); + // CA is always set in this case + ComputeCarry(true); + } + else + { + const bool is_zero = imm == 0; + + // d = imm - a { - WA = will_read ? gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(RD); - MOVI2R(WA, imm); + Arm64GPRCache::ScopedARM64Reg WA(ARM64Reg::WZR); + if (!is_zero) + { + WA = will_read ? 
gpr.GetScopedReg() : Arm64GPRCache::ScopedARM64Reg(RD); + MOVI2R(WA, imm); + } + + CARRY_IF_NEEDED(SUB, SUBS, RD, WA, gpr.R(a)); } - CARRY_IF_NEEDED(SUB, SUBS, RD, WA, gpr.R(a)); + ComputeCarry(); } - - ComputeCarry(); } } From 600746a9bf487df773c796b43e2f375fac1245f1 Mon Sep 17 00:00:00 2001 From: camdenorrb Date: Sun, 15 Dec 2024 17:37:45 -0600 Subject: [PATCH 2/4] Optimize ps_sel with d == b || d == c --- Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp index 900d9c87c1..7ac2c31bb9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Paired.cpp @@ -7,6 +7,7 @@ #include "Common/CommonTypes.h" #include "Common/Config/Config.h" #include "Common/StringUtil.h" +#include "Common/Unreachable.h" #include "Core/Config/SessionSettings.h" #include "Core/ConfigManager.h" @@ -337,8 +338,12 @@ void JitArm64::ps_sel(UGeckoInstruction inst) const auto V0Q = fpr.GetScopedReg(); const ARM64Reg V0 = reg_encoder(V0Q); m_float_emit.FCMGE(size, V0, VA); - m_float_emit.BSL(V0, VC, VB); - m_float_emit.MOV(VD, V0); + if (d == b) + m_float_emit.BIT(VD, VC, V0); + else if (d == c) + m_float_emit.BIF(VD, VB, V0); + else + Common::Unreachable(); } ASSERT_MSG(DYNA_REC, singles == (fpr.IsSingle(a) && fpr.IsSingle(b) && fpr.IsSingle(c)), From d4211b20478c2ee6fa508d56e822d5c70ea29cf5 Mon Sep 17 00:00:00 2001 From: camdenorrb Date: Sun, 15 Dec 2024 17:38:37 -0600 Subject: [PATCH 3/4] Small dcbx optimization --- Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index ebcb8142b7..12c35bc43c 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp 
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -817,9 +817,8 @@ void JitArm64::dcbx(UGeckoInstruction inst) STR(IndexType::Unsigned, loop_counter, PPC_REG, PPCSTATE_OFF_SPR(SPR_CTR)); // downcount -= (WA * reg_cycle_count) - MUL(WB, WA, reg_cycle_count); + MSUB(reg_downcount, WA, reg_cycle_count, reg_downcount); // ^ Note that this cannot overflow because it's limited by (downcount/cycle_count). - SUB(reg_downcount, reg_downcount, WB); STR(IndexType::Unsigned, reg_downcount, PPC_REG, PPCSTATE_OFF(downcount)); SetJumpTarget(downcount_is_zero_or_negative); From 62cb347e218645c397c2231316e933013c9648f3 Mon Sep 17 00:00:00 2001 From: Camden Date: Sun, 15 Dec 2024 21:47:59 -0600 Subject: [PATCH 4/4] Update Readme.md --- Readme.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Readme.md b/Readme.md index 456fb88078..ed777941a3 100644 --- a/Readme.md +++ b/Readme.md @@ -9,7 +9,7 @@ ### Performance Build steps 1. `mkdir build` 2. `cd build` -3. `cmake .. -G Ninja -DENABLE_NOGUI=ON -DENABLE_QT=OFF -DENABLE_HEADLESS=ON -DENABLE_ALSA=OFF -DENABLE_PULSEAUDIO=OFF -DENABLE_VULKAN=ON -DFASTLOG=OFF -DENABLE_AUTOUPDATE=OFF -DENABLE_SDL=OFF -DENCODE_FRAMEDUMPS=ON -DENABLE_LLVM=OFF -DENABLE_X11=OFF -DCMAKE_BUILD_TYPE=Release -DENABLE_EGL=OFF -DENABLE_ANALYTICS=OFF -DUSE_RETRO_ACHIEVEMENTS=OFF -DUSE_DISCORD_PRESENCE=OFF -DCMAKE_CXX_FLAGS="-march=native -mtune=native -flto -O3" -DCMAKE_C_FLAGS="-march=native -mtune=native -flto -O3"` +3. `cmake .. -G Ninja -DENABLE_NOGUI=ON -DENABLE_QT=OFF -DENABLE_HEADLESS=ON -DENABLE_ALSA=OFF -DENABLE_PULSEAUDIO=OFF -DENABLE_VULKAN=ON -DFASTLOG=OFF -DENABLE_AUTOUPDATE=OFF -DENABLE_SDL=OFF -DENCODE_FRAMEDUMPS=ON -DENABLE_LLVM=OFF -DENABLE_X11=OFF -DCMAKE_BUILD_TYPE=Release -DENABLE_EGL=OFF -DENABLE_ANALYTICS=OFF -DUSE_RETRO_ACHIEVEMENTS=OFF -DUSE_DISCORD_PRESENCE=OFF -DCMAKE_CXX_FLAGS="-march=native -mtune=native -flto -O3 -pipe" -DCMAKE_C_FLAGS="-march=native -mtune=native -flto -O3 -pipe"` 4. `ninja -j $(nproc)`