From c9e6171d04b47665e5b981868f9ee4b7b5d60633 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sun, 29 Dec 2024 17:07:00 +0100 Subject: [PATCH] JitArm64: Remove now unnecessary locking of temp registers --- .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 45 +++++-------------- .../JitArm64/JitArm64_LoadStoreFloating.cpp | 24 +++------- .../JitArm64/JitArm64_LoadStorePaired.cpp | 45 +++++++++---------- 3 files changed, 36 insertions(+), 78 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index eed8fc77b9..d08fae4add 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -31,7 +31,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o { // We want to make sure to not get LR as a temp register gpr.Lock(ARM64Reg::W1, ARM64Reg::W30); - if (jo.memcheck || !jo.fastmem) + if (jo.memcheck) gpr.Lock(ARM64Reg::W0); gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false); @@ -127,7 +127,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o BitSet32 scratch_fprs; if (!update || early_update) scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; - if (jo.memcheck || !jo.fastmem) + if (jo.memcheck) scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; if (!jo.memcheck) scratch_gprs[DecodeReg(dest_reg)] = true; @@ -170,7 +170,7 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o } gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); - if (jo.memcheck || !jo.fastmem) + if (jo.memcheck) gpr.Unlock(ARM64Reg::W0); } @@ -178,9 +178,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s bool update) { // We want to make sure to not get LR as a temp register - gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); - if (!jo.fastmem) - gpr.Lock(ARM64Reg::W0); + gpr.Lock(ARM64Reg::W2, ARM64Reg::W30); // Don't materialize zero. ARM64Reg RS = gpr.IsImm(value, 0) ? ARM64Reg::WZR : gpr.R(value); @@ -274,11 +272,8 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s BitSet32 scratch_gprs; BitSet32 scratch_fprs; - scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; if (!update || early_update) scratch_gprs[DecodeReg(ARM64Reg::W2)] = true; - if (!jo.fastmem) - scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; u32 access_size = BackPatchInfo::GetFlagSize(flags); u32 mmio_address = 0; @@ -319,7 +314,6 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s } else if (mmio_address) { - scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; scratch_gprs[DecodeReg(ARM64Reg::W2)] = true; scratch_gprs[DecodeReg(ARM64Reg::W30)] = true; scratch_gprs[DecodeReg(RS)] = true; @@ -341,9 +335,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s MOV(gpr.R(dest), addr_reg); } - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); - if (!jo.fastmem) - gpr.Unlock(ARM64Reg::W0); + gpr.Unlock(ARM64Reg::W2, ARM64Reg::W30); } FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp, @@ -526,7 +518,7 @@ void JitArm64::lmw(UGeckoInstruction inst) s32 offset = inst.SIMM_16; gpr.Lock(ARM64Reg::W1, ARM64Reg::W30); - if (jo.memcheck || !jo.fastmem) + if (jo.memcheck) gpr.Lock(ARM64Reg::W0); // MMU games make use of a >= d despite this being invalid according to the PEM. @@ -598,7 +590,7 @@ void JitArm64::lmw(UGeckoInstruction inst) BitSet32 scratch_gprs; BitSet32 scratch_fprs; scratch_gprs[DecodeReg(addr_reg)] = true; - if (jo.memcheck || !jo.fastmem) + if (jo.memcheck) scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; if (!jo.memcheck) scratch_gprs[DecodeReg(dest_reg)] = true; @@ -633,7 +625,7 @@ void JitArm64::lmw(UGeckoInstruction inst) } gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); - if (jo.memcheck || !jo.fastmem) + if (jo.memcheck) gpr.Unlock(ARM64Reg::W0); } @@ -645,9 +637,7 @@ void JitArm64::stmw(UGeckoInstruction inst) u32 a = inst.RA, s = inst.RS; s32 offset = inst.SIMM_16; - gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); - if (!jo.fastmem) - gpr.Lock(ARM64Reg::W0); + gpr.Lock(ARM64Reg::W2, ARM64Reg::W30); ARM64Reg addr_reg = ARM64Reg::W2; bool a_is_addr_base_reg = false; @@ -715,10 +705,7 @@ void JitArm64::stmw(UGeckoInstruction inst) BitSet32 scratch_gprs; BitSet32 scratch_fprs; - scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; scratch_gprs[DecodeReg(addr_reg)] = true; - if (!jo.fastmem) - scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), scratch_gprs, scratch_fprs); @@ -753,9 +740,7 @@ void JitArm64::stmw(UGeckoInstruction inst) } } - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); - if (!jo.fastmem) - gpr.Unlock(ARM64Reg::W0); + gpr.Unlock(ARM64Reg::W2, ARM64Reg::W30); } void JitArm64::dcbx(UGeckoInstruction inst) @@ -975,14 +960,8 @@ void JitArm64::dcbz(UGeckoInstruction inst) int a = inst.RA, b = inst.RB; gpr.Lock(ARM64Reg::W1, ARM64Reg::W30); - if (!jo.fastmem) - gpr.Lock(ARM64Reg::W0); - Common::ScopeGuard register_guard([&] { - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); - if (!jo.fastmem) - gpr.Unlock(ARM64Reg::W0); - }); + Common::ScopeGuard register_guard([&] { gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); }); constexpr ARM64Reg addr_reg = ARM64Reg::W1; constexpr ARM64Reg temp_reg = ARM64Reg::W30; @@ -1049,8 +1028,6 @@ void JitArm64::dcbz(UGeckoInstruction inst) BitSet32 scratch_gprs; BitSet32 scratch_fprs; scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; - if (!jo.fastmem) - scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1, EncodeRegTo64(addr_reg), scratch_gprs, scratch_fprs); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index 28e89aeff4..c53e5f0f02 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -78,8 +78,7 @@ void JitArm64::lfXX(UGeckoInstruction inst) (flags & BackPatchInfo::FLAG_SIZE_64) != 0 ? RegType::LowerPair : RegType::DuplicatedSingle; gpr.Lock(ARM64Reg::W1, ARM64Reg::W30); - fpr.Lock(ARM64Reg::Q0); - if (jo.memcheck || !jo.fastmem) + if (jo.memcheck) gpr.Lock(ARM64Reg::W0); const ARM64Reg VD = fpr.RW(inst.FD, type, false); @@ -168,9 +167,8 @@ void JitArm64::lfXX(UGeckoInstruction inst) BitSet32 scratch_fprs; if (!update || early_update) scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; - if (jo.memcheck || !jo.fastmem) + if (jo.memcheck) scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; - scratch_gprs[DecodeReg(ARM64Reg::Q0)] = true; if (!jo.memcheck) scratch_fprs[DecodeReg(VD)] = true; @@ -194,8 +192,7 @@ void JitArm64::lfXX(UGeckoInstruction inst) } gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); - fpr.Unlock(ARM64Reg::Q0); - if (jo.memcheck || !jo.fastmem) + if (jo.memcheck) gpr.Unlock(ARM64Reg::W0); } @@ -265,8 +262,6 @@ void JitArm64::stfXX(UGeckoInstruction inst) u32 imm_addr = 0; bool is_immediate = false; - fpr.Lock(ARM64Reg::Q0); - const bool have_single = fpr.IsSingle(inst.FS, true); Arm64FPRCache::ScopedARM64Reg V0 = @@ -279,9 +274,7 @@ void JitArm64::stfXX(UGeckoInstruction inst) V0 = std::move(single_reg); } - gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); - if (!jo.fastmem) - gpr.Lock(ARM64Reg::W0); + gpr.Lock(ARM64Reg::W2, ARM64Reg::W30); ARM64Reg addr_reg = ARM64Reg::W2; @@ -370,12 +363,8 @@ void JitArm64::stfXX(UGeckoInstruction inst) BitSet32 scratch_gprs; BitSet32 scratch_fprs; - scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; if (!update || early_update) scratch_gprs[DecodeReg(ARM64Reg::W2)] = true; - if (!jo.fastmem) - scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; - scratch_fprs[DecodeReg(ARM64Reg::Q0)] = true; if (is_immediate) { @@ -426,8 +415,5 @@ void JitArm64::stfXX(UGeckoInstruction inst) MOV(gpr.R(a), addr_reg); } - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); - fpr.Unlock(ARM64Reg::Q0); - if (!jo.fastmem) - gpr.Unlock(ARM64Reg::W0); + gpr.Unlock(ARM64Reg::W2, ARM64Reg::W30); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index 57970646f7..9a8f2a3ede 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -39,13 +39,12 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) const int w = indexed ? inst.Wx : inst.W; gpr.Lock(ARM64Reg::W1, ARM64Reg::W30); - fpr.Lock(ARM64Reg::Q0); if (!js.assumeNoPairedQuantize) { gpr.Lock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3); - fpr.Lock(ARM64Reg::Q1); + fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1); } - else if (jo.memcheck || !jo.fastmem) + else if (jo.memcheck) { gpr.Lock(ARM64Reg::W0); } @@ -84,9 +83,8 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) if (!update || early_update) scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; - if (jo.memcheck || !jo.fastmem) + if (jo.memcheck) scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; - scratch_fprs[DecodeReg(ARM64Reg::Q0)] = true; if (!jo.memcheck) scratch_fprs[DecodeReg(VS)] = true; @@ -133,13 +131,12 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) } gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); - fpr.Unlock(ARM64Reg::Q0); if (!js.assumeNoPairedQuantize) { gpr.Unlock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3); - fpr.Unlock(ARM64Reg::Q1); + fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1); } - else if (jo.memcheck || !jo.fastmem) + else if (jo.memcheck) { gpr.Unlock(ARM64Reg::W0); } @@ -166,9 +163,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) const int i = indexed ? inst.Ix : inst.I; const int w = indexed ? inst.Wx : inst.W; - fpr.Lock(ARM64Reg::Q0); if (!js.assumeNoPairedQuantize) - fpr.Lock(ARM64Reg::Q1); + fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1); const bool have_single = fpr.IsSingle(inst.RS); @@ -204,11 +200,13 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) } } - gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); - if (!js.assumeNoPairedQuantize || !jo.fastmem) - gpr.Lock(ARM64Reg::W0); - if (!js.assumeNoPairedQuantize && !jo.fastmem) - gpr.Lock(ARM64Reg::W3); + gpr.Lock(ARM64Reg::W2, ARM64Reg::W30); + if (!js.assumeNoPairedQuantize) + { + gpr.Lock(ARM64Reg::W0, ARM64Reg::W1); + if (!jo.fastmem) + gpr.Lock(ARM64Reg::W3); + } constexpr ARM64Reg type_reg = ARM64Reg::W0; constexpr ARM64Reg scale_reg = ARM64Reg::W1; @@ -241,11 +239,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) BitSet32 scratch_gprs; BitSet32 scratch_fprs; - scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; if (!update || early_update) scratch_gprs[DecodeReg(ARM64Reg::W2)] = true; - if (!jo.fastmem) - scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; if (!w) @@ -278,12 +273,12 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) MOV(gpr.R(inst.RA), addr_reg); } - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); - fpr.Unlock(ARM64Reg::Q0); - if (!js.assumeNoPairedQuantize || !jo.fastmem) - gpr.Unlock(ARM64Reg::W0); - if (!js.assumeNoPairedQuantize && !jo.fastmem) - gpr.Unlock(ARM64Reg::W3); + gpr.Unlock(ARM64Reg::W2, ARM64Reg::W30); if (!js.assumeNoPairedQuantize) - fpr.Unlock(ARM64Reg::Q1); + { + gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1); + if (!jo.fastmem) + gpr.Unlock(ARM64Reg::W3); + fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1); + } }