From f8bd262c2f37ff6e0ae2ad5c5900ea637320a241 Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Sat, 28 Dec 2024 20:39:27 +0100
Subject: [PATCH] JitArm64: Check GPRs/FPRs to push inside EmitBackpatchRoutine

Preparation for the next commit, which will make EmitBackpatchRoutine
allocate registers on its own. Because the register allocation will change
during the call to EmitBackpatchRoutine, the set of GPRs/FPRs to push can't
be computed prior to the call, so let's compute them during the call instead.
---
 Source/Core/Core/PowerPC/JitArm64/Jit.h       |  8 +-
 .../PowerPC/JitArm64/JitArm64_BackPatch.cpp   |  7 +-
 .../PowerPC/JitArm64/JitArm64_LoadStore.cpp   | 91 ++++++++++---------
 .../JitArm64/JitArm64_LoadStoreFloating.cpp   | 39 ++++----
 .../JitArm64/JitArm64_LoadStorePaired.cpp     | 32 +++----
 Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp  | 90 +++++++++---------
 6 files changed, 137 insertions(+), 130 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index fd5cc4e32e..a89e8aa01f 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -273,11 +273,11 @@ protected:
   //   !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30
   //   !emitting_routine && mode == Auto && jo.fastmem: X30
   //
-  // Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push
-  // may be clobbered if mode != AlwaysFastAccess.
+  // If there are any other registers that the caller doesn't mind being overwritten,
+  // these can be indicated in scratch_gprs and scratch_fprs.
   void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
-                            Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
-                            BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);
+                            Arm64Gen::ARM64Reg addr, BitSet32 scratch_gprs = BitSet32(0),
+                            BitSet32 scratch_fprs = BitSet32(0), bool emitting_routine = false);
 
   // Loadstore routines
   void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
index 1c54c00ebe..059be57048 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp
@@ -54,7 +54,7 @@ void JitArm64::DoBacktrace(uintptr_t access_address, SContext* ctx)
 }
 
 void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, ARM64Reg addr,
-                                    BitSet32 gprs_to_push, BitSet32 fprs_to_push,
+                                    BitSet32 scratch_gprs, BitSet32 scratch_fprs,
                                     bool emitting_routine)
 {
   const u32 access_size = BackPatchInfo::GetFlagSize(flags);
@@ -65,6 +65,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
   const bool emit_fast_access = mode != MemAccessMode::AlwaysSlowAccess;
   const bool emit_slow_access = mode != MemAccessMode::AlwaysFastAccess;
 
+  const BitSet32 gprs_to_push =
+      (emitting_routine ? CALLER_SAVED_GPRS : gpr.GetCallerSavedUsed()) & ~scratch_gprs;
+  const BitSet32 fprs_to_push =
+      (emitting_routine ? BitSet32(0xFFFFFFFF) : fpr.GetCallerSavedUsed()) & ~scratch_fprs;
+
   bool in_far_code = false;
   const u8* fast_access_start = GetCodePtr();
   std::optional<FixupBranch> slow_access_fixup;
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
index 2da5d113aa..eed8fc77b9 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp
@@ -123,14 +123,14 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
     MOV(gpr.R(addr), addr_reg);
   }
 
-  BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
-  BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
+  BitSet32 scratch_gprs;
+  BitSet32 scratch_fprs;
   if (!update || early_update)
-    regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
+    scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
   if (jo.memcheck || !jo.fastmem)
-    regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
+    scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
   if (!jo.memcheck)
-    regs_in_use[DecodeReg(dest_reg)] = false;
+    scratch_gprs[DecodeReg(dest_reg)] = true;
 
   u32 access_size = BackPatchInfo::GetFlagSize(flags);
   u32 mmio_address = 0;
@@ -140,22 +140,23 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
   if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size))
   {
     set_addr_reg_if_needed();
-    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, regs_in_use,
-                         fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, scratch_gprs,
+                         scratch_fprs);
   }
   else if (mmio_address)
   {
-    regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
-    regs_in_use[DecodeReg(ARM64Reg::W30)] = false;
-    regs_in_use[DecodeReg(dest_reg)] = false;
-    MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, regs_in_use,
-                  fprs_in_use, dest_reg, mmio_address, flags);
+    scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
+    scratch_gprs[DecodeReg(ARM64Reg::W30)] = true;
+    scratch_gprs[DecodeReg(dest_reg)] = true;
+    MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit,
+                  gpr.GetCallerSavedUsed() & ~scratch_gprs,
+                  fpr.GetCallerSavedUsed() & ~scratch_fprs, dest_reg, mmio_address, flags);
     addr_reg_set = false;
   }
   else
   {
     set_addr_reg_if_needed();
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, regs_in_use, fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, scratch_gprs, scratch_fprs);
   }
 
   gpr.BindToRegister(dest, false, true);
@@ -271,13 +272,13 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
     MOV(gpr.R(dest), addr_reg);
   }
 
-  BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
-  BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
-  regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
+  BitSet32 scratch_gprs;
+  BitSet32 scratch_fprs;
+  scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
   if (!update || early_update)
-    regs_in_use[DecodeReg(ARM64Reg::W2)] = false;
+    scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
   if (!jo.fastmem)
-    regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
+    scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
 
   u32 access_size = BackPatchInfo::GetFlagSize(flags);
   u32 mmio_address = 0;
@@ -313,22 +314,24 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
   else if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size))
   {
     set_addr_reg_if_needed();
-    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, XA, regs_in_use, fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, XA, scratch_gprs,
+                         scratch_fprs);
   }
   else if (mmio_address)
   {
-    regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
-    regs_in_use[DecodeReg(ARM64Reg::W2)] = false;
-    regs_in_use[DecodeReg(ARM64Reg::W30)] = false;
-    regs_in_use[DecodeReg(RS)] = false;
+    scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
+    scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
+    scratch_gprs[DecodeReg(ARM64Reg::W30)] = true;
+    scratch_gprs[DecodeReg(RS)] = true;
     MMIOWriteRegToAddr(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit,
-                       regs_in_use, fprs_in_use, RS, mmio_address, flags);
+                       gpr.GetCallerSavedUsed() & ~scratch_gprs,
+                       fpr.GetCallerSavedUsed() & ~scratch_fprs, RS, mmio_address, flags);
     addr_reg_set = false;
   }
   else
   {
     set_addr_reg_if_needed();
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, XA, regs_in_use, fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, XA, scratch_gprs, scratch_fprs);
   }
 
   if (update && !early_update)
@@ -592,16 +595,16 @@ void JitArm64::lmw(UGeckoInstruction inst)
     else if (i != d)
       ADDI2R(addr_reg, addr_base_reg, (i - d) * 4);
 
-    BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
-    BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
-    regs_in_use[DecodeReg(addr_reg)] = false;
+    BitSet32 scratch_gprs;
+    BitSet32 scratch_fprs;
+    scratch_gprs[DecodeReg(addr_reg)] = true;
     if (jo.memcheck || !jo.fastmem)
-      regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
+      scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
     if (!jo.memcheck)
-      regs_in_use[DecodeReg(dest_reg)] = false;
+      scratch_gprs[DecodeReg(dest_reg)] = true;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg), regs_in_use,
-                         fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg),
+                         scratch_gprs, scratch_fprs);
 
     gpr.BindToRegister(i, false, true);
     ASSERT(dest_reg == gpr.R(i));
@@ -710,15 +713,15 @@ void JitArm64::stmw(UGeckoInstruction inst)
     else if (i != s)
      ADDI2R(addr_reg, addr_base_reg, (i - s) * 4);
 
-    BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
-    BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
-    regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
-    regs_in_use[DecodeReg(addr_reg)] = false;
+    BitSet32 scratch_gprs;
+    BitSet32 scratch_fprs;
+    scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
+    scratch_gprs[DecodeReg(addr_reg)] = true;
     if (!jo.fastmem)
-      regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
+      scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use,
-                         fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), scratch_gprs,
+                         scratch_fprs);
 
     // To reduce register pressure and to avoid getting a pipeline-unfriendly long run of stores
     // after this instruction, flush registers that would be flushed after this instruction anyway.
@@ -1043,14 +1046,14 @@ void JitArm64::dcbz(UGeckoInstruction inst)
     }
   }
 
-  BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
-  BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
-  gprs_to_push[DecodeReg(ARM64Reg::W1)] = false;
+  BitSet32 scratch_gprs;
+  BitSet32 scratch_fprs;
+  scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
   if (!jo.fastmem)
-    gprs_to_push[DecodeReg(ARM64Reg::W0)] = false;
+    scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
 
   EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1,
-                       EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);
+                       EncodeRegTo64(addr_reg), scratch_gprs, scratch_fprs);
 
   if (using_dcbz_hack)
     SetJumpTarget(end_dcbz_hack);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
index e5ded0dde4..28e89aeff4 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
@@ -164,23 +164,24 @@ void JitArm64::lfXX(UGeckoInstruction inst)
     MOV(gpr.R(a), addr_reg);
   }
 
-  BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
-  BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
+  BitSet32 scratch_gprs;
+  BitSet32 scratch_fprs;
   if (!update || early_update)
-    regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
+    scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
   if (jo.memcheck || !jo.fastmem)
-    regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
-  fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
+    scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
+  scratch_fprs[DecodeReg(ARM64Reg::Q0)] = true;
   if (!jo.memcheck)
-    fprs_in_use[DecodeReg(VD)] = false;
+    scratch_fprs[DecodeReg(VD)] = true;
 
   if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags)))
   {
-    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, XA, regs_in_use, fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, XA, scratch_gprs,
+                         scratch_fprs);
   }
   else
   {
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, XA, regs_in_use, fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, XA, scratch_gprs, scratch_fprs);
   }
 
   const ARM64Reg VD_again = fpr.RW(inst.FD, type, true);
@@ -367,14 +368,14 @@ void JitArm64::stfXX(UGeckoInstruction inst)
     MOV(gpr.R(a), addr_reg);
   }
 
-  BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
-  BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
-  regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
+  BitSet32 scratch_gprs;
+  BitSet32 scratch_fprs;
+  scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
   if (!update || early_update)
-    regs_in_use[DecodeReg(ARM64Reg::W2)] = false;
+    scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
   if (!jo.fastmem)
-    regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
-  fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
+    scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
+  scratch_fprs[DecodeReg(ARM64Reg::Q0)] = true;
 
   if (is_immediate)
   {
@@ -402,20 +403,20 @@ void JitArm64::stfXX(UGeckoInstruction inst)
     else if (m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags)))
     {
       set_addr_reg_if_needed();
-      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, XA, regs_in_use,
-                           fprs_in_use);
+      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, XA, scratch_gprs,
+                           scratch_fprs);
     }
     else
     {
       set_addr_reg_if_needed();
-      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, XA, regs_in_use,
-                           fprs_in_use);
+      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, XA, scratch_gprs,
+                           scratch_fprs);
     }
   }
   else
   {
     set_addr_reg_if_needed();
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, XA, regs_in_use, fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, XA, scratch_gprs, scratch_fprs);
   }
 
   if (update && !early_update)
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
index 2d5c5d9355..57970646f7 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp
@@ -79,24 +79,23 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
 
   if (js.assumeNoPairedQuantize)
   {
-    BitSet32 gprs_in_use = gpr.GetCallerSavedUsed();
-    BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
+    BitSet32 scratch_gprs;
+    BitSet32 scratch_fprs;
 
-    // Wipe the registers we are using as temporaries
     if (!update || early_update)
-      gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
+      scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
     if (jo.memcheck || !jo.fastmem)
-      gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
-    fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
+      scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
+    scratch_fprs[DecodeReg(ARM64Reg::Q0)] = true;
     if (!jo.memcheck)
-      fprs_in_use[DecodeReg(VS)] = false;
+      scratch_fprs[DecodeReg(VS)] = true;
 
     u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
     if (!w)
       flags |= BackPatchInfo::FLAG_PAIR;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use,
-                         fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), scratch_gprs,
+                         scratch_fprs);
   }
   else
   {
@@ -239,22 +238,21 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
 
   if (js.assumeNoPairedQuantize)
   {
-    BitSet32 gprs_in_use = gpr.GetCallerSavedUsed();
-    BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
+    BitSet32 scratch_gprs;
+    BitSet32 scratch_fprs;
 
-    // Wipe the registers we are using as temporaries
-    gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
+    scratch_gprs[DecodeReg(ARM64Reg::W1)] = true;
     if (!update || early_update)
-      gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
+      scratch_gprs[DecodeReg(ARM64Reg::W2)] = true;
     if (!jo.fastmem)
-      gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
+      scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
 
     u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
     if (!w)
       flags |= BackPatchInfo::FLAG_PAIR;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use,
-                         fprs_in_use);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), scratch_gprs,
+                         scratch_fprs);
   }
   else
   {
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
index 56c26739a3..aefec65cad 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp
@@ -526,10 +526,10 @@ void JitArm64::GenerateQuantizedLoads()
   ARM64Reg temp_reg = ARM64Reg::X0;
   ARM64Reg addr_reg = ARM64Reg::X1;
   ARM64Reg scale_reg = ARM64Reg::X2;
-  BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 3};
+  BitSet32 scratch_gprs{0, 3};
   if (!jo.memcheck)
-    gprs_to_push &= ~BitSet32{1};
+    scratch_gprs[1] = true;
-  BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
+  BitSet32 scratch_fprs{0, 1};
   ARM64FloatEmitter float_emit(this);
 
   const u8* start = GetCodePtr();
@@ -541,7 +541,7 @@ void JitArm64::GenerateQuantizedLoads()
                           BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;
 
     EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
-                         gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true);
+                         scratch_gprs | BitSet32{DecodeReg(scale_reg)}, scratch_fprs, true);
 
     RET(ARM64Reg::X30);
   }
@@ -550,8 +550,8 @@ void JitArm64::GenerateQuantizedLoads()
     constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
                           BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@@ -568,8 +568,8 @@ void JitArm64::GenerateQuantizedLoads()
     constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
                           BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@@ -586,8 +586,8 @@ void JitArm64::GenerateQuantizedLoads()
     constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
                           BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -603,8 +603,8 @@ void JitArm64::GenerateQuantizedLoads()
     constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
                           BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -622,7 +622,7 @@ void JitArm64::GenerateQuantizedLoads()
         BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
 
     EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
-                         gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true);
+                         scratch_gprs | BitSet32{DecodeReg(scale_reg)}, scratch_fprs, true);
 
     RET(ARM64Reg::X30);
   }
@@ -631,8 +631,8 @@ void JitArm64::GenerateQuantizedLoads()
     constexpr u32 flags =
         BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@@ -649,8 +649,8 @@ void JitArm64::GenerateQuantizedLoads()
     constexpr u32 flags =
         BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@@ -667,8 +667,8 @@ void JitArm64::GenerateQuantizedLoads()
     constexpr u32 flags =
         BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -684,8 +684,8 @@ void JitArm64::GenerateQuantizedLoads()
     constexpr u32 flags =
         BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
     float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -736,12 +736,12 @@ void JitArm64::GenerateQuantizedStores()
   ARM64Reg temp_reg = ARM64Reg::X0;
   ARM64Reg scale_reg = ARM64Reg::X1;
   ARM64Reg addr_reg = ARM64Reg::X2;
-  BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 1};
+  BitSet32 scratch_gprs{0, 1};
   if (!jo.memcheck)
-    gprs_to_push &= ~BitSet32{2};
+    scratch_gprs[2] = true;
   if (!jo.fastmem)
-    gprs_to_push &= ~BitSet32{3};
+    scratch_gprs[3] = true;
-  BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
+  BitSet32 scratch_fprs{0, 1};
   ARM64FloatEmitter float_emit(this);
 
   const u8* start = GetCodePtr();
@@ -752,8 +752,8 @@ void JitArm64::GenerateQuantizedStores()
     constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
                           BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     RET(ARM64Reg::X30);
   }
@@ -771,8 +771,8 @@ void JitArm64::GenerateQuantizedStores()
     constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
                           BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     RET(ARM64Reg::X30);
   }
@@ -790,8 +790,8 @@ void JitArm64::GenerateQuantizedStores()
     constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
                           BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     RET(ARM64Reg::X30);
   }
@@ -808,8 +808,8 @@ void JitArm64::GenerateQuantizedStores()
     constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
                           BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     RET(ARM64Reg::X30);
   }
@@ -826,8 +826,8 @@ void JitArm64::GenerateQuantizedStores()
     constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
                           BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     RET(ARM64Reg::X30);
   }
@@ -837,8 +837,8 @@ void JitArm64::GenerateQuantizedStores()
     constexpr u32 flags =
         BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     RET(ARM64Reg::X30);
   }
@@ -856,8 +856,8 @@ void JitArm64::GenerateQuantizedStores()
    constexpr u32 flags =
         BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     RET(ARM64Reg::X30);
   }
@@ -875,8 +875,8 @@ void JitArm64::GenerateQuantizedStores()
     constexpr u32 flags =
         BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     RET(ARM64Reg::X30);
   }
@@ -893,8 +893,8 @@ void JitArm64::GenerateQuantizedStores()
     constexpr u32 flags =
         BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     RET(ARM64Reg::X30);
   }
@@ -911,8 +911,8 @@ void JitArm64::GenerateQuantizedStores()
     constexpr u32 flags =
         BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
 
-    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
-                         fprs_to_push, true);
+    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
+                         scratch_fprs, true);
 
     RET(ARM64Reg::X30);
   }
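
A standalone sketch follows (not part of the patch; plain uint32_t masks stand in for BitSet32, and the register numbers are made up). It illustrates why the call-site rewrite above is behavior-preserving today: clearing a bit in the old regs_in_use set is equivalent to setting that bit in scratch_gprs and letting EmitBackpatchRoutine apply the mask itself. Once EmitBackpatchRoutine starts allocating registers on its own (the next commit), only the callee can evaluate GetCallerSavedUsed() at the right time, which is the point of the change.

// sketch.cpp -- illustration only, not Dolphin code.
#include <cassert>
#include <cstdint>

int main()
{
  // Hypothetical set of caller-saved GPRs currently holding live values
  // (what gpr.GetCallerSavedUsed() would report at the call site).
  const uint32_t caller_saved_used = 0b1011'0110;

  // Old scheme: the call site computed the push set itself by clearing
  // the bits for its temporaries, e.g. W1.
  uint32_t regs_in_use = caller_saved_used;
  regs_in_use &= ~(1u << 1);

  // New scheme: the call site only marks W1 as scratch; EmitBackpatchRoutine
  // derives the push set by masking the scratch registers out.
  const uint32_t scratch_gprs = 1u << 1;
  const uint32_t gprs_to_push = caller_saved_used & ~scratch_gprs;

  assert(regs_in_use == gprs_to_push);  // both schemes push the same registers
  return 0;
}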