diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index fd5cc4e32e..a89e8aa01f 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -273,11 +273,11 @@ protected: // !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30 // !emitting_routine && mode == Auto && jo.fastmem: X30 // - // Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push - // may be clobbered if mode != AlwaysFastAccess. + // If there are any other registers that the caller doesn't mind being overwritten, + // these can be indicated in scratch_gprs and scratch_fprs. void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS, - Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0), - BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false); + Arm64Gen::ARM64Reg addr, BitSet32 scratch_gprs = BitSet32(0), + BitSet32 scratch_fprs = BitSet32(0), bool emitting_routine = false); // Loadstore routines void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 1c54c00ebe..059be57048 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -54,7 +54,7 @@ void JitArm64::DoBacktrace(uintptr_t access_address, SContext* ctx) } void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, ARM64Reg addr, - BitSet32 gprs_to_push, BitSet32 fprs_to_push, + BitSet32 scratch_gprs, BitSet32 scratch_fprs, bool emitting_routine) { const u32 access_size = BackPatchInfo::GetFlagSize(flags); @@ -65,6 +65,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, const bool emit_fast_access = mode != MemAccessMode::AlwaysSlowAccess; const bool emit_slow_access = mode != 
MemAccessMode::AlwaysFastAccess; + const BitSet32 gprs_to_push = + (emitting_routine ? CALLER_SAVED_GPRS : gpr.GetCallerSavedUsed()) & ~scratch_gprs; + const BitSet32 fprs_to_push = + (emitting_routine ? BitSet32(0xFFFFFFFF) : fpr.GetCallerSavedUsed()) & ~scratch_fprs; + bool in_far_code = false; const u8* fast_access_start = GetCodePtr(); std::optional slow_access_fixup; diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 2da5d113aa..eed8fc77b9 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -123,14 +123,14 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o MOV(gpr.R(addr), addr_reg); } - BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; if (!update || early_update) - regs_in_use[DecodeReg(ARM64Reg::W1)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; if (jo.memcheck || !jo.fastmem) - regs_in_use[DecodeReg(ARM64Reg::W0)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; if (!jo.memcheck) - regs_in_use[DecodeReg(dest_reg)] = false; + scratch_gprs[DecodeReg(dest_reg)] = true; u32 access_size = BackPatchInfo::GetFlagSize(flags); u32 mmio_address = 0; @@ -140,22 +140,23 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size)) { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, regs_in_use, - fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, scratch_gprs, + scratch_fprs); } else if (mmio_address) { - regs_in_use[DecodeReg(ARM64Reg::W1)] = false; - regs_in_use[DecodeReg(ARM64Reg::W30)] = false; - regs_in_use[DecodeReg(dest_reg)] = false; - MMIOLoadToReg(m_system, 
m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, regs_in_use, - fprs_in_use, dest_reg, mmio_address, flags); + scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; + scratch_gprs[DecodeReg(ARM64Reg::W30)] = true; + scratch_gprs[DecodeReg(dest_reg)] = true; + MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, + gpr.GetCallerSavedUsed() & ~scratch_gprs, + fpr.GetCallerSavedUsed() & ~scratch_fprs, dest_reg, mmio_address, flags); addr_reg_set = false; } else { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, scratch_gprs, scratch_fprs); } gpr.BindToRegister(dest, false, true); @@ -271,13 +272,13 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s MOV(gpr.R(dest), addr_reg); } - BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); - regs_in_use[DecodeReg(ARM64Reg::W1)] = false; + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; + scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; if (!update || early_update) - regs_in_use[DecodeReg(ARM64Reg::W2)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W2)] = true; if (!jo.fastmem) - regs_in_use[DecodeReg(ARM64Reg::W0)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; u32 access_size = BackPatchInfo::GetFlagSize(flags); u32 mmio_address = 0; @@ -313,22 +314,24 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s else if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size)) { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, XA, scratch_gprs, + scratch_fprs); } else if (mmio_address) { - regs_in_use[DecodeReg(ARM64Reg::W1)] = false; - regs_in_use[DecodeReg(ARM64Reg::W2)] = false; - 
regs_in_use[DecodeReg(ARM64Reg::W30)] = false; - regs_in_use[DecodeReg(RS)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; + scratch_gprs[DecodeReg(ARM64Reg::W2)] = true; + scratch_gprs[DecodeReg(ARM64Reg::W30)] = true; + scratch_gprs[DecodeReg(RS)] = true; MMIOWriteRegToAddr(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, - regs_in_use, fprs_in_use, RS, mmio_address, flags); + gpr.GetCallerSavedUsed() & ~scratch_gprs, + fpr.GetCallerSavedUsed() & ~scratch_fprs, RS, mmio_address, flags); addr_reg_set = false; } else { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, XA, scratch_gprs, scratch_fprs); } if (update && !early_update) @@ -592,16 +595,16 @@ void JitArm64::lmw(UGeckoInstruction inst) else if (i != d) ADDI2R(addr_reg, addr_base_reg, (i - d) * 4); - BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); - regs_in_use[DecodeReg(addr_reg)] = false; + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; + scratch_gprs[DecodeReg(addr_reg)] = true; if (jo.memcheck || !jo.fastmem) - regs_in_use[DecodeReg(ARM64Reg::W0)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; if (!jo.memcheck) - regs_in_use[DecodeReg(dest_reg)] = false; + scratch_gprs[DecodeReg(dest_reg)] = true; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg), regs_in_use, - fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg), + scratch_gprs, scratch_fprs); gpr.BindToRegister(i, false, true); ASSERT(dest_reg == gpr.R(i)); @@ -710,15 +713,15 @@ void JitArm64::stmw(UGeckoInstruction inst) else if (i != s) ADDI2R(addr_reg, addr_base_reg, (i - s) * 4); - BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); - regs_in_use[DecodeReg(ARM64Reg::W1)] = false; - 
regs_in_use[DecodeReg(addr_reg)] = false; + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; + scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; + scratch_gprs[DecodeReg(addr_reg)] = true; if (!jo.fastmem) - regs_in_use[DecodeReg(ARM64Reg::W0)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use, - fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), scratch_gprs, + scratch_fprs); // To reduce register pressure and to avoid getting a pipeline-unfriendly long run of stores // after this instruction, flush registers that would be flushed after this instruction anyway. @@ -1043,14 +1046,14 @@ void JitArm64::dcbz(UGeckoInstruction inst) } } - BitSet32 gprs_to_push = gpr.GetCallerSavedUsed(); - BitSet32 fprs_to_push = fpr.GetCallerSavedUsed(); - gprs_to_push[DecodeReg(ARM64Reg::W1)] = false; + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; + scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; if (!jo.fastmem) - gprs_to_push[DecodeReg(ARM64Reg::W0)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1, - EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push); + EncodeRegTo64(addr_reg), scratch_gprs, scratch_fprs); if (using_dcbz_hack) SetJumpTarget(end_dcbz_hack); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index e5ded0dde4..28e89aeff4 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -164,23 +164,24 @@ void JitArm64::lfXX(UGeckoInstruction inst) MOV(gpr.R(a), addr_reg); } - BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; if (!update || early_update) 
- regs_in_use[DecodeReg(ARM64Reg::W1)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; if (jo.memcheck || !jo.fastmem) - regs_in_use[DecodeReg(ARM64Reg::W0)] = false; - fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; + scratch_fprs[DecodeReg(ARM64Reg::Q0)] = true; if (!jo.memcheck) - fprs_in_use[DecodeReg(VD)] = false; + scratch_fprs[DecodeReg(VD)] = true; if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags))) { - EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, XA, scratch_gprs, + scratch_fprs); } else { - EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, XA, scratch_gprs, scratch_fprs); } const ARM64Reg VD_again = fpr.RW(inst.FD, type, true); @@ -367,14 +368,14 @@ void JitArm64::stfXX(UGeckoInstruction inst) MOV(gpr.R(a), addr_reg); } - BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); - regs_in_use[DecodeReg(ARM64Reg::W1)] = false; + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; + scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; if (!update || early_update) - regs_in_use[DecodeReg(ARM64Reg::W2)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W2)] = true; if (!jo.fastmem) - regs_in_use[DecodeReg(ARM64Reg::W0)] = false; - fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; + scratch_fprs[DecodeReg(ARM64Reg::Q0)] = true; if (is_immediate) { @@ -402,20 +403,20 @@ void JitArm64::stfXX(UGeckoInstruction inst) else if (m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags))) { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, XA, regs_in_use, - fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, XA, 
scratch_gprs, + scratch_fprs); } else { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, XA, regs_in_use, - fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, XA, scratch_gprs, + scratch_fprs); } } else { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, XA, scratch_gprs, scratch_fprs); } if (update && !early_update) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index 2d5c5d9355..57970646f7 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -79,24 +79,23 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) if (js.assumeNoPairedQuantize) { - BitSet32 gprs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; - // Wipe the registers we are using as temporaries if (!update || early_update) - gprs_in_use[DecodeReg(ARM64Reg::W1)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; if (jo.memcheck || !jo.fastmem) - gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; - fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; + scratch_fprs[DecodeReg(ARM64Reg::Q0)] = true; if (!jo.memcheck) - fprs_in_use[DecodeReg(VS)] = false; + scratch_fprs[DecodeReg(VS)] = true; u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; if (!w) flags |= BackPatchInfo::FLAG_PAIR; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use, - fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), scratch_gprs, + scratch_fprs); } else { @@ -239,22 +238,21 @@ void 
JitArm64::psq_stXX(UGeckoInstruction inst) if (js.assumeNoPairedQuantize) { - BitSet32 gprs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; - // Wipe the registers we are using as temporaries - gprs_in_use[DecodeReg(ARM64Reg::W1)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W1)] = true; if (!update || early_update) - gprs_in_use[DecodeReg(ARM64Reg::W2)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W2)] = true; if (!jo.fastmem) - gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; + scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; if (!w) flags |= BackPatchInfo::FLAG_PAIR; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use, - fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), scratch_gprs, + scratch_fprs); } else { diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 56c26739a3..aefec65cad 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -526,10 +526,10 @@ void JitArm64::GenerateQuantizedLoads() ARM64Reg temp_reg = ARM64Reg::X0; ARM64Reg addr_reg = ARM64Reg::X1; ARM64Reg scale_reg = ARM64Reg::X2; - BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 3}; + BitSet32 scratch_gprs{0, 3}; if (!jo.memcheck) - gprs_to_push &= ~BitSet32{1}; - BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1}; + scratch_gprs[1] = true; + BitSet32 scratch_fprs{0, 1}; ARM64FloatEmitter float_emit(this); const u8* start = GetCodePtr(); @@ -541,7 +541,7 @@ void JitArm64::GenerateQuantizedLoads() BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32; EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, - gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true); + 
scratch_gprs | BitSet32{DecodeReg(scale_reg)}, scratch_fprs, true); RET(ARM64Reg::X30); } @@ -550,8 +550,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); @@ -568,8 +568,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); @@ -586,8 +586,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); @@ -603,8 +603,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, 
ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); @@ -622,7 +622,7 @@ void JitArm64::GenerateQuantizedLoads() BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, - gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true); + scratch_gprs | BitSet32{DecodeReg(scale_reg)}, scratch_fprs, true); RET(ARM64Reg::X30); } @@ -631,8 +631,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); @@ -649,8 +649,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); @@ -667,8 +667,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, 
ARM64Reg::D0); @@ -684,8 +684,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); @@ -736,12 +736,12 @@ void JitArm64::GenerateQuantizedStores() ARM64Reg temp_reg = ARM64Reg::X0; ARM64Reg scale_reg = ARM64Reg::X1; ARM64Reg addr_reg = ARM64Reg::X2; - BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 1}; + BitSet32 scratch_gprs{0, 1}; if (!jo.memcheck) - gprs_to_push &= ~BitSet32{2}; + scratch_gprs[2] = true; if (!jo.fastmem) - gprs_to_push &= ~BitSet32{3}; - BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1}; + scratch_gprs[3] = true; + BitSet32 scratch_fprs{0, 1}; ARM64FloatEmitter float_emit(this); const u8* start = GetCodePtr(); @@ -752,8 +752,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -771,8 +771,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -790,8 +790,8 @@ void 
JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -808,8 +808,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -826,8 +826,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -837,8 +837,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -856,8 +856,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + 
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -875,8 +875,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -893,8 +893,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -911,8 +911,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); }