diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index fd5cc4e32e..fe30f36dbc 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -250,34 +250,42 @@ protected:
   // This is the core routine for accessing emulated memory, with support for
   // many different kinds of loads and stores as well as fastmem/backpatching.
   //
-  // Registers used:
+  // The addr parameter can be any register, but the code emitted for slow accesses
+  // will be slightly more efficient if the addr parameter is as follows:
   //
-  //              addr     scratch
-  // Store:       X2       X1
-  // Load:        X1
-  // Zero 256:    X1       X30
-  // Store float: X2       Q0
-  // Load float:  X1
+  // Store:       W2
+  // Load:        W1
+  // Zero 256:    W1
+  // Store float: W2
+  // Load float:  W1
   //
-  // If mode == AlwaysFastAccess, the addr argument can be any register.
-  // Otherwise it must be the register listed in the table above.
+  // This routine allocates most scratch registers dynamically, but in the following
+  // situations, specific scratch registers have to be allocated in advance:
   //
-  // Additional scratch registers are used in the following situations:
+  // emitting_routine && mode == Auto: X0
+  // emitting_routine && mode == Auto && (flags & BackPatchInfo::FLAG_STORE): X1
+  // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
+  // emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X3
+  // emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X0
+  // emitting_routine && mode != AlwaysSlowAccess &&
+  //   (flags & BackPatchInfo::FLAG_STORE) && !(flags & BackPatchInfo::FLAG_FLOAT): X1
+  // emitting_routine && mode != AlwaysSlowAccess &&
+  //   (flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT): Q0
+  // emitting_routine && mode != AlwaysSlowAccess &&
+  //   (flags & BackPatchInfo::FLAG_ZERO_256): X30
+  // !emitting_routine && mode == Auto && jo.fastmem: X30
   //
-  // emitting_routine && mode == Auto: X0
-  // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
-  // emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X3
-  // mode != AlwaysSlowAccess && !jo.fastmem: X0
+  // If there are any other registers that the caller doesn't mind being overwritten,
+  // these can be indicated in scratch_gprs and scratch_fprs.
+  //
+  // In the following situations, certain host registers must not contain guest registers:
+  //
+  // !emitting_routine && mode != AlwaysFastAccess && jo.memcheck: X30
   // !emitting_routine && mode != AlwaysFastAccess && jo.memcheck &&
-  //   (flags & BackPatchInfo::FLAG_LOAD): X0
-  // !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30
-  // !emitting_routine && mode == Auto && jo.fastmem: X30
-  //
-  // Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push
-  // may be clobbered if mode != AlwaysFastAccess.
+ // (flags & BackPatchInfo::FLAG_LOAD): X0 void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS, - Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0), - BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false); + Arm64Gen::ARM64Reg addr, BitSet32 scratch_gprs = BitSet32(0), + BitSet32 scratch_fprs = BitSet32(0), bool emitting_routine = false); // Loadstore routines void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 1c54c00ebe..3c11e83caa 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -54,7 +54,7 @@ void JitArm64::DoBacktrace(uintptr_t access_address, SContext* ctx) } void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, ARM64Reg addr, - BitSet32 gprs_to_push, BitSet32 fprs_to_push, + BitSet32 scratch_gprs, BitSet32 scratch_fprs, bool emitting_routine) { const u32 access_size = BackPatchInfo::GetFlagSize(flags); @@ -65,6 +65,148 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, const bool emit_fast_access = mode != MemAccessMode::AlwaysSlowAccess; const bool emit_slow_access = mode != MemAccessMode::AlwaysFastAccess; + const bool memcheck = jo.memcheck && !emitting_routine; + + if ((flags & BackPatchInfo::FLAG_LOAD)) + { + if ((flags & BackPatchInfo::FLAG_FLOAT)) + scratch_fprs[DecodeReg(RS)] = !memcheck; + else + scratch_gprs[DecodeReg(RS)] = !memcheck; + } + + BitSet32 temp_gpr_candidates = scratch_gprs; + BitSet32 temp_fpr_candidates = scratch_fprs; + temp_gpr_candidates[DecodeReg(addr)] = false; + if (flags & BackPatchInfo::FLAG_FLOAT) + temp_fpr_candidates[DecodeReg(RS)] = false; + else if (!(flags & BackPatchInfo::FLAG_ZERO_256)) + temp_gpr_candidates[DecodeReg(RS)] = false; + if (!emitting_routine && mode == MemAccessMode::Auto && jo.fastmem) + temp_gpr_candidates[30] = true; + + const auto allocate_temp_reg = [this](Arm64RegCache& reg_cache, + BitSet32& candidates) -> Arm64RegCache::ScopedARM64Reg { + for (int i : candidates) + { + candidates[i] = false; + ARM64Reg reg = ARM64Reg(i); + if (®_cache == &fpr) + reg = EncodeRegToQuad(reg); + return reg; + } + return reg_cache.GetScopedReg(); + }; + + const auto can_allocate_temp_reg_for_free = [](Arm64RegCache& reg_cache, BitSet32& candidates) { + return candidates != BitSet32{} || reg_cache.GetUnlockedRegisterCount() > 0; + }; + + Arm64RegCache::ScopedARM64Reg temp_gpr_1; + Arm64RegCache::ScopedARM64Reg temp_gpr_2; + Arm64RegCache::ScopedARM64Reg temp_gpr_3; + Arm64RegCache::ScopedARM64Reg temp_fpr_1; + + if (emit_fast_access) + { + if ((flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT)) + { + temp_fpr_1 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::Q0) : + allocate_temp_reg(fpr, temp_fpr_candidates); + scratch_fprs[DecodeReg(temp_fpr_1)] = true; + } + else if (flags & BackPatchInfo::FLAG_STORE) + { + temp_gpr_1 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::W1) : + allocate_temp_reg(gpr, temp_gpr_candidates); + scratch_gprs[DecodeReg(temp_gpr_1)] = true; + } + else if (flags & BackPatchInfo::FLAG_ZERO_256) + { + temp_gpr_1 = emitting_routine ? 
Arm64RegCache::ScopedARM64Reg(ARM64Reg::W30) : + allocate_temp_reg(gpr, temp_gpr_candidates); + scratch_gprs[DecodeReg(temp_gpr_1)] = true; + } + + if (!jo.fastmem) + { + temp_gpr_2 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::W0) : + allocate_temp_reg(gpr, temp_gpr_candidates); + temp_gpr_3 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::W3) : + allocate_temp_reg(gpr, temp_gpr_candidates); + scratch_gprs[DecodeReg(temp_gpr_2)] = true; + scratch_gprs[DecodeReg(temp_gpr_3)] = true; + } + else if (emit_slow_access && emitting_routine) + { + temp_gpr_2 = ARM64Reg::W0; + temp_gpr_3 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W1 : ARM64Reg::W3; + scratch_gprs[DecodeReg(temp_gpr_2)] = true; + scratch_gprs[DecodeReg(temp_gpr_3)] = true; + } + } + + // Setting memcheck_temp_gpr to W30 works, but because W30 is a register that needs to be pushed + // and popped, using W30 may require us to emit an extra push and pop instruction, depending on + // what other registers need pushing and popping. If we can find another register to use without + // having to evict anything from the register cache, let's do that instead of using W30. + ARM64Reg memcheck_temp_gpr = ARM64Reg::W30; + if (emit_slow_access && memcheck) + { + const auto is_suitable_as_memcheck_temp_gpr = [flags](ARM64Reg reg) { + return reg != ARM64Reg::INVALID_REG && reg != ARM64Reg::W30 && + (reg != ARM64Reg::W0 || !(flags & BackPatchInfo::FLAG_LOAD)); + }; + + const auto get_unset_temp_gpr = [&]() -> Arm64RegCache::ScopedARM64Reg& { + if (temp_gpr_1 == ARM64Reg::INVALID_REG) + return temp_gpr_1; + if (temp_gpr_2 == ARM64Reg::INVALID_REG) + return temp_gpr_2; + ASSERT(temp_gpr_3 == ARM64Reg::INVALID_REG); + return temp_gpr_3; + }; + + if (is_suitable_as_memcheck_temp_gpr(temp_gpr_1)) + { + memcheck_temp_gpr = temp_gpr_1; + } + else if (is_suitable_as_memcheck_temp_gpr(temp_gpr_2)) + { + memcheck_temp_gpr = temp_gpr_2; + } + else if (is_suitable_as_memcheck_temp_gpr(temp_gpr_3)) + { + memcheck_temp_gpr = temp_gpr_3; + } + else + { + while (can_allocate_temp_reg_for_free(gpr, temp_gpr_candidates)) + { + Arm64RegCache::ScopedARM64Reg& temp_gpr_x = get_unset_temp_gpr(); + temp_gpr_x = allocate_temp_reg(gpr, temp_gpr_candidates); + scratch_gprs[DecodeReg(temp_gpr_x)] = true; + if (is_suitable_as_memcheck_temp_gpr(temp_gpr_x)) + break; + } + } + + if (temp_fpr_1 == ARM64Reg::INVALID_REG && + can_allocate_temp_reg_for_free(fpr, temp_fpr_candidates)) + { + temp_fpr_1 = allocate_temp_reg(fpr, temp_fpr_candidates); + scratch_fprs[DecodeReg(temp_fpr_1)] = true; + } + } + + BitSet32 gprs_to_push = + (emitting_routine ? CALLER_SAVED_GPRS : gpr.GetCallerSavedUsed()) & ~scratch_gprs; + BitSet32 fprs_to_push = + (emitting_routine ? BitSet32(0xFFFFFFFF) : fpr.GetCallerSavedUsed()) & ~scratch_fprs; + + if (!emitting_routine && mode == MemAccessMode::Auto && jo.fastmem) + gprs_to_push[30] = true; + bool in_far_code = false; const u8* fast_access_start = GetCodePtr(); std::optional slow_access_fixup; @@ -76,13 +218,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, if (!jo.fastmem) { - const ARM64Reg temp = emitting_routine ? 
ARM64Reg::W3 : ARM64Reg::W30; + memory_base = EncodeRegTo64(temp_gpr_3); + memory_offset = temp_gpr_2; - memory_base = EncodeRegTo64(temp); - memory_offset = ARM64Reg::W0; - - LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT); - LDR(memory_base, MEM_REG, ArithOption(temp, true)); + LSR(temp_gpr_3, addr, PowerPC::BAT_INDEX_SHIFT); + LDR(memory_base, MEM_REG, ArithOption(temp_gpr_3, true)); if (emit_slow_access) { @@ -95,15 +235,12 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, } else if (emit_slow_access && emitting_routine) { - const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W1 : ARM64Reg::W3; - const ARM64Reg temp2 = ARM64Reg::W0; - - slow_access_fixup = CheckIfSafeAddress(addr, temp1, temp2); + slow_access_fixup = CheckIfSafeAddress(addr, temp_gpr_3, temp_gpr_2); } if ((flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT)) { - ARM64Reg temp = ARM64Reg::D0; + ARM64Reg temp = EncodeRegToDouble(temp_fpr_1); temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true); m_float_emit.STR(access_size, temp, memory_base, memory_offset); @@ -117,7 +254,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, } else if (flags & BackPatchInfo::FLAG_STORE) { - ARM64Reg temp = ARM64Reg::W1; + ARM64Reg temp = temp_gpr_1; temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true); if (flags & BackPatchInfo::FLAG_SIZE_32) @@ -130,7 +267,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, else if (flags & BackPatchInfo::FLAG_ZERO_256) { // This literally only stores 32bytes of zeros to the target address - ARM64Reg temp = ARM64Reg::X30; + ARM64Reg temp = EncodeRegTo64(temp_gpr_1); ADD(temp, memory_base, memory_offset); STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 0); STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 16); @@ -151,8 +288,6 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, if (emit_slow_access) { - const bool memcheck = jo.memcheck && !emitting_routine; - if (emit_fast_access) { in_far_code = true; @@ -169,12 +304,9 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, if (slow_access_fixup) SetJumpTarget(*slow_access_fixup); - const ARM64Reg temp_gpr = ARM64Reg::W1; - const int temp_gpr_index = DecodeReg(temp_gpr); - BitSet32 gprs_to_push_early = {}; if (memcheck) - gprs_to_push_early[temp_gpr_index] = true; + gprs_to_push_early[DecodeReg(memcheck_temp_gpr)] = true; if (flags & BackPatchInfo::FLAG_LOAD) gprs_to_push_early[0] = true; @@ -185,9 +317,18 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, if ((gprs_to_push & gprs_to_push_early).Count() & 1) gprs_to_push_early[30] = true; + // This temp GPR is only used when GPRs have been pushed, so we can choose almost any register + ARM64Reg temp_gpr_for_function_call = ARM64Reg::W8; + while (temp_gpr_for_function_call == addr || + (temp_gpr_for_function_call == RS && (flags & BackPatchInfo::FLAG_STORE))) + { + temp_gpr_for_function_call = + static_cast(static_cast(temp_gpr_for_function_call) + 1); + } + ABI_PushRegisters(gprs_to_push & gprs_to_push_early); ABI_PushRegisters(gprs_to_push & ~gprs_to_push_early); - m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30); + m_float_emit.ABI_PushRegisters(fprs_to_push, EncodeRegTo64(temp_gpr_for_function_call)); // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe // interrupt 
checks, and printing accurate PC locations in debug logs.
@@ -196,14 +337,23 @@
     // so the caller has to store the PC themselves.
     if (!emitting_routine)
     {
-      MOVI2R(ARM64Reg::W30, js.compilerPC);
-      STR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(pc));
+      MOVI2R(temp_gpr_for_function_call, js.compilerPC);
+      STR(IndexType::Unsigned, temp_gpr_for_function_call, PPC_REG, PPCSTATE_OFF(pc));
     }
 
     if (flags & BackPatchInfo::FLAG_STORE)
     {
       ARM64Reg src_reg = RS;
       const ARM64Reg dst_reg = access_size == 64 ? ARM64Reg::X1 : ARM64Reg::W1;
+      ARM64Reg temp_addr_reg = addr;
+      if (addr == ARM64Reg::W1)
+      {
+        // If addr is W1, we must move the address to a different register so we don't
+        // overwrite it when moving RS to W1. W2 is the optimal register to move to,
+        // because that's the register the address needs to be in for the function call.
+        temp_addr_reg = RS != ARM64Reg::W2 ? ARM64Reg::W2 : temp_gpr_for_function_call;
+        MOV(temp_addr_reg, addr);
+      }
 
       if (flags & BackPatchInfo::FLAG_FLOAT)
       {
@@ -227,49 +377,48 @@
       if (access_size == 64)
       {
         ABI_CallFunction(reverse ? &PowerPC::WriteU64SwapFromJit : &PowerPC::WriteU64FromJit,
-                         &m_mmu, src_reg, ARM64Reg::W2);
+                         &m_mmu, src_reg, temp_addr_reg);
       }
       else if (access_size == 32)
       {
         ABI_CallFunction(reverse ? &PowerPC::WriteU32SwapFromJit : &PowerPC::WriteU32FromJit,
-                         &m_mmu, src_reg, ARM64Reg::W2);
+                         &m_mmu, src_reg, temp_addr_reg);
       }
       else if (access_size == 16)
       {
         ABI_CallFunction(reverse ? &PowerPC::WriteU16SwapFromJit : &PowerPC::WriteU16FromJit,
-                         &m_mmu, src_reg, ARM64Reg::W2);
+                         &m_mmu, src_reg, temp_addr_reg);
       }
       else
       {
-        ABI_CallFunction(&PowerPC::WriteU8FromJit, &m_mmu, src_reg, ARM64Reg::W2);
+        ABI_CallFunction(&PowerPC::WriteU8FromJit, &m_mmu, src_reg, temp_addr_reg);
       }
     }
     else if (flags & BackPatchInfo::FLAG_ZERO_256)
     {
-      ABI_CallFunction(&PowerPC::ClearDCacheLineFromJit, &m_mmu, ARM64Reg::W1);
+      ABI_CallFunction(&PowerPC::ClearDCacheLineFromJit, &m_mmu, addr);
     }
     else
     {
       if (access_size == 64)
-        ABI_CallFunction(&PowerPC::ReadU64FromJit, &m_mmu, ARM64Reg::W1);
+        ABI_CallFunction(&PowerPC::ReadU64FromJit, &m_mmu, addr);
       else if (access_size == 32)
-        ABI_CallFunction(&PowerPC::ReadU32FromJit, &m_mmu, ARM64Reg::W1);
+        ABI_CallFunction(&PowerPC::ReadU32FromJit, &m_mmu, addr);
       else if (access_size == 16)
-        ABI_CallFunction(&PowerPC::ReadU16FromJit, &m_mmu, ARM64Reg::W1);
+        ABI_CallFunction(&PowerPC::ReadU16FromJit, &m_mmu, addr);
      else
-        ABI_CallFunction(&PowerPC::ReadU8FromJit, &m_mmu, ARM64Reg::W1);
+        ABI_CallFunction(&PowerPC::ReadU8FromJit, &m_mmu, addr);
     }
 
-    m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30);
+    m_float_emit.ABI_PopRegisters(fprs_to_push, EncodeRegTo64(temp_gpr_for_function_call));
     ABI_PopRegisters(gprs_to_push & ~gprs_to_push_early);
 
     if (memcheck)
     {
-      const ARM64Reg temp_fpr = fprs_to_push[0] ?
ARM64Reg::INVALID_REG : ARM64Reg::Q0; const u64 early_push_count = (gprs_to_push & gprs_to_push_early).Count(); const u64 early_push_size = Common::AlignUp(early_push_count, 2) * 8; - WriteConditionalExceptionExit(EXCEPTION_DSI, temp_gpr, temp_fpr, early_push_size); + WriteConditionalExceptionExit(EXCEPTION_DSI, memcheck_temp_gpr, temp_fpr_1, early_push_size); } if (flags & BackPatchInfo::FLAG_LOAD) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 2da5d113aa..cc6f6f32f3 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -30,10 +30,12 @@ using namespace Arm64Gen; void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update) { // We want to make sure to not get LR as a temp register - gpr.Lock(ARM64Reg::W1, ARM64Reg::W30); - if (jo.memcheck || !jo.fastmem) + gpr.Lock(ARM64Reg::W30); + if (jo.memcheck) gpr.Lock(ARM64Reg::W0); + const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1); + gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false); ARM64Reg dest_reg = gpr.R(dest); ARM64Reg up_reg = ARM64Reg::INVALID_REG; @@ -45,7 +47,6 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o if (offsetReg != -1 && !gpr.IsImm(offsetReg)) off_reg = gpr.R(offsetReg); - ARM64Reg addr_reg = ARM64Reg::W1; u32 imm_addr = 0; bool is_immediate = false; @@ -107,12 +108,10 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o } } - ARM64Reg XA = EncodeRegTo64(addr_reg); - bool addr_reg_set = !is_immediate; const auto set_addr_reg_if_needed = [&] { if (!addr_reg_set) - MOVI2R(XA, imm_addr); + MOVI2R(addr_reg, imm_addr); }; const bool early_update = !jo.memcheck && dest != static_cast(addr); @@ -123,14 +122,12 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o MOV(gpr.R(addr), addr_reg); } - BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; if (!update || early_update) - regs_in_use[DecodeReg(ARM64Reg::W1)] = false; - if (jo.memcheck || !jo.fastmem) - regs_in_use[DecodeReg(ARM64Reg::W0)] = false; - if (!jo.memcheck) - regs_in_use[DecodeReg(dest_reg)] = false; + scratch_gprs[DecodeReg(addr_reg)] = true; + if (jo.memcheck) + scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; u32 access_size = BackPatchInfo::GetFlagSize(flags); u32 mmio_address = 0; @@ -140,22 +137,24 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size)) { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, regs_in_use, - fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, addr_reg, scratch_gprs, + scratch_fprs); } else if (mmio_address) { - regs_in_use[DecodeReg(ARM64Reg::W1)] = false; - regs_in_use[DecodeReg(ARM64Reg::W30)] = false; - regs_in_use[DecodeReg(dest_reg)] = false; - MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, regs_in_use, - fprs_in_use, dest_reg, mmio_address, flags); + scratch_gprs[DecodeReg(addr_reg)] = true; + scratch_gprs[DecodeReg(ARM64Reg::W30)] = true; + scratch_gprs[DecodeReg(dest_reg)] = true; + MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), 
this, &m_float_emit, + gpr.GetCallerSavedUsed() & ~scratch_gprs, + fpr.GetCallerSavedUsed() & ~scratch_fprs, dest_reg, mmio_address, flags); addr_reg_set = false; } else { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, addr_reg, scratch_gprs, + scratch_fprs); } gpr.BindToRegister(dest, false, true); @@ -168,8 +167,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o MOV(gpr.R(addr), addr_reg); } - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); - if (jo.memcheck || !jo.fastmem) + gpr.Unlock(ARM64Reg::W30); + if (jo.memcheck) gpr.Unlock(ARM64Reg::W0); } @@ -177,9 +176,9 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s bool update) { // We want to make sure to not get LR as a temp register - gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); - if (!jo.fastmem) - gpr.Lock(ARM64Reg::W0); + gpr.Lock(ARM64Reg::W30); + + const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W2); // Don't materialize zero. ARM64Reg RS = gpr.IsImm(value, 0) ? ARM64Reg::WZR : gpr.R(value); @@ -192,8 +191,6 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s if (dest != -1 && !gpr.IsImm(dest)) reg_dest = gpr.R(dest); - ARM64Reg addr_reg = ARM64Reg::W2; - u32 imm_addr = 0; bool is_immediate = false; @@ -255,12 +252,10 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s } } - ARM64Reg XA = EncodeRegTo64(addr_reg); - bool addr_reg_set = !is_immediate; const auto set_addr_reg_if_needed = [&] { if (!addr_reg_set) - MOVI2R(XA, imm_addr); + MOVI2R(addr_reg, imm_addr); }; const bool early_update = !jo.memcheck && value != static_cast(dest); @@ -271,13 +266,10 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s MOV(gpr.R(dest), addr_reg); } - BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); - regs_in_use[DecodeReg(ARM64Reg::W1)] = false; + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; if (!update || early_update) - regs_in_use[DecodeReg(ARM64Reg::W2)] = false; - if (!jo.fastmem) - regs_in_use[DecodeReg(ARM64Reg::W0)] = false; + scratch_gprs[DecodeReg(addr_reg)] = true; u32 access_size = BackPatchInfo::GetFlagSize(flags); u32 mmio_address = 0; @@ -313,22 +305,23 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s else if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size)) { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, addr_reg, scratch_gprs, + scratch_fprs); } else if (mmio_address) { - regs_in_use[DecodeReg(ARM64Reg::W1)] = false; - regs_in_use[DecodeReg(ARM64Reg::W2)] = false; - regs_in_use[DecodeReg(ARM64Reg::W30)] = false; - regs_in_use[DecodeReg(RS)] = false; + scratch_gprs[DecodeReg(addr_reg)] = true; + scratch_gprs[DecodeReg(ARM64Reg::W30)] = true; + scratch_gprs[DecodeReg(RS)] = true; MMIOWriteRegToAddr(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, - regs_in_use, fprs_in_use, RS, mmio_address, flags); + gpr.GetCallerSavedUsed() & ~scratch_gprs, + fpr.GetCallerSavedUsed() & ~scratch_fprs, RS, mmio_address, flags); addr_reg_set = false; } else { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, XA, 
regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, addr_reg, scratch_gprs, scratch_fprs); } if (update && !early_update) @@ -338,9 +331,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s MOV(gpr.R(dest), addr_reg); } - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); - if (!jo.fastmem) - gpr.Unlock(ARM64Reg::W0); + gpr.Unlock(ARM64Reg::W30); } FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp, @@ -522,28 +513,33 @@ void JitArm64::lmw(UGeckoInstruction inst) u32 a = inst.RA, d = inst.RD; s32 offset = inst.SIMM_16; - gpr.Lock(ARM64Reg::W1, ARM64Reg::W30); - if (jo.memcheck || !jo.fastmem) + gpr.Lock(ARM64Reg::W30); + if (jo.memcheck) gpr.Lock(ARM64Reg::W0); // MMU games make use of a >= d despite this being invalid according to the PEM. // If a >= d occurs, we must make sure to not re-read rA after starting doing the loads. - ARM64Reg addr_reg = ARM64Reg::W1; + const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1); + Arm64RegCache::ScopedARM64Reg addr_base_reg; bool a_is_addr_base_reg = false; if (!a) - MOVI2R(addr_reg, offset); - else if (gpr.IsImm(a)) - MOVI2R(addr_reg, gpr.GetImm(a) + offset); - else if (a < d && offset + (31 - d) * 4 < 0x1000) - a_is_addr_base_reg = true; - else - ADDI2R(addr_reg, gpr.R(a), offset, addr_reg); - - Arm64RegCache::ScopedARM64Reg addr_base_reg; - if (!a_is_addr_base_reg) { addr_base_reg = gpr.GetScopedReg(); - MOV(addr_base_reg, addr_reg); + MOVI2R(addr_base_reg, offset); + } + else if (gpr.IsImm(a)) + { + addr_base_reg = gpr.GetScopedReg(); + MOVI2R(addr_base_reg, gpr.GetImm(a) + offset); + } + else if (a < d && offset + (31 - d) * 4 < 0x1000) + { + a_is_addr_base_reg = true; + } + else + { + addr_base_reg = gpr.GetScopedReg(); + ADDI2R(addr_base_reg, gpr.R(a), offset, addr_base_reg); } BitSet32 gprs_to_discard{}; @@ -586,22 +582,32 @@ void JitArm64::lmw(UGeckoInstruction inst) { gpr.BindToRegister(i, false, false); ARM64Reg dest_reg = gpr.R(i); + ARM64Reg current_iteration_addr_reg = addr_reg; if (a_is_addr_base_reg) - ADDI2R(addr_reg, gpr.R(a), offset + (i - d) * 4); - else if (i != d) - ADDI2R(addr_reg, addr_base_reg, (i - d) * 4); + { + const u32 current_iteration_offset = offset + (i - d) * 4; + if (current_iteration_offset != 0) + ADDI2R(addr_reg, gpr.R(a), current_iteration_offset); + else + current_iteration_addr_reg = gpr.R(a); + } + else + { + if (i != d) + ADDI2R(addr_reg, addr_base_reg, (i - d) * 4); + else + current_iteration_addr_reg = addr_base_reg; + } - BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); - regs_in_use[DecodeReg(addr_reg)] = false; - if (jo.memcheck || !jo.fastmem) - regs_in_use[DecodeReg(ARM64Reg::W0)] = false; - if (!jo.memcheck) - regs_in_use[DecodeReg(dest_reg)] = false; + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; + scratch_gprs[DecodeReg(addr_reg)] = true; + if (jo.memcheck) + scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg), regs_in_use, - fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, current_iteration_addr_reg, + scratch_gprs, scratch_fprs); gpr.BindToRegister(i, false, true); ASSERT(dest_reg == gpr.R(i)); @@ -629,8 +635,8 @@ void JitArm64::lmw(UGeckoInstruction inst) } } - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); - if (jo.memcheck || !jo.fastmem) + gpr.Unlock(ARM64Reg::W30); + if (jo.memcheck) 
gpr.Unlock(ARM64Reg::W0); } @@ -642,26 +648,29 @@ void JitArm64::stmw(UGeckoInstruction inst) u32 a = inst.RA, s = inst.RS; s32 offset = inst.SIMM_16; - gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); - if (!jo.fastmem) - gpr.Lock(ARM64Reg::W0); + gpr.Lock(ARM64Reg::W30); - ARM64Reg addr_reg = ARM64Reg::W2; + const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W2); + Arm64RegCache::ScopedARM64Reg addr_base_reg; bool a_is_addr_base_reg = false; if (!a) - MOVI2R(addr_reg, offset); - else if (gpr.IsImm(a)) - MOVI2R(addr_reg, gpr.GetImm(a) + offset); - else if (offset + (31 - s) * 4 < 0x1000) - a_is_addr_base_reg = true; - else - ADDI2R(addr_reg, gpr.R(a), offset, addr_reg); - - Arm64GPRCache::ScopedARM64Reg addr_base_reg; - if (!a_is_addr_base_reg) { addr_base_reg = gpr.GetScopedReg(); - MOV(addr_base_reg, addr_reg); + MOVI2R(addr_base_reg, offset); + } + else if (gpr.IsImm(a)) + { + addr_base_reg = gpr.GetScopedReg(); + MOVI2R(addr_base_reg, gpr.GetImm(a) + offset); + } + else if (offset + (31 - s) * 4 < 0x1000) + { + a_is_addr_base_reg = true; + } + else + { + addr_base_reg = gpr.GetScopedReg(); + ADDI2R(addr_base_reg, gpr.R(a), offset, addr_base_reg); } BitSet32 gprs_to_discard{}; @@ -704,21 +713,30 @@ void JitArm64::stmw(UGeckoInstruction inst) for (u32 i = s; i < 32; i++) { ARM64Reg src_reg = gpr.R(i); + ARM64Reg current_iteration_addr_reg = addr_reg; if (a_is_addr_base_reg) - ADDI2R(addr_reg, gpr.R(a), offset + (i - s) * 4); - else if (i != s) - ADDI2R(addr_reg, addr_base_reg, (i - s) * 4); + { + const u32 current_iteration_offset = offset + (i - s) * 4; + if (current_iteration_offset != 0) + ADDI2R(addr_reg, gpr.R(a), current_iteration_offset); + else + current_iteration_addr_reg = gpr.R(a); + } + else + { + if (i != s) + ADDI2R(addr_reg, addr_base_reg, (i - s) * 4); + else + current_iteration_addr_reg = addr_base_reg; + } - BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); - regs_in_use[DecodeReg(ARM64Reg::W1)] = false; - regs_in_use[DecodeReg(addr_reg)] = false; - if (!jo.fastmem) - regs_in_use[DecodeReg(ARM64Reg::W0)] = false; + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; + scratch_gprs[DecodeReg(addr_reg)] = true; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use, - fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, current_iteration_addr_reg, + scratch_gprs, scratch_fprs); // To reduce register pressure and to avoid getting a pipeline-unfriendly long run of stores // after this instruction, flush registers that would be flushed after this instruction anyway. 
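
A note on the caller-side pattern above: each call site used to compute the full set of caller-saved registers in use and then clear bits for its own temporaries, while with this change it only marks its temporaries in scratch_gprs/scratch_fprs and lets EmitBackpatchRoutine derive the push set itself. The stand-alone sketch below shows that the two computations are equivalent; Bits and CallerSavedUsed() are illustrative stand-ins rather than Dolphin's BitSet32 and register cache.

#include <bitset>
#include <iostream>

using Bits = std::bitset<32>;

// Hypothetical helper: caller-saved host registers currently holding guest values.
Bits CallerSavedUsed()
{
  Bits bits;
  bits.set(1);  // W1
  bits.set(2);  // W2
  bits.set(4);  // W4
  return bits;
}

int main()
{
  // Old style: the caller computed the push set and cleared bits by hand.
  Bits regs_in_use = CallerSavedUsed();
  regs_in_use.reset(1);  // addr_reg (W1) is a temporary, no need to preserve it
  const Bits old_push_set = regs_in_use;

  // New style: the caller only says which registers are scratch...
  Bits scratch_gprs;
  scratch_gprs.set(1);  // addr_reg

  // ...and the callee derives the push set itself.
  const Bits new_push_set = CallerSavedUsed() & ~scratch_gprs;

  std::cout << (old_push_set == new_push_set) << '\n';  // prints 1
}

Either way, the registers pushed around the slow call are the caller-saved registers in use minus the declared scratch set; the refactor moves that subtraction into EmitBackpatchRoutine so the bookkeeping lives in one place.
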
@@ -750,9 +768,7 @@ void JitArm64::stmw(UGeckoInstruction inst) } } - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); - if (!jo.fastmem) - gpr.Unlock(ARM64Reg::W0); + gpr.Unlock(ARM64Reg::W30); } void JitArm64::dcbx(UGeckoInstruction inst) @@ -971,17 +987,11 @@ void JitArm64::dcbz(UGeckoInstruction inst) int a = inst.RA, b = inst.RB; - gpr.Lock(ARM64Reg::W1, ARM64Reg::W30); - if (!jo.fastmem) - gpr.Lock(ARM64Reg::W0); + gpr.Lock(ARM64Reg::W30); - Common::ScopeGuard register_guard([&] { - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); - if (!jo.fastmem) - gpr.Unlock(ARM64Reg::W0); - }); + Common::ScopeGuard register_guard([&] { gpr.Unlock(ARM64Reg::W30); }); - constexpr ARM64Reg addr_reg = ARM64Reg::W1; + const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1); constexpr ARM64Reg temp_reg = ARM64Reg::W30; // HACK: Don't clear any memory in the [0x8000'0000, 0x8000'8000) region. @@ -1043,14 +1053,12 @@ void JitArm64::dcbz(UGeckoInstruction inst) } } - BitSet32 gprs_to_push = gpr.GetCallerSavedUsed(); - BitSet32 fprs_to_push = fpr.GetCallerSavedUsed(); - gprs_to_push[DecodeReg(ARM64Reg::W1)] = false; - if (!jo.fastmem) - gprs_to_push[DecodeReg(ARM64Reg::W0)] = false; + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; + scratch_gprs[DecodeReg(addr_reg)] = true; - EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1, - EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push); + EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1, addr_reg, + scratch_gprs, scratch_fprs); if (using_dcbz_hack) SetJumpTarget(end_dcbz_hack); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp index e5ded0dde4..18e86a3185 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp @@ -77,13 +77,12 @@ void JitArm64::lfXX(UGeckoInstruction inst) const RegType type = (flags & BackPatchInfo::FLAG_SIZE_64) != 0 ? 
RegType::LowerPair : RegType::DuplicatedSingle; - gpr.Lock(ARM64Reg::W1, ARM64Reg::W30); - fpr.Lock(ARM64Reg::Q0); - if (jo.memcheck || !jo.fastmem) + gpr.Lock(ARM64Reg::W30); + if (jo.memcheck) gpr.Lock(ARM64Reg::W0); + const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1); const ARM64Reg VD = fpr.RW(inst.FD, type, false); - ARM64Reg addr_reg = ARM64Reg::W1; if (update) { @@ -152,10 +151,8 @@ void JitArm64::lfXX(UGeckoInstruction inst) } } - ARM64Reg XA = EncodeRegTo64(addr_reg); - if (is_immediate) - MOVI2R(XA, imm_addr); + MOVI2R(addr_reg, imm_addr); const bool early_update = !jo.memcheck; if (update && early_update) @@ -164,23 +161,21 @@ void JitArm64::lfXX(UGeckoInstruction inst) MOV(gpr.R(a), addr_reg); } - BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; if (!update || early_update) - regs_in_use[DecodeReg(ARM64Reg::W1)] = false; - if (jo.memcheck || !jo.fastmem) - regs_in_use[DecodeReg(ARM64Reg::W0)] = false; - fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false; - if (!jo.memcheck) - fprs_in_use[DecodeReg(VD)] = false; + scratch_gprs[DecodeReg(addr_reg)] = true; + if (jo.memcheck) + scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags))) { - EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, addr_reg, scratch_gprs, + scratch_fprs); } else { - EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, addr_reg, scratch_gprs, scratch_fprs); } const ARM64Reg VD_again = fpr.RW(inst.FD, type, true); @@ -192,9 +187,8 @@ void JitArm64::lfXX(UGeckoInstruction inst) MOV(gpr.R(a), addr_reg); } - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); - fpr.Unlock(ARM64Reg::Q0); - if (jo.memcheck || !jo.fastmem) + gpr.Unlock(ARM64Reg::W30); + if (jo.memcheck) gpr.Unlock(ARM64Reg::W0); } @@ -264,8 +258,6 @@ void JitArm64::stfXX(UGeckoInstruction inst) u32 imm_addr = 0; bool is_immediate = false; - fpr.Lock(ARM64Reg::Q0); - const bool have_single = fpr.IsSingle(inst.FS, true); Arm64FPRCache::ScopedARM64Reg V0 = @@ -278,11 +270,9 @@ void JitArm64::stfXX(UGeckoInstruction inst) V0 = std::move(single_reg); } - gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); - if (!jo.fastmem) - gpr.Lock(ARM64Reg::W0); + gpr.Lock(ARM64Reg::W30); - ARM64Reg addr_reg = ARM64Reg::W2; + const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W2); if (update) { @@ -351,12 +341,10 @@ void JitArm64::stfXX(UGeckoInstruction inst) } } - ARM64Reg XA = EncodeRegTo64(addr_reg); - bool addr_reg_set = !is_immediate; const auto set_addr_reg_if_needed = [&] { if (!addr_reg_set) - MOVI2R(XA, imm_addr); + MOVI2R(addr_reg, imm_addr); }; const bool early_update = !jo.memcheck; @@ -367,14 +355,10 @@ void JitArm64::stfXX(UGeckoInstruction inst) MOV(gpr.R(a), addr_reg); } - BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); - regs_in_use[DecodeReg(ARM64Reg::W1)] = false; + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; if (!update || early_update) - regs_in_use[DecodeReg(ARM64Reg::W2)] = false; - if (!jo.fastmem) - regs_in_use[DecodeReg(ARM64Reg::W0)] = false; - fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false; + scratch_gprs[DecodeReg(addr_reg)] = true; if 
(is_immediate) { @@ -402,20 +386,20 @@ void JitArm64::stfXX(UGeckoInstruction inst) else if (m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags))) { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, XA, regs_in_use, - fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, addr_reg, scratch_gprs, + scratch_fprs); } else { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, XA, regs_in_use, - fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, addr_reg, scratch_gprs, + scratch_fprs); } } else { set_addr_reg_if_needed(); - EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, XA, regs_in_use, fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, addr_reg, scratch_gprs, scratch_fprs); } if (update && !early_update) @@ -425,8 +409,5 @@ void JitArm64::stfXX(UGeckoInstruction inst) MOV(gpr.R(a), addr_reg); } - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); - fpr.Unlock(ARM64Reg::Q0); - if (!jo.fastmem) - gpr.Unlock(ARM64Reg::W0); + gpr.Unlock(ARM64Reg::W30); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp index 2d5c5d9355..803d7a5dae 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStorePaired.cpp @@ -38,20 +38,20 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) const int i = indexed ? inst.Ix : inst.I; const int w = indexed ? inst.Wx : inst.W; - gpr.Lock(ARM64Reg::W1, ARM64Reg::W30); - fpr.Lock(ARM64Reg::Q0); + gpr.Lock(ARM64Reg::W30); if (!js.assumeNoPairedQuantize) { - gpr.Lock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3); - fpr.Lock(ARM64Reg::Q1); + gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3); + fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1); } - else if (jo.memcheck || !jo.fastmem) + else if (jo.memcheck) { gpr.Lock(ARM64Reg::W0); } constexpr ARM64Reg type_reg = ARM64Reg::W0; - constexpr ARM64Reg addr_reg = ARM64Reg::W1; + const auto addr_reg = js.assumeNoPairedQuantize ? 
gpr.GetScopedRegWithPreference(ARM64Reg::W1) : + Arm64RegCache::ScopedARM64Reg(ARM64Reg::W1); constexpr ARM64Reg scale_reg = ARM64Reg::W2; ARM64Reg VS = fpr.RW(inst.RS, RegType::Single, false); @@ -79,24 +79,19 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) if (js.assumeNoPairedQuantize) { - BitSet32 gprs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; - // Wipe the registers we are using as temporaries if (!update || early_update) - gprs_in_use[DecodeReg(ARM64Reg::W1)] = false; - if (jo.memcheck || !jo.fastmem) - gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; - fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false; - if (!jo.memcheck) - fprs_in_use[DecodeReg(VS)] = false; + scratch_gprs[DecodeReg(addr_reg)] = true; + if (jo.memcheck) + scratch_gprs[DecodeReg(ARM64Reg::W0)] = true; u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; if (!w) flags |= BackPatchInfo::FLAG_PAIR; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use, - fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, addr_reg, scratch_gprs, scratch_fprs); } else { @@ -133,14 +128,13 @@ void JitArm64::psq_lXX(UGeckoInstruction inst) MOV(gpr.R(inst.RA), addr_reg); } - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30); - fpr.Unlock(ARM64Reg::Q0); + gpr.Unlock(ARM64Reg::W30); if (!js.assumeNoPairedQuantize) { - gpr.Unlock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3); - fpr.Unlock(ARM64Reg::Q1); + gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3); + fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1); } - else if (jo.memcheck || !jo.fastmem) + else if (jo.memcheck) { gpr.Unlock(ARM64Reg::W0); } @@ -167,9 +161,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) const int i = indexed ? inst.Ix : inst.I; const int w = indexed ? inst.Wx : inst.W; - fpr.Lock(ARM64Reg::Q0); if (!js.assumeNoPairedQuantize) - fpr.Lock(ARM64Reg::Q1); + fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1); const bool have_single = fpr.IsSingle(inst.RS); @@ -205,15 +198,18 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) } } - gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); - if (!js.assumeNoPairedQuantize || !jo.fastmem) - gpr.Lock(ARM64Reg::W0); - if (!js.assumeNoPairedQuantize && !jo.fastmem) - gpr.Lock(ARM64Reg::W3); + gpr.Lock(ARM64Reg::W30); + if (!js.assumeNoPairedQuantize) + { + gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2); + if (!jo.fastmem) + gpr.Lock(ARM64Reg::W3); + } constexpr ARM64Reg type_reg = ARM64Reg::W0; constexpr ARM64Reg scale_reg = ARM64Reg::W1; - constexpr ARM64Reg addr_reg = ARM64Reg::W2; + const auto addr_reg = js.assumeNoPairedQuantize ? 
gpr.GetScopedRegWithPreference(ARM64Reg::W2) : + Arm64RegCache::ScopedARM64Reg(ARM64Reg::W2); if (inst.RA || update) // Always uses the register on update { @@ -239,22 +235,17 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) if (js.assumeNoPairedQuantize) { - BitSet32 gprs_in_use = gpr.GetCallerSavedUsed(); - BitSet32 fprs_in_use = fpr.GetCallerSavedUsed(); + BitSet32 scratch_gprs; + BitSet32 scratch_fprs; - // Wipe the registers we are using as temporaries - gprs_in_use[DecodeReg(ARM64Reg::W1)] = false; if (!update || early_update) - gprs_in_use[DecodeReg(ARM64Reg::W2)] = false; - if (!jo.fastmem) - gprs_in_use[DecodeReg(ARM64Reg::W0)] = false; + scratch_gprs[DecodeReg(addr_reg)] = true; u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; if (!w) flags |= BackPatchInfo::FLAG_PAIR; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use, - fprs_in_use); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, addr_reg, scratch_gprs, scratch_fprs); } else { @@ -280,12 +271,12 @@ void JitArm64::psq_stXX(UGeckoInstruction inst) MOV(gpr.R(inst.RA), addr_reg); } - gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30); - fpr.Unlock(ARM64Reg::Q0); - if (!js.assumeNoPairedQuantize || !jo.fastmem) - gpr.Unlock(ARM64Reg::W0); - if (!js.assumeNoPairedQuantize && !jo.fastmem) - gpr.Unlock(ARM64Reg::W3); + gpr.Unlock(ARM64Reg::W30); if (!js.assumeNoPairedQuantize) - fpr.Unlock(ARM64Reg::Q1); + { + gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2); + if (!jo.fastmem) + gpr.Unlock(ARM64Reg::W3); + fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1); + } } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp index 20a86c0389..f6e7926777 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include "Common/Assert.h" @@ -63,6 +64,30 @@ ARM64Reg Arm64RegCache::GetReg() return ARM64Reg::INVALID_REG; } +ARM64Reg Arm64RegCache::GetRegWithPreference(Arm64Gen::ARM64Reg preferred) +{ + // In practice, the preferred register tends to be towards the end of m_host_registers, + // so we scan through m_host_registers backwards + for (auto& it : m_host_registers | std::views::reverse) + { + if (it.GetReg() == preferred) + { + if (it.IsLocked()) + { + return GetReg(); + } + else + { + it.Lock(); + return it.GetReg(); + } + } + } + ASSERT_MSG(DYNA_REC, false, "Preferred register {:#x} is not in register cache", + static_cast(preferred)); + return ARM64Reg::INVALID_REG; +} + void Arm64RegCache::UpdateLastUsed(BitSet32 regs_used) { for (size_t i = 0; i < m_guest_registers.size(); ++i) diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 5164745cd8..dd3822bb58 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -183,13 +183,16 @@ public: // Returns a temporary register for use // Requires unlocking after done Arm64Gen::ARM64Reg GetReg(); + Arm64Gen::ARM64Reg GetRegWithPreference(Arm64Gen::ARM64Reg preferred); class ScopedARM64Reg { public: inline ScopedARM64Reg() = default; ScopedARM64Reg(const ScopedARM64Reg&) = delete; - explicit inline ScopedARM64Reg(Arm64RegCache& cache) : m_reg(cache.GetReg()), m_gpr(&cache) {} + inline ScopedARM64Reg(Arm64RegCache& cache, Arm64Gen::ARM64Reg reg) : m_reg(reg), 
m_gpr(&cache) + { + } inline ScopedARM64Reg(Arm64Gen::ARM64Reg reg) : m_reg(reg) {} inline ScopedARM64Reg(ScopedARM64Reg&& scoped_reg) { *this = std::move(scoped_reg); } inline ~ScopedARM64Reg() { Unlock(); } @@ -235,7 +238,11 @@ public: // Returns a temporary register // Unlocking is implicitly handled through RAII - inline ScopedARM64Reg GetScopedReg() { return ScopedARM64Reg(*this); } + inline ScopedARM64Reg GetScopedReg() { return ScopedARM64Reg(*this, GetReg()); } + inline ScopedARM64Reg GetScopedRegWithPreference(Arm64Gen::ARM64Reg preferred) + { + return ScopedARM64Reg(*this, GetRegWithPreference(preferred)); + } void UpdateLastUsed(BitSet32 regs_used); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 56c26739a3..bb28b9052b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -524,12 +524,12 @@ void JitArm64::GenerateQuantizedLoads() // Q0 is the return // Q1 is a temporary ARM64Reg temp_reg = ARM64Reg::X0; - ARM64Reg addr_reg = ARM64Reg::X1; + ARM64Reg addr_reg = ARM64Reg::W1; ARM64Reg scale_reg = ARM64Reg::X2; - BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 3}; + BitSet32 scratch_gprs{0, 3}; if (!jo.memcheck) - gprs_to_push &= ~BitSet32{1}; - BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1}; + scratch_gprs[1] = true; + BitSet32 scratch_fprs{0, 1}; ARM64FloatEmitter float_emit(this); const u8* start = GetCodePtr(); @@ -541,7 +541,7 @@ void JitArm64::GenerateQuantizedLoads() BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32; EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, - gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true); + scratch_gprs | BitSet32{DecodeReg(scale_reg)}, scratch_fprs, true); RET(ARM64Reg::X30); } @@ -550,8 +550,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); @@ -568,8 +568,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); @@ -586,8 +586,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); @@ -603,8 +603,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | 
BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); @@ -622,7 +622,7 @@ void JitArm64::GenerateQuantizedLoads() BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, - gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true); + scratch_gprs | BitSet32{DecodeReg(scale_reg)}, scratch_fprs, true); RET(ARM64Reg::X30); } @@ -631,8 +631,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); @@ -649,8 +649,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); @@ -667,8 +667,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0); @@ -684,8 +684,8 @@ void JitArm64::GenerateQuantizedLoads() constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0); float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0); @@ -735,13 +735,13 @@ void JitArm64::GenerateQuantizedStores() // Q1 is a temporary ARM64Reg temp_reg = ARM64Reg::X0; ARM64Reg scale_reg = ARM64Reg::X1; - ARM64Reg addr_reg = ARM64Reg::X2; - BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 1}; + ARM64Reg addr_reg = ARM64Reg::W2; + BitSet32 scratch_gprs{0, 1}; if (!jo.memcheck) - gprs_to_push &= ~BitSet32{2}; + scratch_gprs[2] = true; if (!jo.fastmem) - gprs_to_push &= ~BitSet32{3}; - BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1}; + scratch_gprs[3] = true; + BitSet32 scratch_fprs{0, 1}; ARM64FloatEmitter float_emit(this); const u8* start = GetCodePtr(); @@ -752,8 +752,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32; - EmitBackpatchRoutine(flags, 
MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -771,8 +771,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -790,8 +790,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -808,8 +808,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -826,8 +826,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -837,8 +837,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -856,8 +856,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -875,8 +875,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -893,8 +893,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, 
ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); } @@ -911,8 +911,8 @@ void JitArm64::GenerateQuantizedStores() constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push, - fprs_to_push, true); + EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs, + scratch_fprs, true); RET(ARM64Reg::X30); }
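
For context on the GetRegWithPreference/GetScopedRegWithPreference helpers introduced in JitArm64_RegCache above: the slow-access path is slightly cheaper when the address is already in the register the ABI call wants (W1 for loads, W2 for stores), so callers request that register but accept any other free one when it is taken. A rough stand-alone model of the lookup follows; the Cache type is an illustrative stand-in for Arm64RegCache and ignores locking order, eviction, and FPR encoding.

#include <array>
#include <cassert>
#include <iostream>

struct Cache
{
  std::array<bool, 4> locked{};  // index = host register number

  // First unlocked register, mirroring the plain GetReg() behaviour.
  int GetReg()
  {
    for (int i = 0; i < static_cast<int>(locked.size()); ++i)
    {
      if (!locked[i])
      {
        locked[i] = true;
        return i;
      }
    }
    assert(false && "out of spare registers");
    return -1;
  }

  // Hand out the preferred register when it is free, otherwise fall back.
  int GetRegWithPreference(int preferred)
  {
    if (!locked[preferred])
    {
      locked[preferred] = true;
      return preferred;
    }
    return GetReg();
  }
};

int main()
{
  Cache cache;
  const int a = cache.GetRegWithPreference(2);  // 2 is free, so we get 2
  const int b = cache.GetRegWithPreference(2);  // 2 is now locked, fall back to 0
  std::cout << a << ' ' << b << '\n';           // prints "2 0"
}

The real implementation scans m_host_registers in reverse because, as the added comment notes, the preferred registers tend to sit towards the end of that array.
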