diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 67ef2ff621..fe30f36dbc 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -250,18 +250,15 @@ protected: // This is the core routine for accessing emulated memory, with support for // many different kinds of loads and stores as well as fastmem/backpatching. // - // Registers used: + // The addr parameter can be any register, but the code emitted for slow accesses + // will be slightly more efficient if the addr parameter is as follows: // - // addr // Store: W2 // Load: W1 // Zero 256: W1 // Store float: W2 // Load float: W1 // - // If mode == AlwaysFastAccess, the addr argument can be any register. - // Otherwise it must be the register listed in the table above. - // // This routine allocates most scratch registers dynamically, but in the following // situations, specific scratch registers have to be allocated in advance: // diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp index 02299e0e2a..3c11e83caa 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -317,9 +317,18 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, if ((gprs_to_push & gprs_to_push_early).Count() & 1) gprs_to_push_early[30] = true; + // This temp GPR is only used when GPRs have been pushed, so we can choose almost any register + ARM64Reg temp_gpr_for_function_call = ARM64Reg::W8; + while (temp_gpr_for_function_call == addr || + (temp_gpr_for_function_call == RS && (flags & BackPatchInfo::FLAG_STORE))) + { + temp_gpr_for_function_call = + static_cast(static_cast(temp_gpr_for_function_call) + 1); + } + ABI_PushRegisters(gprs_to_push & gprs_to_push_early); ABI_PushRegisters(gprs_to_push & ~gprs_to_push_early); - m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30); + m_float_emit.ABI_PushRegisters(fprs_to_push, EncodeRegTo64(temp_gpr_for_function_call)); // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe // interrupt checks, and printing accurate PC locations in debug logs. @@ -328,14 +337,23 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, // so the caller has to store the PC themselves. if (!emitting_routine) { - MOVI2R(ARM64Reg::W30, js.compilerPC); - STR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(pc)); + MOVI2R(temp_gpr_for_function_call, js.compilerPC); + STR(IndexType::Unsigned, temp_gpr_for_function_call, PPC_REG, PPCSTATE_OFF(pc)); } if (flags & BackPatchInfo::FLAG_STORE) { ARM64Reg src_reg = RS; const ARM64Reg dst_reg = access_size == 64 ? ARM64Reg::X1 : ARM64Reg::W1; + ARM64Reg temp_addr_reg = addr; + if (addr == ARM64Reg::W1) + { + // If addr is W1, we must move the address to a different register so we don't + // overwrite it when moving RS to W1. W2 is the optimal register to move to, + // because that's the register the address needs to be in for the function call. + temp_addr_reg = RS != ARM64Reg::W2 ? ARM64Reg::W2 : temp_gpr_for_function_call; + MOV(temp_addr_reg, addr); + } if (flags & BackPatchInfo::FLAG_FLOAT) { @@ -359,40 +377,40 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, if (access_size == 64) { ABI_CallFunction(reverse ? &PowerPC::WriteU64SwapFromJit : &PowerPC::WriteU64FromJit, - &m_mmu, src_reg, ARM64Reg::W2); + &m_mmu, src_reg, temp_addr_reg); } else if (access_size == 32) { ABI_CallFunction(reverse ? &PowerPC::WriteU32SwapFromJit : &PowerPC::WriteU32FromJit, - &m_mmu, src_reg, ARM64Reg::W2); + &m_mmu, src_reg, temp_addr_reg); } else if (access_size == 16) { ABI_CallFunction(reverse ? &PowerPC::WriteU16SwapFromJit : &PowerPC::WriteU16FromJit, - &m_mmu, src_reg, ARM64Reg::W2); + &m_mmu, src_reg, temp_addr_reg); } else { - ABI_CallFunction(&PowerPC::WriteU8FromJit, &m_mmu, src_reg, ARM64Reg::W2); + ABI_CallFunction(&PowerPC::WriteU8FromJit, &m_mmu, src_reg, addr); } } else if (flags & BackPatchInfo::FLAG_ZERO_256) { - ABI_CallFunction(&PowerPC::ClearDCacheLineFromJit, &m_mmu, ARM64Reg::W1); + ABI_CallFunction(&PowerPC::ClearDCacheLineFromJit, &m_mmu, addr); } else { if (access_size == 64) - ABI_CallFunction(&PowerPC::ReadU64FromJit, &m_mmu, ARM64Reg::W1); + ABI_CallFunction(&PowerPC::ReadU64FromJit, &m_mmu, addr); else if (access_size == 32) - ABI_CallFunction(&PowerPC::ReadU32FromJit, &m_mmu, ARM64Reg::W1); + ABI_CallFunction(&PowerPC::ReadU32FromJit, &m_mmu, addr); else if (access_size == 16) - ABI_CallFunction(&PowerPC::ReadU16FromJit, &m_mmu, ARM64Reg::W1); + ABI_CallFunction(&PowerPC::ReadU16FromJit, &m_mmu, addr); else - ABI_CallFunction(&PowerPC::ReadU8FromJit, &m_mmu, ARM64Reg::W1); + ABI_CallFunction(&PowerPC::ReadU8FromJit, &m_mmu, addr); } - m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30); + m_float_emit.ABI_PopRegisters(fprs_to_push, EncodeRegTo64(temp_gpr_for_function_call)); ABI_PopRegisters(gprs_to_push & ~gprs_to_push_early); if (memcheck)