JosJuice 2025-08-10 08:38:49 +01:00 committed by GitHub
commit 7edf25ba17
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 486 additions and 317 deletions


@ -250,34 +250,42 @@ protected:
// This is the core routine for accessing emulated memory, with support for
// many different kinds of loads and stores as well as fastmem/backpatching.
//
// Registers used:
// The addr parameter can be any register, but the code emitted for slow accesses
// will be slightly more efficient if the addr parameter is as follows:
//
// addr scratch
// Store: X2 X1
// Load: X1
// Zero 256: X1 X30
// Store float: X2 Q0
// Load float: X1
// Store: W2
// Load: W1
// Zero 256: W1
// Store float: W2
// Load float: W1
//
// If mode == AlwaysFastAccess, the addr argument can be any register.
// Otherwise it must be the register listed in the table above.
// This routine allocates most scratch registers dynamically, but in the following
// situations, specific scratch registers have to be allocated in advance:
//
// Additional scratch registers are used in the following situations:
// emitting_routine && mode == Auto: X0
// emitting_routine && mode == Auto && (flags & BackPatchInfo::FLAG_STORE): X1
// emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
// emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X3
// emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X0
// emitting_routine && mode != AlwaysSlowAccess &&
// (flags & BackPatchInfo::FLAG_STORE) && !(flags & BackPatchInfo::FLAG_FLOAT): X1
// emitting_routine && mode != AlwaysSlowAccess &&
// (flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT): Q0
// emitting_routine && mode != AlwaysSlowAccess &&
// (flags & BackPatchInfo::FLAG_ZERO_256): X30
// !emitting_routine && mode == Auto && jo.fastmem: X30
//
// emitting_routine && mode == Auto: X0
// emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
// emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X3
// mode != AlwaysSlowAccess && !jo.fastmem: X0
// If there are any other registers that the caller doesn't mind being overwritten,
// these can be indicated in scratch_gprs and scratch_fprs.
//
// In the following situations, certain host registers must not contain guest registers:
//
// !emitting_routine && mode != AlwaysFastAccess && jo.memcheck: X30
// !emitting_routine && mode != AlwaysFastAccess && jo.memcheck &&
// (flags & BackPatchInfo::FLAG_LOAD): X0
// !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30
// !emitting_routine && mode == Auto && jo.fastmem: X30
//
// Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push
// may be clobbered if mode != AlwaysFastAccess.
// (flags & BackPatchInfo::FLAG_LOAD): X0
void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);
Arm64Gen::ARM64Reg addr, BitSet32 scratch_gprs = BitSet32(0),
BitSet32 scratch_fprs = BitSet32(0), bool emitting_routine = false);
// Loadstore routines
void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);
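For reference, a condensed sketch of the calling pattern this new signature is designed for, modelled on the SafeLoadToReg changes later in this commit. The flag combination is illustrative, and dest_reg stands for an already-bound destination register:

// Hypothetical caller: a 32-bit integer load.
if (jo.memcheck || !jo.fastmem)
  gpr.Lock(ARM64Reg::W30);
if (jo.memcheck)
  gpr.Lock(ARM64Reg::W0);

// Ask the register cache for an address register, preferring W1 as the table above suggests for loads.
const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1);

// Report which registers the routine may clobber, rather than pre-clearing them from a gprs_to_push set.
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(addr_reg)] = true;
if (jo.memcheck)
  scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;

EmitBackpatchRoutine(BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_SIZE_32,
                     MemAccessMode::Auto, dest_reg, addr_reg, scratch_gprs, scratch_fprs);
// addr_reg unlocks automatically when the scoped register goes out of scope;
// W30/W0 are unlocked explicitly, mirroring the Lock calls above.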


@ -54,7 +54,7 @@ void JitArm64::DoBacktrace(uintptr_t access_address, SContext* ctx)
}
void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, ARM64Reg addr,
BitSet32 gprs_to_push, BitSet32 fprs_to_push,
BitSet32 scratch_gprs, BitSet32 scratch_fprs,
bool emitting_routine)
{
const u32 access_size = BackPatchInfo::GetFlagSize(flags);
@ -65,6 +65,148 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
const bool emit_fast_access = mode != MemAccessMode::AlwaysSlowAccess;
const bool emit_slow_access = mode != MemAccessMode::AlwaysFastAccess;
const bool memcheck = jo.memcheck && !emitting_routine;
if ((flags & BackPatchInfo::FLAG_LOAD))
{
if ((flags & BackPatchInfo::FLAG_FLOAT))
scratch_fprs[DecodeReg(RS)] = !memcheck;
else
scratch_gprs[DecodeReg(RS)] = !memcheck;
}
BitSet32 temp_gpr_candidates = scratch_gprs;
BitSet32 temp_fpr_candidates = scratch_fprs;
temp_gpr_candidates[DecodeReg(addr)] = false;
if (flags & BackPatchInfo::FLAG_FLOAT)
temp_fpr_candidates[DecodeReg(RS)] = false;
else if (!(flags & BackPatchInfo::FLAG_ZERO_256))
temp_gpr_candidates[DecodeReg(RS)] = false;
if (!emitting_routine && mode == MemAccessMode::Auto && jo.fastmem)
temp_gpr_candidates[30] = true;
const auto allocate_temp_reg = [this](Arm64RegCache& reg_cache,
BitSet32& candidates) -> Arm64RegCache::ScopedARM64Reg {
for (int i : candidates)
{
candidates[i] = false;
ARM64Reg reg = ARM64Reg(i);
if (&reg_cache == &fpr)
reg = EncodeRegToQuad(reg);
return reg;
}
return reg_cache.GetScopedReg();
};
const auto can_allocate_temp_reg_for_free = [](Arm64RegCache& reg_cache, BitSet32& candidates) {
return candidates != BitSet32{} || reg_cache.GetUnlockedRegisterCount() > 0;
};
Arm64RegCache::ScopedARM64Reg temp_gpr_1;
Arm64RegCache::ScopedARM64Reg temp_gpr_2;
Arm64RegCache::ScopedARM64Reg temp_gpr_3;
Arm64RegCache::ScopedARM64Reg temp_fpr_1;
if (emit_fast_access)
{
if ((flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT))
{
temp_fpr_1 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::Q0) :
allocate_temp_reg(fpr, temp_fpr_candidates);
scratch_fprs[DecodeReg(temp_fpr_1)] = true;
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
temp_gpr_1 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::W1) :
allocate_temp_reg(gpr, temp_gpr_candidates);
scratch_gprs[DecodeReg(temp_gpr_1)] = true;
}
else if (flags & BackPatchInfo::FLAG_ZERO_256)
{
temp_gpr_1 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::W30) :
allocate_temp_reg(gpr, temp_gpr_candidates);
scratch_gprs[DecodeReg(temp_gpr_1)] = true;
}
if (!jo.fastmem)
{
temp_gpr_2 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::W0) :
allocate_temp_reg(gpr, temp_gpr_candidates);
temp_gpr_3 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::W3) :
allocate_temp_reg(gpr, temp_gpr_candidates);
scratch_gprs[DecodeReg(temp_gpr_2)] = true;
scratch_gprs[DecodeReg(temp_gpr_3)] = true;
}
else if (emit_slow_access && emitting_routine)
{
temp_gpr_2 = ARM64Reg::W0;
temp_gpr_3 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W1 : ARM64Reg::W3;
scratch_gprs[DecodeReg(temp_gpr_2)] = true;
scratch_gprs[DecodeReg(temp_gpr_3)] = true;
}
}
// Setting memcheck_temp_gpr to W30 works, but because W30 is a register that needs to be pushed
// and popped, using W30 may require us to emit an extra push and pop instruction, depending on
// what other registers need pushing and popping. If we can find another register to use without
// having to evict anything from the register cache, let's do that instead of using W30.
ARM64Reg memcheck_temp_gpr = ARM64Reg::W30;
if (emit_slow_access && memcheck)
{
const auto is_suitable_as_memcheck_temp_gpr = [flags](ARM64Reg reg) {
return reg != ARM64Reg::INVALID_REG && reg != ARM64Reg::W30 &&
(reg != ARM64Reg::W0 || !(flags & BackPatchInfo::FLAG_LOAD));
};
const auto get_unset_temp_gpr = [&]() -> Arm64RegCache::ScopedARM64Reg& {
if (temp_gpr_1 == ARM64Reg::INVALID_REG)
return temp_gpr_1;
if (temp_gpr_2 == ARM64Reg::INVALID_REG)
return temp_gpr_2;
ASSERT(temp_gpr_3 == ARM64Reg::INVALID_REG);
return temp_gpr_3;
};
if (is_suitable_as_memcheck_temp_gpr(temp_gpr_1))
{
memcheck_temp_gpr = temp_gpr_1;
}
else if (is_suitable_as_memcheck_temp_gpr(temp_gpr_2))
{
memcheck_temp_gpr = temp_gpr_2;
}
else if (is_suitable_as_memcheck_temp_gpr(temp_gpr_3))
{
memcheck_temp_gpr = temp_gpr_3;
}
else
{
while (can_allocate_temp_reg_for_free(gpr, temp_gpr_candidates))
{
Arm64RegCache::ScopedARM64Reg& temp_gpr_x = get_unset_temp_gpr();
temp_gpr_x = allocate_temp_reg(gpr, temp_gpr_candidates);
scratch_gprs[DecodeReg(temp_gpr_x)] = true;
if (is_suitable_as_memcheck_temp_gpr(temp_gpr_x))
break;
}
}
if (temp_fpr_1 == ARM64Reg::INVALID_REG &&
can_allocate_temp_reg_for_free(fpr, temp_fpr_candidates))
{
temp_fpr_1 = allocate_temp_reg(fpr, temp_fpr_candidates);
scratch_fprs[DecodeReg(temp_fpr_1)] = true;
}
}
BitSet32 gprs_to_push =
(emitting_routine ? CALLER_SAVED_GPRS : gpr.GetCallerSavedUsed()) & ~scratch_gprs;
BitSet32 fprs_to_push =
(emitting_routine ? BitSet32(0xFFFFFFFF) : fpr.GetCallerSavedUsed()) & ~scratch_fprs;
if (!emitting_routine && mode == MemAccessMode::Auto && jo.fastmem)
gprs_to_push[30] = true;
bool in_far_code = false;
const u8* fast_access_start = GetCodePtr();
std::optional<FixupBranch> slow_access_fixup;
@ -76,13 +218,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
if (!jo.fastmem)
{
const ARM64Reg temp = emitting_routine ? ARM64Reg::W3 : ARM64Reg::W30;
memory_base = EncodeRegTo64(temp_gpr_3);
memory_offset = temp_gpr_2;
memory_base = EncodeRegTo64(temp);
memory_offset = ARM64Reg::W0;
LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
LDR(memory_base, MEM_REG, ArithOption(temp, true));
LSR(temp_gpr_3, addr, PowerPC::BAT_INDEX_SHIFT);
LDR(memory_base, MEM_REG, ArithOption(temp_gpr_3, true));
if (emit_slow_access)
{
@ -95,15 +235,12 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
}
else if (emit_slow_access && emitting_routine)
{
const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W1 : ARM64Reg::W3;
const ARM64Reg temp2 = ARM64Reg::W0;
slow_access_fixup = CheckIfSafeAddress(addr, temp1, temp2);
slow_access_fixup = CheckIfSafeAddress(addr, temp_gpr_3, temp_gpr_2);
}
if ((flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT))
{
ARM64Reg temp = ARM64Reg::D0;
ARM64Reg temp = EncodeRegToDouble(temp_fpr_1);
temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true);
m_float_emit.STR(access_size, temp, memory_base, memory_offset);
@ -117,7 +254,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
}
else if (flags & BackPatchInfo::FLAG_STORE)
{
ARM64Reg temp = ARM64Reg::W1;
ARM64Reg temp = temp_gpr_1;
temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);
if (flags & BackPatchInfo::FLAG_SIZE_32)
@ -130,7 +267,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
else if (flags & BackPatchInfo::FLAG_ZERO_256)
{
// This literally only stores 32 bytes of zeros to the target address
ARM64Reg temp = ARM64Reg::X30;
ARM64Reg temp = EncodeRegTo64(temp_gpr_1);
ADD(temp, memory_base, memory_offset);
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 0);
STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 16);
@ -151,8 +288,6 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
if (emit_slow_access)
{
const bool memcheck = jo.memcheck && !emitting_routine;
if (emit_fast_access)
{
in_far_code = true;
@ -169,12 +304,9 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
if (slow_access_fixup)
SetJumpTarget(*slow_access_fixup);
const ARM64Reg temp_gpr = ARM64Reg::W1;
const int temp_gpr_index = DecodeReg(temp_gpr);
BitSet32 gprs_to_push_early = {};
if (memcheck)
gprs_to_push_early[temp_gpr_index] = true;
gprs_to_push_early[DecodeReg(memcheck_temp_gpr)] = true;
if (flags & BackPatchInfo::FLAG_LOAD)
gprs_to_push_early[0] = true;
@ -185,9 +317,18 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
if ((gprs_to_push & gprs_to_push_early).Count() & 1)
gprs_to_push_early[30] = true;
// This temp GPR is only used when GPRs have been pushed, so we can choose almost any register
ARM64Reg temp_gpr_for_function_call = ARM64Reg::W8;
while (temp_gpr_for_function_call == addr ||
(temp_gpr_for_function_call == RS && (flags & BackPatchInfo::FLAG_STORE)))
{
temp_gpr_for_function_call =
static_cast<ARM64Reg>(static_cast<int>(temp_gpr_for_function_call) + 1);
}
ABI_PushRegisters(gprs_to_push & gprs_to_push_early);
ABI_PushRegisters(gprs_to_push & ~gprs_to_push_early);
m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30);
m_float_emit.ABI_PushRegisters(fprs_to_push, EncodeRegTo64(temp_gpr_for_function_call));
// PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
// interrupt checks, and printing accurate PC locations in debug logs.
@ -196,14 +337,23 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
// so the caller has to store the PC themselves.
if (!emitting_routine)
{
MOVI2R(ARM64Reg::W30, js.compilerPC);
STR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(pc));
MOVI2R(temp_gpr_for_function_call, js.compilerPC);
STR(IndexType::Unsigned, temp_gpr_for_function_call, PPC_REG, PPCSTATE_OFF(pc));
}
if (flags & BackPatchInfo::FLAG_STORE)
{
ARM64Reg src_reg = RS;
const ARM64Reg dst_reg = access_size == 64 ? ARM64Reg::X1 : ARM64Reg::W1;
ARM64Reg temp_addr_reg = addr;
if (addr == ARM64Reg::W1)
{
// If addr is W1, we must move the address to a different register so we don't
// overwrite it when moving RS to W1. W2 is the optimal register to move to,
// because that's the register the address needs to be in for the function call.
temp_addr_reg = RS != ARM64Reg::W2 ? ARM64Reg::W2 : temp_gpr_for_function_call;
MOV(temp_addr_reg, addr);
}
if (flags & BackPatchInfo::FLAG_FLOAT)
{
@ -227,49 +377,48 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
if (access_size == 64)
{
ABI_CallFunction(reverse ? &PowerPC::WriteU64SwapFromJit : &PowerPC::WriteU64FromJit,
&m_mmu, src_reg, ARM64Reg::W2);
&m_mmu, src_reg, temp_addr_reg);
}
else if (access_size == 32)
{
ABI_CallFunction(reverse ? &PowerPC::WriteU32SwapFromJit : &PowerPC::WriteU32FromJit,
&m_mmu, src_reg, ARM64Reg::W2);
&m_mmu, src_reg, temp_addr_reg);
}
else if (access_size == 16)
{
ABI_CallFunction(reverse ? &PowerPC::WriteU16SwapFromJit : &PowerPC::WriteU16FromJit,
&m_mmu, src_reg, ARM64Reg::W2);
&m_mmu, src_reg, temp_addr_reg);
}
else
{
ABI_CallFunction(&PowerPC::WriteU8FromJit, &m_mmu, src_reg, ARM64Reg::W2);
ABI_CallFunction(&PowerPC::WriteU8FromJit, &m_mmu, src_reg, addr);
}
}
else if (flags & BackPatchInfo::FLAG_ZERO_256)
{
ABI_CallFunction(&PowerPC::ClearDCacheLineFromJit, &m_mmu, ARM64Reg::W1);
ABI_CallFunction(&PowerPC::ClearDCacheLineFromJit, &m_mmu, addr);
}
else
{
if (access_size == 64)
ABI_CallFunction(&PowerPC::ReadU64FromJit, &m_mmu, ARM64Reg::W1);
ABI_CallFunction(&PowerPC::ReadU64FromJit, &m_mmu, addr);
else if (access_size == 32)
ABI_CallFunction(&PowerPC::ReadU32FromJit, &m_mmu, ARM64Reg::W1);
ABI_CallFunction(&PowerPC::ReadU32FromJit, &m_mmu, addr);
else if (access_size == 16)
ABI_CallFunction(&PowerPC::ReadU16FromJit, &m_mmu, ARM64Reg::W1);
ABI_CallFunction(&PowerPC::ReadU16FromJit, &m_mmu, addr);
else
ABI_CallFunction(&PowerPC::ReadU8FromJit, &m_mmu, ARM64Reg::W1);
ABI_CallFunction(&PowerPC::ReadU8FromJit, &m_mmu, addr);
}
m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30);
m_float_emit.ABI_PopRegisters(fprs_to_push, EncodeRegTo64(temp_gpr_for_function_call));
ABI_PopRegisters(gprs_to_push & ~gprs_to_push_early);
if (memcheck)
{
const ARM64Reg temp_fpr = fprs_to_push[0] ? ARM64Reg::INVALID_REG : ARM64Reg::Q0;
const u64 early_push_count = (gprs_to_push & gprs_to_push_early).Count();
const u64 early_push_size = Common::AlignUp(early_push_count, 2) * 8;
WriteConditionalExceptionExit(EXCEPTION_DSI, temp_gpr, temp_fpr, early_push_size);
WriteConditionalExceptionExit(EXCEPTION_DSI, memcheck_temp_gpr, temp_fpr_1, early_push_size);
}
if (flags & BackPatchInfo::FLAG_LOAD)
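To make the new push-set derivation concrete: the routine now pushes only the caller-saved registers that are actually in use and not flagged as scratch, per the gprs_to_push/fprs_to_push computation near the top of this function. A minimal worked example with made-up register state, not taken from any particular call site:

// Suppose caller-saved W0, W1 and W8 hold live guest values, and the caller
// passed scratch_gprs = {1} because W1 is its disposable address register.
BitSet32 caller_saved_used{0, 1, 8};                        // gpr.GetCallerSavedUsed()
BitSet32 scratch_gprs{1};
BitSet32 gprs_to_push = caller_saved_used & ~scratch_gprs;  // -> {0, 8}
// Only W0 and W8 are spilled around the slow-access call; W1 is left alone.
// (In the !emitting_routine, Auto, fastmem case, W30 is additionally forced
// into the push set, as seen above.)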


@ -30,10 +30,12 @@ using namespace Arm64Gen;
void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
{
// We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
if (jo.memcheck || !jo.fastmem)
gpr.Lock(ARM64Reg::W30);
if (jo.memcheck)
gpr.Lock(ARM64Reg::W0);
const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1);
gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
ARM64Reg dest_reg = gpr.R(dest);
ARM64Reg up_reg = ARM64Reg::INVALID_REG;
@ -45,7 +47,6 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
if (offsetReg != -1 && !gpr.IsImm(offsetReg))
off_reg = gpr.R(offsetReg);
ARM64Reg addr_reg = ARM64Reg::W1;
u32 imm_addr = 0;
bool is_immediate = false;
@ -107,12 +108,10 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
}
}
ARM64Reg XA = EncodeRegTo64(addr_reg);
bool addr_reg_set = !is_immediate;
const auto set_addr_reg_if_needed = [&] {
if (!addr_reg_set)
MOVI2R(XA, imm_addr);
MOVI2R(addr_reg, imm_addr);
};
const bool early_update = !jo.memcheck && dest != static_cast<u32>(addr);
@ -123,14 +122,12 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
MOV(gpr.R(addr), addr_reg);
}
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = false;
scratch_gprs[DecodeReg(addr_reg)] = true;
if (jo.memcheck)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0;
@ -140,22 +137,24 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size))
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, regs_in_use,
fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, addr_reg, scratch_gprs,
scratch_fprs);
}
else if (mmio_address)
{
regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
regs_in_use[DecodeReg(ARM64Reg::W30)] = false;
regs_in_use[DecodeReg(dest_reg)] = false;
MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, regs_in_use,
fprs_in_use, dest_reg, mmio_address, flags);
scratch_gprs[DecodeReg(addr_reg)] = true;
scratch_gprs[DecodeReg(ARM64Reg::W30)] = true;
scratch_gprs[DecodeReg(dest_reg)] = true;
MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit,
gpr.GetCallerSavedUsed() & ~scratch_gprs,
fpr.GetCallerSavedUsed() & ~scratch_fprs, dest_reg, mmio_address, flags);
addr_reg_set = false;
}
else
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, addr_reg, scratch_gprs,
scratch_fprs);
}
gpr.BindToRegister(dest, false, true);
@ -168,8 +167,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
MOV(gpr.R(addr), addr_reg);
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
if (jo.memcheck || !jo.fastmem)
gpr.Unlock(ARM64Reg::W30);
if (jo.memcheck)
gpr.Unlock(ARM64Reg::W0);
}
@ -177,9 +176,9 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
bool update)
{
// We want to make sure to not get LR as a temp register
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W0);
gpr.Lock(ARM64Reg::W30);
const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W2);
// Don't materialize zero.
ARM64Reg RS = gpr.IsImm(value, 0) ? ARM64Reg::WZR : gpr.R(value);
@ -192,8 +191,6 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
if (dest != -1 && !gpr.IsImm(dest))
reg_dest = gpr.R(dest);
ARM64Reg addr_reg = ARM64Reg::W2;
u32 imm_addr = 0;
bool is_immediate = false;
@ -255,12 +252,10 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
}
}
ARM64Reg XA = EncodeRegTo64(addr_reg);
bool addr_reg_set = !is_immediate;
const auto set_addr_reg_if_needed = [&] {
if (!addr_reg_set)
MOVI2R(XA, imm_addr);
MOVI2R(addr_reg, imm_addr);
};
const bool early_update = !jo.memcheck && value != static_cast<u32>(dest);
@ -271,13 +266,10 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
MOV(gpr.R(dest), addr_reg);
}
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W2)] = false;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
scratch_gprs[DecodeReg(addr_reg)] = true;
u32 access_size = BackPatchInfo::GetFlagSize(flags);
u32 mmio_address = 0;
@ -313,22 +305,23 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
else if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size))
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, addr_reg, scratch_gprs,
scratch_fprs);
}
else if (mmio_address)
{
regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
regs_in_use[DecodeReg(ARM64Reg::W2)] = false;
regs_in_use[DecodeReg(ARM64Reg::W30)] = false;
regs_in_use[DecodeReg(RS)] = false;
scratch_gprs[DecodeReg(addr_reg)] = true;
scratch_gprs[DecodeReg(ARM64Reg::W30)] = true;
scratch_gprs[DecodeReg(RS)] = true;
MMIOWriteRegToAddr(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit,
regs_in_use, fprs_in_use, RS, mmio_address, flags);
gpr.GetCallerSavedUsed() & ~scratch_gprs,
fpr.GetCallerSavedUsed() & ~scratch_fprs, RS, mmio_address, flags);
addr_reg_set = false;
}
else
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, addr_reg, scratch_gprs, scratch_fprs);
}
if (update && !early_update)
@ -338,9 +331,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
MOV(gpr.R(dest), addr_reg);
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W0);
gpr.Unlock(ARM64Reg::W30);
}
FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp,
@ -522,28 +513,33 @@ void JitArm64::lmw(UGeckoInstruction inst)
u32 a = inst.RA, d = inst.RD;
s32 offset = inst.SIMM_16;
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
if (jo.memcheck || !jo.fastmem)
gpr.Lock(ARM64Reg::W30);
if (jo.memcheck)
gpr.Lock(ARM64Reg::W0);
// MMU games make use of a >= d despite this being invalid according to the PEM.
// If a >= d occurs, we must make sure not to re-read rA once we have started doing the loads.
ARM64Reg addr_reg = ARM64Reg::W1;
const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1);
Arm64RegCache::ScopedARM64Reg addr_base_reg;
bool a_is_addr_base_reg = false;
if (!a)
MOVI2R(addr_reg, offset);
else if (gpr.IsImm(a))
MOVI2R(addr_reg, gpr.GetImm(a) + offset);
else if (a < d && offset + (31 - d) * 4 < 0x1000)
a_is_addr_base_reg = true;
else
ADDI2R(addr_reg, gpr.R(a), offset, addr_reg);
Arm64RegCache::ScopedARM64Reg addr_base_reg;
if (!a_is_addr_base_reg)
{
addr_base_reg = gpr.GetScopedReg();
MOV(addr_base_reg, addr_reg);
MOVI2R(addr_base_reg, offset);
}
else if (gpr.IsImm(a))
{
addr_base_reg = gpr.GetScopedReg();
MOVI2R(addr_base_reg, gpr.GetImm(a) + offset);
}
else if (a < d && offset + (31 - d) * 4 < 0x1000)
{
a_is_addr_base_reg = true;
}
else
{
addr_base_reg = gpr.GetScopedReg();
ADDI2R(addr_base_reg, gpr.R(a), offset, addr_base_reg);
}
BitSet32 gprs_to_discard{};
@ -586,22 +582,32 @@ void JitArm64::lmw(UGeckoInstruction inst)
{
gpr.BindToRegister(i, false, false);
ARM64Reg dest_reg = gpr.R(i);
ARM64Reg current_iteration_addr_reg = addr_reg;
if (a_is_addr_base_reg)
ADDI2R(addr_reg, gpr.R(a), offset + (i - d) * 4);
else if (i != d)
ADDI2R(addr_reg, addr_base_reg, (i - d) * 4);
{
const u32 current_iteration_offset = offset + (i - d) * 4;
if (current_iteration_offset != 0)
ADDI2R(addr_reg, gpr.R(a), current_iteration_offset);
else
current_iteration_addr_reg = gpr.R(a);
}
else
{
if (i != d)
ADDI2R(addr_reg, addr_base_reg, (i - d) * 4);
else
current_iteration_addr_reg = addr_base_reg;
}
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(addr_reg)] = false;
if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
if (!jo.memcheck)
regs_in_use[DecodeReg(dest_reg)] = false;
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(addr_reg)] = true;
if (jo.memcheck)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg), regs_in_use,
fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, current_iteration_addr_reg,
scratch_gprs, scratch_fprs);
gpr.BindToRegister(i, false, true);
ASSERT(dest_reg == gpr.R(i));
@ -629,8 +635,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
}
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
if (jo.memcheck || !jo.fastmem)
gpr.Unlock(ARM64Reg::W30);
if (jo.memcheck)
gpr.Unlock(ARM64Reg::W0);
}
@ -642,26 +648,29 @@ void JitArm64::stmw(UGeckoInstruction inst)
u32 a = inst.RA, s = inst.RS;
s32 offset = inst.SIMM_16;
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W0);
gpr.Lock(ARM64Reg::W30);
ARM64Reg addr_reg = ARM64Reg::W2;
const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W2);
Arm64RegCache::ScopedARM64Reg addr_base_reg;
bool a_is_addr_base_reg = false;
if (!a)
MOVI2R(addr_reg, offset);
else if (gpr.IsImm(a))
MOVI2R(addr_reg, gpr.GetImm(a) + offset);
else if (offset + (31 - s) * 4 < 0x1000)
a_is_addr_base_reg = true;
else
ADDI2R(addr_reg, gpr.R(a), offset, addr_reg);
Arm64GPRCache::ScopedARM64Reg addr_base_reg;
if (!a_is_addr_base_reg)
{
addr_base_reg = gpr.GetScopedReg();
MOV(addr_base_reg, addr_reg);
MOVI2R(addr_base_reg, offset);
}
else if (gpr.IsImm(a))
{
addr_base_reg = gpr.GetScopedReg();
MOVI2R(addr_base_reg, gpr.GetImm(a) + offset);
}
else if (offset + (31 - s) * 4 < 0x1000)
{
a_is_addr_base_reg = true;
}
else
{
addr_base_reg = gpr.GetScopedReg();
ADDI2R(addr_base_reg, gpr.R(a), offset, addr_base_reg);
}
BitSet32 gprs_to_discard{};
@ -704,21 +713,30 @@ void JitArm64::stmw(UGeckoInstruction inst)
for (u32 i = s; i < 32; i++)
{
ARM64Reg src_reg = gpr.R(i);
ARM64Reg current_iteration_addr_reg = addr_reg;
if (a_is_addr_base_reg)
ADDI2R(addr_reg, gpr.R(a), offset + (i - s) * 4);
else if (i != s)
ADDI2R(addr_reg, addr_base_reg, (i - s) * 4);
{
const u32 current_iteration_offset = offset + (i - s) * 4;
if (current_iteration_offset != 0)
ADDI2R(addr_reg, gpr.R(a), current_iteration_offset);
else
current_iteration_addr_reg = gpr.R(a);
}
else
{
if (i != s)
ADDI2R(addr_reg, addr_base_reg, (i - s) * 4);
else
current_iteration_addr_reg = addr_base_reg;
}
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
regs_in_use[DecodeReg(addr_reg)] = false;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(addr_reg)] = true;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use,
fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, current_iteration_addr_reg,
scratch_gprs, scratch_fprs);
// To reduce register pressure and to avoid getting a pipeline-unfriendly long run of stores
// after this instruction, flush registers that would be flushed after this instruction anyway.
@ -750,9 +768,7 @@ void JitArm64::stmw(UGeckoInstruction inst)
}
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W0);
gpr.Unlock(ARM64Reg::W30);
}
void JitArm64::dcbx(UGeckoInstruction inst)
@ -971,17 +987,11 @@ void JitArm64::dcbz(UGeckoInstruction inst)
int a = inst.RA, b = inst.RB;
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W0);
gpr.Lock(ARM64Reg::W30);
Common::ScopeGuard register_guard([&] {
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W0);
});
Common::ScopeGuard register_guard([&] { gpr.Unlock(ARM64Reg::W30); });
constexpr ARM64Reg addr_reg = ARM64Reg::W1;
const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1);
constexpr ARM64Reg temp_reg = ARM64Reg::W30;
// HACK: Don't clear any memory in the [0x8000'0000, 0x8000'8000) region.
@ -1043,14 +1053,12 @@ void JitArm64::dcbz(UGeckoInstruction inst)
}
}
BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
gprs_to_push[DecodeReg(ARM64Reg::W1)] = false;
if (!jo.fastmem)
gprs_to_push[DecodeReg(ARM64Reg::W0)] = false;
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
scratch_gprs[DecodeReg(addr_reg)] = true;
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1,
EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);
EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1, addr_reg,
scratch_gprs, scratch_fprs);
if (using_dcbz_hack)
SetJumpTarget(end_dcbz_hack);
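To illustrate the per-iteration address handling introduced in lmw/stmw above: when the offset for the current iteration is zero, the existing register is used directly as the address and no ADDI2R is emitted. A sketch with assumed values, lmw r29, 0(r3), i.e. d = 29, a = 3, offset = 0, so a_is_addr_base_reg holds:

for (u32 i = d; i < 32; i++)
{
  ARM64Reg current_iteration_addr_reg = addr_reg;
  const u32 current_iteration_offset = offset + (i - d) * 4;  // 0, 4, 8
  if (current_iteration_offset != 0)
    ADDI2R(addr_reg, gpr.R(a), current_iteration_offset);     // i = 30, 31
  else
    current_iteration_addr_reg = gpr.R(a);                    // i = 29: no add emitted
  // The load then uses current_iteration_addr_reg as its address register.
}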


@ -77,13 +77,12 @@ void JitArm64::lfXX(UGeckoInstruction inst)
const RegType type =
(flags & BackPatchInfo::FLAG_SIZE_64) != 0 ? RegType::LowerPair : RegType::DuplicatedSingle;
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
if (jo.memcheck || !jo.fastmem)
gpr.Lock(ARM64Reg::W30);
if (jo.memcheck)
gpr.Lock(ARM64Reg::W0);
const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1);
const ARM64Reg VD = fpr.RW(inst.FD, type, false);
ARM64Reg addr_reg = ARM64Reg::W1;
if (update)
{
@ -152,10 +151,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
}
}
ARM64Reg XA = EncodeRegTo64(addr_reg);
if (is_immediate)
MOVI2R(XA, imm_addr);
MOVI2R(addr_reg, imm_addr);
const bool early_update = !jo.memcheck;
if (update && early_update)
@ -164,23 +161,21 @@ void JitArm64::lfXX(UGeckoInstruction inst)
MOV(gpr.R(a), addr_reg);
}
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (jo.memcheck || !jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
if (!jo.memcheck)
fprs_in_use[DecodeReg(VD)] = false;
scratch_gprs[DecodeReg(addr_reg)] = true;
if (jo.memcheck)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags)))
{
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, addr_reg, scratch_gprs,
scratch_fprs);
}
else
{
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, addr_reg, scratch_gprs, scratch_fprs);
}
const ARM64Reg VD_again = fpr.RW(inst.FD, type, true);
@ -192,9 +187,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
MOV(gpr.R(a), addr_reg);
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (jo.memcheck || !jo.fastmem)
gpr.Unlock(ARM64Reg::W30);
if (jo.memcheck)
gpr.Unlock(ARM64Reg::W0);
}
@ -264,8 +258,6 @@ void JitArm64::stfXX(UGeckoInstruction inst)
u32 imm_addr = 0;
bool is_immediate = false;
fpr.Lock(ARM64Reg::Q0);
const bool have_single = fpr.IsSingle(inst.FS, true);
Arm64FPRCache::ScopedARM64Reg V0 =
@ -278,11 +270,9 @@ void JitArm64::stfXX(UGeckoInstruction inst)
V0 = std::move(single_reg);
}
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W0);
gpr.Lock(ARM64Reg::W30);
ARM64Reg addr_reg = ARM64Reg::W2;
const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W2);
if (update)
{
@ -351,12 +341,10 @@ void JitArm64::stfXX(UGeckoInstruction inst)
}
}
ARM64Reg XA = EncodeRegTo64(addr_reg);
bool addr_reg_set = !is_immediate;
const auto set_addr_reg_if_needed = [&] {
if (!addr_reg_set)
MOVI2R(XA, imm_addr);
MOVI2R(addr_reg, imm_addr);
};
const bool early_update = !jo.memcheck;
@ -367,14 +355,10 @@ void JitArm64::stfXX(UGeckoInstruction inst)
MOV(gpr.R(a), addr_reg);
}
BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
if (!update || early_update)
regs_in_use[DecodeReg(ARM64Reg::W2)] = false;
if (!jo.fastmem)
regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
scratch_gprs[DecodeReg(addr_reg)] = true;
if (is_immediate)
{
@ -402,20 +386,20 @@ void JitArm64::stfXX(UGeckoInstruction inst)
else if (m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags)))
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, XA, regs_in_use,
fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, addr_reg, scratch_gprs,
scratch_fprs);
}
else
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, XA, regs_in_use,
fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, addr_reg, scratch_gprs,
scratch_fprs);
}
}
else
{
set_addr_reg_if_needed();
EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, XA, regs_in_use, fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, addr_reg, scratch_gprs, scratch_fprs);
}
if (update && !early_update)
@ -425,8 +409,5 @@ void JitArm64::stfXX(UGeckoInstruction inst)
MOV(gpr.R(a), addr_reg);
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W0);
gpr.Unlock(ARM64Reg::W30);
}


@ -38,20 +38,20 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
const int i = indexed ? inst.Ix : inst.I;
const int w = indexed ? inst.Wx : inst.W;
gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Lock(ARM64Reg::Q0);
gpr.Lock(ARM64Reg::W30);
if (!js.assumeNoPairedQuantize)
{
gpr.Lock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
fpr.Lock(ARM64Reg::Q1);
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1);
}
else if (jo.memcheck || !jo.fastmem)
else if (jo.memcheck)
{
gpr.Lock(ARM64Reg::W0);
}
constexpr ARM64Reg type_reg = ARM64Reg::W0;
constexpr ARM64Reg addr_reg = ARM64Reg::W1;
const auto addr_reg = js.assumeNoPairedQuantize ? gpr.GetScopedRegWithPreference(ARM64Reg::W1) :
Arm64RegCache::ScopedARM64Reg(ARM64Reg::W1);
constexpr ARM64Reg scale_reg = ARM64Reg::W2;
ARM64Reg VS = fpr.RW(inst.RS, RegType::Single, false);
@ -79,24 +79,19 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
if (js.assumeNoPairedQuantize)
{
BitSet32 gprs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
// Wipe the registers we are using as temporaries
if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (jo.memcheck || !jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
if (!jo.memcheck)
fprs_in_use[DecodeReg(VS)] = false;
scratch_gprs[DecodeReg(addr_reg)] = true;
if (jo.memcheck)
scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;
u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w)
flags |= BackPatchInfo::FLAG_PAIR;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use,
fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, addr_reg, scratch_gprs, scratch_fprs);
}
else
{
@ -133,14 +128,13 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
MOV(gpr.R(inst.RA), addr_reg);
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
gpr.Unlock(ARM64Reg::W30);
if (!js.assumeNoPairedQuantize)
{
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
fpr.Unlock(ARM64Reg::Q1);
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
}
else if (jo.memcheck || !jo.fastmem)
else if (jo.memcheck)
{
gpr.Unlock(ARM64Reg::W0);
}
@ -167,9 +161,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
const int i = indexed ? inst.Ix : inst.I;
const int w = indexed ? inst.Wx : inst.W;
fpr.Lock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize)
fpr.Lock(ARM64Reg::Q1);
fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1);
const bool have_single = fpr.IsSingle(inst.RS);
@ -205,15 +198,18 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
}
}
gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
if (!js.assumeNoPairedQuantize || !jo.fastmem)
gpr.Lock(ARM64Reg::W0);
if (!js.assumeNoPairedQuantize && !jo.fastmem)
gpr.Lock(ARM64Reg::W3);
gpr.Lock(ARM64Reg::W30);
if (!js.assumeNoPairedQuantize)
{
gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2);
if (!jo.fastmem)
gpr.Lock(ARM64Reg::W3);
}
constexpr ARM64Reg type_reg = ARM64Reg::W0;
constexpr ARM64Reg scale_reg = ARM64Reg::W1;
constexpr ARM64Reg addr_reg = ARM64Reg::W2;
const auto addr_reg = js.assumeNoPairedQuantize ? gpr.GetScopedRegWithPreference(ARM64Reg::W2) :
Arm64RegCache::ScopedARM64Reg(ARM64Reg::W2);
if (inst.RA || update) // Always uses the register on update
{
@ -239,22 +235,17 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
if (js.assumeNoPairedQuantize)
{
BitSet32 gprs_in_use = gpr.GetCallerSavedUsed();
BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
BitSet32 scratch_gprs;
BitSet32 scratch_fprs;
// Wipe the registers we are using as temporaries
gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
if (!update || early_update)
gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
if (!jo.fastmem)
gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
scratch_gprs[DecodeReg(addr_reg)] = true;
u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
if (!w)
flags |= BackPatchInfo::FLAG_PAIR;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use,
fprs_in_use);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, addr_reg, scratch_gprs, scratch_fprs);
}
else
{
@ -280,12 +271,12 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
MOV(gpr.R(inst.RA), addr_reg);
}
gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
fpr.Unlock(ARM64Reg::Q0);
if (!js.assumeNoPairedQuantize || !jo.fastmem)
gpr.Unlock(ARM64Reg::W0);
if (!js.assumeNoPairedQuantize && !jo.fastmem)
gpr.Unlock(ARM64Reg::W3);
gpr.Unlock(ARM64Reg::W30);
if (!js.assumeNoPairedQuantize)
fpr.Unlock(ARM64Reg::Q1);
{
gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2);
if (!jo.fastmem)
gpr.Unlock(ARM64Reg::W3);
fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
}
}


@ -5,6 +5,7 @@
#include <algorithm>
#include <cstddef>
#include <ranges>
#include <vector>
#include "Common/Assert.h"
@ -63,6 +64,30 @@ ARM64Reg Arm64RegCache::GetReg()
return ARM64Reg::INVALID_REG;
}
ARM64Reg Arm64RegCache::GetRegWithPreference(Arm64Gen::ARM64Reg preferred)
{
// In practice, the preferred register tends to be towards the end of m_host_registers,
// so we scan through m_host_registers backwards
for (auto& it : m_host_registers | std::views::reverse)
{
if (it.GetReg() == preferred)
{
if (it.IsLocked())
{
return GetReg();
}
else
{
it.Lock();
return it.GetReg();
}
}
}
ASSERT_MSG(DYNA_REC, false, "Preferred register {:#x} is not in register cache",
static_cast<int>(preferred));
return ARM64Reg::INVALID_REG;
}
void Arm64RegCache::UpdateLastUsed(BitSet32 regs_used)
{
for (size_t i = 0; i < m_guest_registers.size(); ++i)


@ -183,13 +183,16 @@ public:
// Returns a temporary register for use
// Requires unlocking after done
Arm64Gen::ARM64Reg GetReg();
Arm64Gen::ARM64Reg GetRegWithPreference(Arm64Gen::ARM64Reg preferred);
class ScopedARM64Reg
{
public:
inline ScopedARM64Reg() = default;
ScopedARM64Reg(const ScopedARM64Reg&) = delete;
explicit inline ScopedARM64Reg(Arm64RegCache& cache) : m_reg(cache.GetReg()), m_gpr(&cache) {}
inline ScopedARM64Reg(Arm64RegCache& cache, Arm64Gen::ARM64Reg reg) : m_reg(reg), m_gpr(&cache)
{
}
inline ScopedARM64Reg(Arm64Gen::ARM64Reg reg) : m_reg(reg) {}
inline ScopedARM64Reg(ScopedARM64Reg&& scoped_reg) { *this = std::move(scoped_reg); }
inline ~ScopedARM64Reg() { Unlock(); }
@ -235,7 +238,11 @@ public:
// Returns a temporary register
// Unlocking is implicitly handled through RAII
inline ScopedARM64Reg GetScopedReg() { return ScopedARM64Reg(*this); }
inline ScopedARM64Reg GetScopedReg() { return ScopedARM64Reg(*this, GetReg()); }
inline ScopedARM64Reg GetScopedRegWithPreference(Arm64Gen::ARM64Reg preferred)
{
return ScopedARM64Reg(*this, GetRegWithPreference(preferred));
}
void UpdateLastUsed(BitSet32 regs_used);
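The new cache API above is what the load/store changes in this commit build on: GetRegWithPreference hands out the requested host register when it is free and falls back to GetReg() when it is locked, and the scoped wrapper keeps the RAII unlock behaviour. A short usage sketch; the preferred register and the surrounding code are illustrative:

{
  // Prefer W1 because the slow-access path is slightly cheaper when the address
  // is already in W1, but accept any free register if W1 is taken.
  const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1);

  // ... emit code that uses addr_reg as a temporary ...

}  // addr_reg is unlocked here automatically; no gpr.Unlock() call is needed.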


@ -524,12 +524,12 @@ void JitArm64::GenerateQuantizedLoads()
// Q0 is the return
// Q1 is a temporary
ARM64Reg temp_reg = ARM64Reg::X0;
ARM64Reg addr_reg = ARM64Reg::X1;
ARM64Reg addr_reg = ARM64Reg::W1;
ARM64Reg scale_reg = ARM64Reg::X2;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 3};
BitSet32 scratch_gprs{0, 3};
if (!jo.memcheck)
gprs_to_push &= ~BitSet32{1};
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
scratch_gprs[1] = true;
BitSet32 scratch_fprs{0, 1};
ARM64FloatEmitter float_emit(this);
const u8* start = GetCodePtr();
@ -541,7 +541,7 @@ void JitArm64::GenerateQuantizedLoads()
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true);
scratch_gprs | BitSet32{DecodeReg(scale_reg)}, scratch_fprs, true);
RET(ARM64Reg::X30);
}
@ -550,8 +550,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@ -568,8 +568,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@ -586,8 +586,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@ -603,8 +603,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@ -622,7 +622,7 @@ void JitArm64::GenerateQuantizedLoads()
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true);
scratch_gprs | BitSet32{DecodeReg(scale_reg)}, scratch_fprs, true);
RET(ARM64Reg::X30);
}
@ -631,8 +631,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@ -649,8 +649,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@ -667,8 +667,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@ -684,8 +684,8 @@ void JitArm64::GenerateQuantizedLoads()
constexpr u32 flags =
BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@ -735,13 +735,13 @@ void JitArm64::GenerateQuantizedStores()
// Q1 is a temporary
ARM64Reg temp_reg = ARM64Reg::X0;
ARM64Reg scale_reg = ARM64Reg::X1;
ARM64Reg addr_reg = ARM64Reg::X2;
BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 1};
ARM64Reg addr_reg = ARM64Reg::W2;
BitSet32 scratch_gprs{0, 1};
if (!jo.memcheck)
gprs_to_push &= ~BitSet32{2};
scratch_gprs[2] = true;
if (!jo.fastmem)
gprs_to_push &= ~BitSet32{3};
BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
scratch_gprs[3] = true;
BitSet32 scratch_fprs{0, 1};
ARM64FloatEmitter float_emit(this);
const u8* start = GetCodePtr();
@ -752,8 +752,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
RET(ARM64Reg::X30);
}
@ -771,8 +771,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
RET(ARM64Reg::X30);
}
@ -790,8 +790,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
RET(ARM64Reg::X30);
}
@ -808,8 +808,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
RET(ARM64Reg::X30);
}
@ -826,8 +826,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
RET(ARM64Reg::X30);
}
@ -837,8 +837,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
RET(ARM64Reg::X30);
}
@ -856,8 +856,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
RET(ARM64Reg::X30);
}
@ -875,8 +875,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
RET(ARM64Reg::X30);
}
@ -893,8 +893,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
RET(ARM64Reg::X30);
}
@ -911,8 +911,8 @@ void JitArm64::GenerateQuantizedStores()
constexpr u32 flags =
BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
fprs_to_push, true);
EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
scratch_fprs, true);
RET(ARM64Reg::X30);
}
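These common routines are the emitting_routine == true callers that the fixed-register list in the JitArm64.h comment is written for. A condensed sketch of how the store routines' scratch sets line up with that list, using the register roles documented at the top of GenerateQuantizedStores; flags and addr_reg are as in the cases above:

BitSet32 scratch_gprs{0, 1};    // W0 = temp_reg, W1 = scale_reg: always disposable here
if (!jo.memcheck)
  scratch_gprs[2] = true;       // W2 (the address) may only be clobbered without memcheck
if (!jo.fastmem)
  scratch_gprs[3] = true;       // W3 is one of the pre-allocated slow-path scratch registers
BitSet32 scratch_fprs{0, 1};    // Q0 = value/return, Q1 = temporary

EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
                     scratch_gprs, scratch_fprs, true);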