Merge ebb026e2c4 into e099a9c180
Commit 7edf25ba17
8 changed files with 486 additions and 317 deletions
@@ -250,34 +250,42 @@ protected:
  // This is the core routine for accessing emulated memory, with support for
  // many different kinds of loads and stores as well as fastmem/backpatching.
  //
  // Registers used:
  // The addr parameter can be any register, but the code emitted for slow accesses
  // will be slightly more efficient if the addr parameter is as follows:
  //
  //              addr   scratch
  // Store:       X2     X1
  // Load:        X1
  // Zero 256:    X1     X30
  // Store float: X2     Q0
  // Load float:  X1
  // Store:       W2
  // Load:        W1
  // Zero 256:    W1
  // Store float: W2
  // Load float:  W1
  //
  // If mode == AlwaysFastAccess, the addr argument can be any register.
  // Otherwise it must be the register listed in the table above.
  // This routine allocates most scratch registers dynamically, but in the following
  // situations, specific scratch registers have to be allocated in advance:
  //
  // Additional scratch registers are used in the following situations:
  // emitting_routine && mode == Auto: X0
  // emitting_routine && mode == Auto && (flags & BackPatchInfo::FLAG_STORE): X1
  // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
  // emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X3
  // emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X0
  // emitting_routine && mode != AlwaysSlowAccess &&
  //   (flags & BackPatchInfo::FLAG_STORE) && !(flags & BackPatchInfo::FLAG_FLOAT): X1
  // emitting_routine && mode != AlwaysSlowAccess &&
  //   (flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT): Q0
  // emitting_routine && mode != AlwaysSlowAccess &&
  //   (flags & BackPatchInfo::FLAG_ZERO_256): X30
  // !emitting_routine && mode == Auto && jo.fastmem: X30
  //
  // emitting_routine && mode == Auto: X0
  // emitting_routine && mode == Auto && !(flags & BackPatchInfo::FLAG_STORE): X3
  // emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X3
  // mode != AlwaysSlowAccess && !jo.fastmem: X0
  // If there are any other registers that the caller doesn't mind being overwritten,
  // these can be indicated in scratch_gprs and scratch_fprs.
  //
  // In the following situations, certain host registers must not contain guest registers:
  //
  // !emitting_routine && mode != AlwaysFastAccess && jo.memcheck: X30
  // !emitting_routine && mode != AlwaysFastAccess && jo.memcheck &&
  //   (flags & BackPatchInfo::FLAG_LOAD): X0
  // !emitting_routine && mode != AlwaysSlowAccess && !jo.fastmem: X30
  // !emitting_routine && mode == Auto && jo.fastmem: X30
  //
  // Furthermore, any callee-saved register which isn't marked in gprs_to_push/fprs_to_push
  // may be clobbered if mode != AlwaysFastAccess.
  //   (flags & BackPatchInfo::FLAG_LOAD): X0
  void EmitBackpatchRoutine(u32 flags, MemAccessMode mode, Arm64Gen::ARM64Reg RS,
                            Arm64Gen::ARM64Reg addr, BitSet32 gprs_to_push = BitSet32(0),
                            BitSet32 fprs_to_push = BitSet32(0), bool emitting_routine = false);
                            Arm64Gen::ARM64Reg addr, BitSet32 scratch_gprs = BitSet32(0),
                            BitSet32 scratch_fprs = BitSet32(0), bool emitting_routine = false);

  // Loadstore routines
  void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);
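The renamed parameters invert the old contract: instead of listing the registers that must survive (gprs_to_push/fprs_to_push), a caller now lists the registers it is willing to have clobbered, and EmitBackpatchRoutine derives the push sets itself. A minimal sketch of a call site under the new signature (the register names and flag combination here are illustrative, not taken verbatim from the commit):

    // Load a 32-bit GPR; addr_reg may be freely clobbered by the routine.
    BitSet32 scratch_gprs;
    BitSet32 scratch_fprs;
    scratch_gprs[DecodeReg(addr_reg)] = true;
    EmitBackpatchRoutine(BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_SIZE_32,
                         MemAccessMode::Auto, dest_reg, addr_reg, scratch_gprs, scratch_fprs);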
@@ -54,7 +54,7 @@ void JitArm64::DoBacktrace(uintptr_t access_address, SContext* ctx)
}

void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS, ARM64Reg addr,
                                    BitSet32 gprs_to_push, BitSet32 fprs_to_push,
                                    BitSet32 scratch_gprs, BitSet32 scratch_fprs,
                                    bool emitting_routine)
{
  const u32 access_size = BackPatchInfo::GetFlagSize(flags);

@@ -65,6 +65,148 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
  const bool emit_fast_access = mode != MemAccessMode::AlwaysSlowAccess;
  const bool emit_slow_access = mode != MemAccessMode::AlwaysFastAccess;

  const bool memcheck = jo.memcheck && !emitting_routine;

  if ((flags & BackPatchInfo::FLAG_LOAD))
  {
    if ((flags & BackPatchInfo::FLAG_FLOAT))
      scratch_fprs[DecodeReg(RS)] = !memcheck;
    else
      scratch_gprs[DecodeReg(RS)] = !memcheck;
  }

  BitSet32 temp_gpr_candidates = scratch_gprs;
  BitSet32 temp_fpr_candidates = scratch_fprs;
  temp_gpr_candidates[DecodeReg(addr)] = false;
  if (flags & BackPatchInfo::FLAG_FLOAT)
    temp_fpr_candidates[DecodeReg(RS)] = false;
  else if (!(flags & BackPatchInfo::FLAG_ZERO_256))
    temp_gpr_candidates[DecodeReg(RS)] = false;
  if (!emitting_routine && mode == MemAccessMode::Auto && jo.fastmem)
    temp_gpr_candidates[30] = true;

  const auto allocate_temp_reg = [this](Arm64RegCache& reg_cache,
                                        BitSet32& candidates) -> Arm64RegCache::ScopedARM64Reg {
    for (int i : candidates)
    {
      candidates[i] = false;
      ARM64Reg reg = ARM64Reg(i);
      if (&reg_cache == &fpr)
        reg = EncodeRegToQuad(reg);
      return reg;
    }
    return reg_cache.GetScopedReg();
  };

  const auto can_allocate_temp_reg_for_free = [](Arm64RegCache& reg_cache, BitSet32& candidates) {
    return candidates != BitSet32{} || reg_cache.GetUnlockedRegisterCount() > 0;
  };

  Arm64RegCache::ScopedARM64Reg temp_gpr_1;
  Arm64RegCache::ScopedARM64Reg temp_gpr_2;
  Arm64RegCache::ScopedARM64Reg temp_gpr_3;
  Arm64RegCache::ScopedARM64Reg temp_fpr_1;

  if (emit_fast_access)
  {
    if ((flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT))
    {
      temp_fpr_1 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::Q0) :
                                      allocate_temp_reg(fpr, temp_fpr_candidates);
      scratch_fprs[DecodeReg(temp_fpr_1)] = true;
    }
    else if (flags & BackPatchInfo::FLAG_STORE)
    {
      temp_gpr_1 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::W1) :
                                      allocate_temp_reg(gpr, temp_gpr_candidates);
      scratch_gprs[DecodeReg(temp_gpr_1)] = true;
    }
    else if (flags & BackPatchInfo::FLAG_ZERO_256)
    {
      temp_gpr_1 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::W30) :
                                      allocate_temp_reg(gpr, temp_gpr_candidates);
      scratch_gprs[DecodeReg(temp_gpr_1)] = true;
    }

    if (!jo.fastmem)
    {
      temp_gpr_2 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::W0) :
                                      allocate_temp_reg(gpr, temp_gpr_candidates);
      temp_gpr_3 = emitting_routine ? Arm64RegCache::ScopedARM64Reg(ARM64Reg::W3) :
                                      allocate_temp_reg(gpr, temp_gpr_candidates);
      scratch_gprs[DecodeReg(temp_gpr_2)] = true;
      scratch_gprs[DecodeReg(temp_gpr_3)] = true;
    }
    else if (emit_slow_access && emitting_routine)
    {
      temp_gpr_2 = ARM64Reg::W0;
      temp_gpr_3 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W1 : ARM64Reg::W3;
      scratch_gprs[DecodeReg(temp_gpr_2)] = true;
      scratch_gprs[DecodeReg(temp_gpr_3)] = true;
    }
  }

  // Setting memcheck_temp_gpr to W30 works, but because W30 is a register that needs to be pushed
  // and popped, using W30 may require us to emit an extra push and pop instruction, depending on
  // what other registers need pushing and popping. If we can find another register to use without
  // having to evict anything from the register cache, let's do that instead of using W30.
  ARM64Reg memcheck_temp_gpr = ARM64Reg::W30;
  if (emit_slow_access && memcheck)
  {
    const auto is_suitable_as_memcheck_temp_gpr = [flags](ARM64Reg reg) {
      return reg != ARM64Reg::INVALID_REG && reg != ARM64Reg::W30 &&
             (reg != ARM64Reg::W0 || !(flags & BackPatchInfo::FLAG_LOAD));
    };

    const auto get_unset_temp_gpr = [&]() -> Arm64RegCache::ScopedARM64Reg& {
      if (temp_gpr_1 == ARM64Reg::INVALID_REG)
        return temp_gpr_1;
      if (temp_gpr_2 == ARM64Reg::INVALID_REG)
        return temp_gpr_2;
      ASSERT(temp_gpr_3 == ARM64Reg::INVALID_REG);
      return temp_gpr_3;
    };

    if (is_suitable_as_memcheck_temp_gpr(temp_gpr_1))
    {
      memcheck_temp_gpr = temp_gpr_1;
    }
    else if (is_suitable_as_memcheck_temp_gpr(temp_gpr_2))
    {
      memcheck_temp_gpr = temp_gpr_2;
    }
    else if (is_suitable_as_memcheck_temp_gpr(temp_gpr_3))
    {
      memcheck_temp_gpr = temp_gpr_3;
    }
    else
    {
      while (can_allocate_temp_reg_for_free(gpr, temp_gpr_candidates))
      {
        Arm64RegCache::ScopedARM64Reg& temp_gpr_x = get_unset_temp_gpr();
        temp_gpr_x = allocate_temp_reg(gpr, temp_gpr_candidates);
        scratch_gprs[DecodeReg(temp_gpr_x)] = true;
        if (is_suitable_as_memcheck_temp_gpr(temp_gpr_x))
          break;
      }
    }

    if (temp_fpr_1 == ARM64Reg::INVALID_REG &&
        can_allocate_temp_reg_for_free(fpr, temp_fpr_candidates))
    {
      temp_fpr_1 = allocate_temp_reg(fpr, temp_fpr_candidates);
      scratch_fprs[DecodeReg(temp_fpr_1)] = true;
    }
  }

  BitSet32 gprs_to_push =
      (emitting_routine ? CALLER_SAVED_GPRS : gpr.GetCallerSavedUsed()) & ~scratch_gprs;
  BitSet32 fprs_to_push =
      (emitting_routine ? BitSet32(0xFFFFFFFF) : fpr.GetCallerSavedUsed()) & ~scratch_fprs;

  if (!emitting_routine && mode == MemAccessMode::Auto && jo.fastmem)
    gprs_to_push[30] = true;

  bool in_far_code = false;
  const u8* fast_access_start = GetCodePtr();
  std::optional<FixupBranch> slow_access_fixup;
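The allocation policy in allocate_temp_reg above is candidate-first: a register the caller already sacrificed in scratch_gprs/scratch_fprs costs nothing, and only when the candidate set is exhausted does the routine fall back to the register cache, which may have to flush a guest register. An illustrative trace (assuming the caller passed scratch registers W5 and W6, which are hypothetical here):

    // allocate_temp_reg(gpr, candidates) -> W5  (candidate bit cleared, no eviction)
    // allocate_temp_reg(gpr, candidates) -> W6  (candidate bit cleared, no eviction)
    // allocate_temp_reg(gpr, candidates) -> gpr.GetScopedReg()  (cache fallback, may evict)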
@@ -76,13 +218,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,

    if (!jo.fastmem)
    {
      const ARM64Reg temp = emitting_routine ? ARM64Reg::W3 : ARM64Reg::W30;
      memory_base = EncodeRegTo64(temp_gpr_3);
      memory_offset = temp_gpr_2;

      memory_base = EncodeRegTo64(temp);
      memory_offset = ARM64Reg::W0;

      LSR(temp, addr, PowerPC::BAT_INDEX_SHIFT);
      LDR(memory_base, MEM_REG, ArithOption(temp, true));
      LSR(temp_gpr_3, addr, PowerPC::BAT_INDEX_SHIFT);
      LDR(memory_base, MEM_REG, ArithOption(temp_gpr_3, true));

      if (emit_slow_access)
      {
@@ -95,15 +235,12 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
    }
    else if (emit_slow_access && emitting_routine)
    {
      const ARM64Reg temp1 = flags & BackPatchInfo::FLAG_STORE ? ARM64Reg::W1 : ARM64Reg::W3;
      const ARM64Reg temp2 = ARM64Reg::W0;

      slow_access_fixup = CheckIfSafeAddress(addr, temp1, temp2);
      slow_access_fixup = CheckIfSafeAddress(addr, temp_gpr_3, temp_gpr_2);
    }

    if ((flags & BackPatchInfo::FLAG_STORE) && (flags & BackPatchInfo::FLAG_FLOAT))
    {
      ARM64Reg temp = ARM64Reg::D0;
      ARM64Reg temp = EncodeRegToDouble(temp_fpr_1);
      temp = ByteswapBeforeStore(this, &m_float_emit, temp, EncodeRegToDouble(RS), flags, true);

      m_float_emit.STR(access_size, temp, memory_base, memory_offset);
@@ -117,7 +254,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
    }
    else if (flags & BackPatchInfo::FLAG_STORE)
    {
      ARM64Reg temp = ARM64Reg::W1;
      ARM64Reg temp = temp_gpr_1;
      temp = ByteswapBeforeStore(this, &m_float_emit, temp, RS, flags, true);

      if (flags & BackPatchInfo::FLAG_SIZE_32)
@@ -130,7 +267,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
    else if (flags & BackPatchInfo::FLAG_ZERO_256)
    {
      // This literally only stores 32 bytes of zeros to the target address
      ARM64Reg temp = ARM64Reg::X30;
      ARM64Reg temp = EncodeRegTo64(temp_gpr_1);
      ADD(temp, memory_base, memory_offset);
      STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 0);
      STP(IndexType::Signed, ARM64Reg::ZR, ARM64Reg::ZR, temp, 16);
@@ -151,8 +288,6 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,

  if (emit_slow_access)
  {
    const bool memcheck = jo.memcheck && !emitting_routine;

    if (emit_fast_access)
    {
      in_far_code = true;
@@ -169,12 +304,9 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
    if (slow_access_fixup)
      SetJumpTarget(*slow_access_fixup);

    const ARM64Reg temp_gpr = ARM64Reg::W1;
    const int temp_gpr_index = DecodeReg(temp_gpr);

    BitSet32 gprs_to_push_early = {};
    if (memcheck)
      gprs_to_push_early[temp_gpr_index] = true;
      gprs_to_push_early[DecodeReg(memcheck_temp_gpr)] = true;
    if (flags & BackPatchInfo::FLAG_LOAD)
      gprs_to_push_early[0] = true;

@@ -185,9 +317,18 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
    if ((gprs_to_push & gprs_to_push_early).Count() & 1)
      gprs_to_push_early[30] = true;

    // This temp GPR is only used when GPRs have been pushed, so we can choose almost any register
    ARM64Reg temp_gpr_for_function_call = ARM64Reg::W8;
    while (temp_gpr_for_function_call == addr ||
           (temp_gpr_for_function_call == RS && (flags & BackPatchInfo::FLAG_STORE)))
    {
      temp_gpr_for_function_call =
          static_cast<ARM64Reg>(static_cast<int>(temp_gpr_for_function_call) + 1);
    }

    ABI_PushRegisters(gprs_to_push & gprs_to_push_early);
    ABI_PushRegisters(gprs_to_push & ~gprs_to_push_early);
    m_float_emit.ABI_PushRegisters(fprs_to_push, ARM64Reg::X30);
    m_float_emit.ABI_PushRegisters(fprs_to_push, EncodeRegTo64(temp_gpr_for_function_call));

    // PC is used by memory watchpoints (if enabled), profiling where to insert gather pipe
    // interrupt checks, and printing accurate PC locations in debug logs.
@@ -196,14 +337,23 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
    // so the caller has to store the PC themselves.
    if (!emitting_routine)
    {
      MOVI2R(ARM64Reg::W30, js.compilerPC);
      STR(IndexType::Unsigned, ARM64Reg::W30, PPC_REG, PPCSTATE_OFF(pc));
      MOVI2R(temp_gpr_for_function_call, js.compilerPC);
      STR(IndexType::Unsigned, temp_gpr_for_function_call, PPC_REG, PPCSTATE_OFF(pc));
    }

    if (flags & BackPatchInfo::FLAG_STORE)
    {
      ARM64Reg src_reg = RS;
      const ARM64Reg dst_reg = access_size == 64 ? ARM64Reg::X1 : ARM64Reg::W1;
      ARM64Reg temp_addr_reg = addr;
      if (addr == ARM64Reg::W1)
      {
        // If addr is W1, we must move the address to a different register so we don't
        // overwrite it when moving RS to W1. W2 is the optimal register to move to,
        // because that's the register the address needs to be in for the function call.
        temp_addr_reg = RS != ARM64Reg::W2 ? ARM64Reg::W2 : temp_gpr_for_function_call;
        MOV(temp_addr_reg, addr);
      }

      if (flags & BackPatchInfo::FLAG_FLOAT)
      {
@@ -227,49 +377,48 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, MemAccessMode mode, ARM64Reg RS,
      if (access_size == 64)
      {
        ABI_CallFunction(reverse ? &PowerPC::WriteU64SwapFromJit : &PowerPC::WriteU64FromJit,
                         &m_mmu, src_reg, ARM64Reg::W2);
                         &m_mmu, src_reg, temp_addr_reg);
      }
      else if (access_size == 32)
      {
        ABI_CallFunction(reverse ? &PowerPC::WriteU32SwapFromJit : &PowerPC::WriteU32FromJit,
                         &m_mmu, src_reg, ARM64Reg::W2);
                         &m_mmu, src_reg, temp_addr_reg);
      }
      else if (access_size == 16)
      {
        ABI_CallFunction(reverse ? &PowerPC::WriteU16SwapFromJit : &PowerPC::WriteU16FromJit,
                         &m_mmu, src_reg, ARM64Reg::W2);
                         &m_mmu, src_reg, temp_addr_reg);
      }
      else
      {
        ABI_CallFunction(&PowerPC::WriteU8FromJit, &m_mmu, src_reg, ARM64Reg::W2);
        ABI_CallFunction(&PowerPC::WriteU8FromJit, &m_mmu, src_reg, addr);
      }
    }
    else if (flags & BackPatchInfo::FLAG_ZERO_256)
    {
      ABI_CallFunction(&PowerPC::ClearDCacheLineFromJit, &m_mmu, ARM64Reg::W1);
      ABI_CallFunction(&PowerPC::ClearDCacheLineFromJit, &m_mmu, addr);
    }
    else
    {
      if (access_size == 64)
        ABI_CallFunction(&PowerPC::ReadU64FromJit, &m_mmu, ARM64Reg::W1);
        ABI_CallFunction(&PowerPC::ReadU64FromJit, &m_mmu, addr);
      else if (access_size == 32)
        ABI_CallFunction(&PowerPC::ReadU32FromJit, &m_mmu, ARM64Reg::W1);
        ABI_CallFunction(&PowerPC::ReadU32FromJit, &m_mmu, addr);
      else if (access_size == 16)
        ABI_CallFunction(&PowerPC::ReadU16FromJit, &m_mmu, ARM64Reg::W1);
        ABI_CallFunction(&PowerPC::ReadU16FromJit, &m_mmu, addr);
      else
        ABI_CallFunction(&PowerPC::ReadU8FromJit, &m_mmu, ARM64Reg::W1);
        ABI_CallFunction(&PowerPC::ReadU8FromJit, &m_mmu, addr);
    }

    m_float_emit.ABI_PopRegisters(fprs_to_push, ARM64Reg::X30);
    m_float_emit.ABI_PopRegisters(fprs_to_push, EncodeRegTo64(temp_gpr_for_function_call));
    ABI_PopRegisters(gprs_to_push & ~gprs_to_push_early);

    if (memcheck)
    {
      const ARM64Reg temp_fpr = fprs_to_push[0] ? ARM64Reg::INVALID_REG : ARM64Reg::Q0;
      const u64 early_push_count = (gprs_to_push & gprs_to_push_early).Count();
      const u64 early_push_size = Common::AlignUp(early_push_count, 2) * 8;

      WriteConditionalExceptionExit(EXCEPTION_DSI, temp_gpr, temp_fpr, early_push_size);
      WriteConditionalExceptionExit(EXCEPTION_DSI, memcheck_temp_gpr, temp_fpr_1, early_push_size);
    }

    if (flags & BackPatchInfo::FLAG_LOAD)
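Two details in the slow-access path interact here: early_push_size is rounded with AlignUp(count, 2) * 8, and the parity test (gprs_to_push & gprs_to_push_early).Count() & 1 pulls W30 into the early set so the early block is an even number of registers. A worked example (illustrative counts, assuming ABI_PushRegisters keeps the stack 16-byte aligned by pushing GPRs in pairs):

    // 3 early-pushed GPRs -> AlignUp(3, 2) * 8 = 32 bytes of early stack;
    // forcing W30 early makes it 4 registers -> two full STP pairs, no wasted slot.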
@@ -30,10 +30,12 @@ using namespace Arm64Gen;
void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
{
  // We want to make sure to not get LR as a temp register
  gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
  if (jo.memcheck || !jo.fastmem)
    gpr.Lock(ARM64Reg::W30);
  if (jo.memcheck)
    gpr.Lock(ARM64Reg::W0);

  const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1);

  gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg, false);
  ARM64Reg dest_reg = gpr.R(dest);
  ARM64Reg up_reg = ARM64Reg::INVALID_REG;
@@ -45,7 +47,6 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
  if (offsetReg != -1 && !gpr.IsImm(offsetReg))
    off_reg = gpr.R(offsetReg);

  ARM64Reg addr_reg = ARM64Reg::W1;
  u32 imm_addr = 0;
  bool is_immediate = false;

@@ -107,12 +108,10 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
    }
  }

  ARM64Reg XA = EncodeRegTo64(addr_reg);

  bool addr_reg_set = !is_immediate;
  const auto set_addr_reg_if_needed = [&] {
    if (!addr_reg_set)
      MOVI2R(XA, imm_addr);
      MOVI2R(addr_reg, imm_addr);
  };

  const bool early_update = !jo.memcheck && dest != static_cast<u32>(addr);
@@ -123,14 +122,12 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
    MOV(gpr.R(addr), addr_reg);
  }

  BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
  BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
  BitSet32 scratch_gprs;
  BitSet32 scratch_fprs;
  if (!update || early_update)
    regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
  if (jo.memcheck || !jo.fastmem)
    regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
  if (!jo.memcheck)
    regs_in_use[DecodeReg(dest_reg)] = false;
  scratch_gprs[DecodeReg(addr_reg)] = true;
  if (jo.memcheck)
    scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;

  u32 access_size = BackPatchInfo::GetFlagSize(flags);
  u32 mmio_address = 0;
@@ -140,22 +137,24 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
  if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size))
  {
    set_addr_reg_if_needed();
    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, XA, regs_in_use,
                         fprs_in_use);
    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, dest_reg, addr_reg, scratch_gprs,
                         scratch_fprs);
  }
  else if (mmio_address)
  {
    regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
    regs_in_use[DecodeReg(ARM64Reg::W30)] = false;
    regs_in_use[DecodeReg(dest_reg)] = false;
    MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit, regs_in_use,
                  fprs_in_use, dest_reg, mmio_address, flags);
    scratch_gprs[DecodeReg(addr_reg)] = true;
    scratch_gprs[DecodeReg(ARM64Reg::W30)] = true;
    scratch_gprs[DecodeReg(dest_reg)] = true;
    MMIOLoadToReg(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit,
                  gpr.GetCallerSavedUsed() & ~scratch_gprs,
                  fpr.GetCallerSavedUsed() & ~scratch_fprs, dest_reg, mmio_address, flags);
    addr_reg_set = false;
  }
  else
  {
    set_addr_reg_if_needed();
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, XA, regs_in_use, fprs_in_use);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, addr_reg, scratch_gprs,
                         scratch_fprs);
  }

  gpr.BindToRegister(dest, false, true);
@@ -168,8 +167,8 @@ void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 o
    MOV(gpr.R(addr), addr_reg);
  }

  gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
  if (jo.memcheck || !jo.fastmem)
    gpr.Unlock(ARM64Reg::W30);
  if (jo.memcheck)
    gpr.Unlock(ARM64Reg::W0);
}

@@ -177,9 +176,9 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
                                bool update)
{
  // We want to make sure to not get LR as a temp register
  gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
  if (!jo.fastmem)
    gpr.Lock(ARM64Reg::W0);
  gpr.Lock(ARM64Reg::W30);

  const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W2);

  // Don't materialize zero.
  ARM64Reg RS = gpr.IsImm(value, 0) ? ARM64Reg::WZR : gpr.R(value);
@@ -192,8 +191,6 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
  if (dest != -1 && !gpr.IsImm(dest))
    reg_dest = gpr.R(dest);

  ARM64Reg addr_reg = ARM64Reg::W2;

  u32 imm_addr = 0;
  bool is_immediate = false;

@@ -255,12 +252,10 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
    }
  }

  ARM64Reg XA = EncodeRegTo64(addr_reg);

  bool addr_reg_set = !is_immediate;
  const auto set_addr_reg_if_needed = [&] {
    if (!addr_reg_set)
      MOVI2R(XA, imm_addr);
      MOVI2R(addr_reg, imm_addr);
  };

  const bool early_update = !jo.memcheck && value != static_cast<u32>(dest);
@@ -271,13 +266,10 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
    MOV(gpr.R(dest), addr_reg);
  }

  BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
  BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
  regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
  BitSet32 scratch_gprs;
  BitSet32 scratch_fprs;
  if (!update || early_update)
    regs_in_use[DecodeReg(ARM64Reg::W2)] = false;
  if (!jo.fastmem)
    regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
  scratch_gprs[DecodeReg(addr_reg)] = true;

  u32 access_size = BackPatchInfo::GetFlagSize(flags);
  u32 mmio_address = 0;
@@ -313,22 +305,23 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
  else if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, access_size))
  {
    set_addr_reg_if_needed();
    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, XA, regs_in_use, fprs_in_use);
    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, RS, addr_reg, scratch_gprs,
                         scratch_fprs);
  }
  else if (mmio_address)
  {
    regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
    regs_in_use[DecodeReg(ARM64Reg::W2)] = false;
    regs_in_use[DecodeReg(ARM64Reg::W30)] = false;
    regs_in_use[DecodeReg(RS)] = false;
    scratch_gprs[DecodeReg(addr_reg)] = true;
    scratch_gprs[DecodeReg(ARM64Reg::W30)] = true;
    scratch_gprs[DecodeReg(RS)] = true;
    MMIOWriteRegToAddr(m_system, m_system.GetMemory().GetMMIOMapping(), this, &m_float_emit,
                       regs_in_use, fprs_in_use, RS, mmio_address, flags);
                       gpr.GetCallerSavedUsed() & ~scratch_gprs,
                       fpr.GetCallerSavedUsed() & ~scratch_fprs, RS, mmio_address, flags);
    addr_reg_set = false;
  }
  else
  {
    set_addr_reg_if_needed();
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, XA, regs_in_use, fprs_in_use);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, RS, addr_reg, scratch_gprs, scratch_fprs);
  }

  if (update && !early_update)
@@ -338,9 +331,7 @@ void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s
    MOV(gpr.R(dest), addr_reg);
  }

  gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
  if (!jo.fastmem)
    gpr.Unlock(ARM64Reg::W0);
  gpr.Unlock(ARM64Reg::W30);
}

FixupBranch JitArm64::BATAddressLookup(ARM64Reg addr_out, ARM64Reg addr_in, ARM64Reg tmp,
@@ -522,28 +513,33 @@ void JitArm64::lmw(UGeckoInstruction inst)
  u32 a = inst.RA, d = inst.RD;
  s32 offset = inst.SIMM_16;

  gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
  if (jo.memcheck || !jo.fastmem)
    gpr.Lock(ARM64Reg::W30);
  if (jo.memcheck)
    gpr.Lock(ARM64Reg::W0);

  // MMU games make use of a >= d despite this being invalid according to the PEM.
  // If a >= d occurs, we must make sure to not re-read rA after starting doing the loads.
  ARM64Reg addr_reg = ARM64Reg::W1;
  const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1);
  Arm64RegCache::ScopedARM64Reg addr_base_reg;
  bool a_is_addr_base_reg = false;
  if (!a)
    MOVI2R(addr_reg, offset);
  else if (gpr.IsImm(a))
    MOVI2R(addr_reg, gpr.GetImm(a) + offset);
  else if (a < d && offset + (31 - d) * 4 < 0x1000)
    a_is_addr_base_reg = true;
  else
    ADDI2R(addr_reg, gpr.R(a), offset, addr_reg);

  Arm64RegCache::ScopedARM64Reg addr_base_reg;
  if (!a_is_addr_base_reg)
  {
    addr_base_reg = gpr.GetScopedReg();
    MOV(addr_base_reg, addr_reg);
    MOVI2R(addr_base_reg, offset);
  }
  else if (gpr.IsImm(a))
  {
    addr_base_reg = gpr.GetScopedReg();
    MOVI2R(addr_base_reg, gpr.GetImm(a) + offset);
  }
  else if (a < d && offset + (31 - d) * 4 < 0x1000)
  {
    a_is_addr_base_reg = true;
  }
  else
  {
    addr_base_reg = gpr.GetScopedReg();
    ADDI2R(addr_base_reg, gpr.R(a), offset, addr_base_reg);
  }

  BitSet32 gprs_to_discard{};
@@ -586,22 +582,32 @@ void JitArm64::lmw(UGeckoInstruction inst)
  {
    gpr.BindToRegister(i, false, false);
    ARM64Reg dest_reg = gpr.R(i);
    ARM64Reg current_iteration_addr_reg = addr_reg;

    if (a_is_addr_base_reg)
      ADDI2R(addr_reg, gpr.R(a), offset + (i - d) * 4);
    else if (i != d)
      ADDI2R(addr_reg, addr_base_reg, (i - d) * 4);
    {
      const u32 current_iteration_offset = offset + (i - d) * 4;
      if (current_iteration_offset != 0)
        ADDI2R(addr_reg, gpr.R(a), current_iteration_offset);
      else
        current_iteration_addr_reg = gpr.R(a);
    }
    else
    {
      if (i != d)
        ADDI2R(addr_reg, addr_base_reg, (i - d) * 4);
      else
        current_iteration_addr_reg = addr_base_reg;
    }

    BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
    BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
    regs_in_use[DecodeReg(addr_reg)] = false;
    if (jo.memcheck || !jo.fastmem)
      regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
    if (!jo.memcheck)
      regs_in_use[DecodeReg(dest_reg)] = false;
    BitSet32 scratch_gprs;
    BitSet32 scratch_fprs;
    scratch_gprs[DecodeReg(addr_reg)] = true;
    if (jo.memcheck)
      scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, EncodeRegTo64(addr_reg), regs_in_use,
                         fprs_in_use);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, dest_reg, current_iteration_addr_reg,
                         scratch_gprs, scratch_fprs);

    gpr.BindToRegister(i, false, true);
    ASSERT(dest_reg == gpr.R(i));
@@ -629,8 +635,8 @@ void JitArm64::lmw(UGeckoInstruction inst)
    }
  }

  gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
  if (jo.memcheck || !jo.fastmem)
    gpr.Unlock(ARM64Reg::W30);
  if (jo.memcheck)
    gpr.Unlock(ARM64Reg::W0);
}

@@ -642,26 +648,29 @@ void JitArm64::stmw(UGeckoInstruction inst)
  u32 a = inst.RA, s = inst.RS;
  s32 offset = inst.SIMM_16;

  gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
  if (!jo.fastmem)
    gpr.Lock(ARM64Reg::W0);
  gpr.Lock(ARM64Reg::W30);

  ARM64Reg addr_reg = ARM64Reg::W2;
  const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W2);
  Arm64RegCache::ScopedARM64Reg addr_base_reg;
  bool a_is_addr_base_reg = false;
  if (!a)
    MOVI2R(addr_reg, offset);
  else if (gpr.IsImm(a))
    MOVI2R(addr_reg, gpr.GetImm(a) + offset);
  else if (offset + (31 - s) * 4 < 0x1000)
    a_is_addr_base_reg = true;
  else
    ADDI2R(addr_reg, gpr.R(a), offset, addr_reg);

  Arm64GPRCache::ScopedARM64Reg addr_base_reg;
  if (!a_is_addr_base_reg)
  {
    addr_base_reg = gpr.GetScopedReg();
    MOV(addr_base_reg, addr_reg);
    MOVI2R(addr_base_reg, offset);
  }
  else if (gpr.IsImm(a))
  {
    addr_base_reg = gpr.GetScopedReg();
    MOVI2R(addr_base_reg, gpr.GetImm(a) + offset);
  }
  else if (offset + (31 - s) * 4 < 0x1000)
  {
    a_is_addr_base_reg = true;
  }
  else
  {
    addr_base_reg = gpr.GetScopedReg();
    ADDI2R(addr_base_reg, gpr.R(a), offset, addr_base_reg);
  }

  BitSet32 gprs_to_discard{};
@@ -704,21 +713,30 @@ void JitArm64::stmw(UGeckoInstruction inst)
  for (u32 i = s; i < 32; i++)
  {
    ARM64Reg src_reg = gpr.R(i);
    ARM64Reg current_iteration_addr_reg = addr_reg;

    if (a_is_addr_base_reg)
      ADDI2R(addr_reg, gpr.R(a), offset + (i - s) * 4);
    else if (i != s)
      ADDI2R(addr_reg, addr_base_reg, (i - s) * 4);
    {
      const u32 current_iteration_offset = offset + (i - s) * 4;
      if (current_iteration_offset != 0)
        ADDI2R(addr_reg, gpr.R(a), current_iteration_offset);
      else
        current_iteration_addr_reg = gpr.R(a);
    }
    else
    {
      if (i != s)
        ADDI2R(addr_reg, addr_base_reg, (i - s) * 4);
      else
        current_iteration_addr_reg = addr_base_reg;
    }

    BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
    BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
    regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
    regs_in_use[DecodeReg(addr_reg)] = false;
    if (!jo.fastmem)
      regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
    BitSet32 scratch_gprs;
    BitSet32 scratch_fprs;
    scratch_gprs[DecodeReg(addr_reg)] = true;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, EncodeRegTo64(addr_reg), regs_in_use,
                         fprs_in_use);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, src_reg, current_iteration_addr_reg,
                         scratch_gprs, scratch_fprs);

    // To reduce register pressure and to avoid getting a pipeline-unfriendly long run of stores
    // after this instruction, flush registers that would be flushed after this instruction anyway.
@@ -750,9 +768,7 @@ void JitArm64::stmw(UGeckoInstruction inst)
    }
  }

  gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
  if (!jo.fastmem)
    gpr.Unlock(ARM64Reg::W0);
  gpr.Unlock(ARM64Reg::W30);
}

void JitArm64::dcbx(UGeckoInstruction inst)
@@ -971,17 +987,11 @@ void JitArm64::dcbz(UGeckoInstruction inst)

  int a = inst.RA, b = inst.RB;

  gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
  if (!jo.fastmem)
    gpr.Lock(ARM64Reg::W0);
  gpr.Lock(ARM64Reg::W30);

  Common::ScopeGuard register_guard([&] {
    gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
    if (!jo.fastmem)
      gpr.Unlock(ARM64Reg::W0);
  });
  Common::ScopeGuard register_guard([&] { gpr.Unlock(ARM64Reg::W30); });

  constexpr ARM64Reg addr_reg = ARM64Reg::W1;
  const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1);
  constexpr ARM64Reg temp_reg = ARM64Reg::W30;

  // HACK: Don't clear any memory in the [0x8000'0000, 0x8000'8000) region.
@@ -1043,14 +1053,12 @@ void JitArm64::dcbz(UGeckoInstruction inst)
    }
  }

  BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
  BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
  gprs_to_push[DecodeReg(ARM64Reg::W1)] = false;
  if (!jo.fastmem)
    gprs_to_push[DecodeReg(ARM64Reg::W0)] = false;
  BitSet32 scratch_gprs;
  BitSet32 scratch_fprs;
  scratch_gprs[DecodeReg(addr_reg)] = true;

  EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1,
                       EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);
  EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, MemAccessMode::Auto, ARM64Reg::W1, addr_reg,
                       scratch_gprs, scratch_fprs);

  if (using_dcbz_hack)
    SetJumpTarget(end_dcbz_hack);
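The offset + (31 - d) * 4 < 0x1000 guard in lmw (and its stmw counterpart) bounds the largest per-iteration displacement so every ADDI2R can encode it as a plain ARM64 12-bit unsigned ADD immediate, letting gpr.R(a) serve directly as the base register. Worked numbers (illustrative):

    // lmw r24, 0x0F00(rA): largest displacement = 0x0F00 + (31 - 24) * 4 = 0x0F1C
    // 0x0F1C < 0x1000, so rA itself can be the base for every iteration.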
@@ -77,13 +77,12 @@ void JitArm64::lfXX(UGeckoInstruction inst)
  const RegType type =
      (flags & BackPatchInfo::FLAG_SIZE_64) != 0 ? RegType::LowerPair : RegType::DuplicatedSingle;

  gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
  fpr.Lock(ARM64Reg::Q0);
  if (jo.memcheck || !jo.fastmem)
    gpr.Lock(ARM64Reg::W30);
  if (jo.memcheck)
    gpr.Lock(ARM64Reg::W0);

  const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W1);
  const ARM64Reg VD = fpr.RW(inst.FD, type, false);
  ARM64Reg addr_reg = ARM64Reg::W1;

  if (update)
  {
@@ -152,10 +151,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
    }
  }

  ARM64Reg XA = EncodeRegTo64(addr_reg);

  if (is_immediate)
    MOVI2R(XA, imm_addr);
    MOVI2R(addr_reg, imm_addr);

  const bool early_update = !jo.memcheck;
  if (update && early_update)
@@ -164,23 +161,21 @@ void JitArm64::lfXX(UGeckoInstruction inst)
    MOV(gpr.R(a), addr_reg);
  }

  BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
  BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
  BitSet32 scratch_gprs;
  BitSet32 scratch_fprs;
  if (!update || early_update)
    regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
  if (jo.memcheck || !jo.fastmem)
    regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
  fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
  if (!jo.memcheck)
    fprs_in_use[DecodeReg(VD)] = false;
  scratch_gprs[DecodeReg(addr_reg)] = true;
  if (jo.memcheck)
    scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;

  if (is_immediate && m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags)))
  {
    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, XA, regs_in_use, fprs_in_use);
    EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, VD, addr_reg, scratch_gprs,
                         scratch_fprs);
  }
  else
  {
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, XA, regs_in_use, fprs_in_use);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, VD, addr_reg, scratch_gprs, scratch_fprs);
  }

  const ARM64Reg VD_again = fpr.RW(inst.FD, type, true);
@@ -192,9 +187,8 @@ void JitArm64::lfXX(UGeckoInstruction inst)
    MOV(gpr.R(a), addr_reg);
  }

  gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
  fpr.Unlock(ARM64Reg::Q0);
  if (jo.memcheck || !jo.fastmem)
    gpr.Unlock(ARM64Reg::W30);
  if (jo.memcheck)
    gpr.Unlock(ARM64Reg::W0);
}

@@ -264,8 +258,6 @@ void JitArm64::stfXX(UGeckoInstruction inst)
  u32 imm_addr = 0;
  bool is_immediate = false;

  fpr.Lock(ARM64Reg::Q0);

  const bool have_single = fpr.IsSingle(inst.FS, true);

  Arm64FPRCache::ScopedARM64Reg V0 =
@@ -278,11 +270,9 @@ void JitArm64::stfXX(UGeckoInstruction inst)
    V0 = std::move(single_reg);
  }

  gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
  if (!jo.fastmem)
    gpr.Lock(ARM64Reg::W0);
  gpr.Lock(ARM64Reg::W30);

  ARM64Reg addr_reg = ARM64Reg::W2;
  const Arm64RegCache::ScopedARM64Reg addr_reg = gpr.GetScopedRegWithPreference(ARM64Reg::W2);

  if (update)
  {
@@ -351,12 +341,10 @@ void JitArm64::stfXX(UGeckoInstruction inst)
    }
  }

  ARM64Reg XA = EncodeRegTo64(addr_reg);

  bool addr_reg_set = !is_immediate;
  const auto set_addr_reg_if_needed = [&] {
    if (!addr_reg_set)
      MOVI2R(XA, imm_addr);
      MOVI2R(addr_reg, imm_addr);
  };

  const bool early_update = !jo.memcheck;
@@ -367,14 +355,10 @@ void JitArm64::stfXX(UGeckoInstruction inst)
    MOV(gpr.R(a), addr_reg);
  }

  BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
  BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
  regs_in_use[DecodeReg(ARM64Reg::W1)] = false;
  BitSet32 scratch_gprs;
  BitSet32 scratch_fprs;
  if (!update || early_update)
    regs_in_use[DecodeReg(ARM64Reg::W2)] = false;
  if (!jo.fastmem)
    regs_in_use[DecodeReg(ARM64Reg::W0)] = false;
  fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
  scratch_gprs[DecodeReg(addr_reg)] = true;

  if (is_immediate)
  {
@@ -402,20 +386,20 @@ void JitArm64::stfXX(UGeckoInstruction inst)
    else if (m_mmu.IsOptimizableRAMAddress(imm_addr, BackPatchInfo::GetFlagSize(flags)))
    {
      set_addr_reg_if_needed();
      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, XA, regs_in_use,
                           fprs_in_use);
      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysFastAccess, V0, addr_reg, scratch_gprs,
                           scratch_fprs);
    }
    else
    {
      set_addr_reg_if_needed();
      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, XA, regs_in_use,
                           fprs_in_use);
      EmitBackpatchRoutine(flags, MemAccessMode::AlwaysSlowAccess, V0, addr_reg, scratch_gprs,
                           scratch_fprs);
    }
  }
  else
  {
    set_addr_reg_if_needed();
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, XA, regs_in_use, fprs_in_use);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, V0, addr_reg, scratch_gprs, scratch_fprs);
  }

  if (update && !early_update)
@@ -425,8 +409,5 @@ void JitArm64::stfXX(UGeckoInstruction inst)
    MOV(gpr.R(a), addr_reg);
  }

  gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
  fpr.Unlock(ARM64Reg::Q0);
  if (!jo.fastmem)
    gpr.Unlock(ARM64Reg::W0);
  gpr.Unlock(ARM64Reg::W30);
}
@@ -38,20 +38,20 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
  const int i = indexed ? inst.Ix : inst.I;
  const int w = indexed ? inst.Wx : inst.W;

  gpr.Lock(ARM64Reg::W1, ARM64Reg::W30);
  fpr.Lock(ARM64Reg::Q0);
  gpr.Lock(ARM64Reg::W30);
  if (!js.assumeNoPairedQuantize)
  {
    gpr.Lock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
    fpr.Lock(ARM64Reg::Q1);
    gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
    fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1);
  }
  else if (jo.memcheck || !jo.fastmem)
  else if (jo.memcheck)
  {
    gpr.Lock(ARM64Reg::W0);
  }

  constexpr ARM64Reg type_reg = ARM64Reg::W0;
  constexpr ARM64Reg addr_reg = ARM64Reg::W1;
  const auto addr_reg = js.assumeNoPairedQuantize ? gpr.GetScopedRegWithPreference(ARM64Reg::W1) :
                                                    Arm64RegCache::ScopedARM64Reg(ARM64Reg::W1);
  constexpr ARM64Reg scale_reg = ARM64Reg::W2;
  ARM64Reg VS = fpr.RW(inst.RS, RegType::Single, false);

@@ -79,24 +79,19 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)

  if (js.assumeNoPairedQuantize)
  {
    BitSet32 gprs_in_use = gpr.GetCallerSavedUsed();
    BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
    BitSet32 scratch_gprs;
    BitSet32 scratch_fprs;

    // Wipe the registers we are using as temporaries
    if (!update || early_update)
      gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
    if (jo.memcheck || !jo.fastmem)
      gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
    fprs_in_use[DecodeReg(ARM64Reg::Q0)] = false;
    if (!jo.memcheck)
      fprs_in_use[DecodeReg(VS)] = false;
    scratch_gprs[DecodeReg(addr_reg)] = true;
    if (jo.memcheck)
      scratch_gprs[DecodeReg(ARM64Reg::W0)] = true;

    u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
    if (!w)
      flags |= BackPatchInfo::FLAG_PAIR;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use,
                         fprs_in_use);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, addr_reg, scratch_gprs, scratch_fprs);
  }
  else
  {
@@ -133,14 +128,13 @@ void JitArm64::psq_lXX(UGeckoInstruction inst)
    MOV(gpr.R(inst.RA), addr_reg);
  }

  gpr.Unlock(ARM64Reg::W1, ARM64Reg::W30);
  fpr.Unlock(ARM64Reg::Q0);
  gpr.Unlock(ARM64Reg::W30);
  if (!js.assumeNoPairedQuantize)
  {
    gpr.Unlock(ARM64Reg::W0, ARM64Reg::W2, ARM64Reg::W3);
    fpr.Unlock(ARM64Reg::Q1);
    gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W3);
    fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
  }
  else if (jo.memcheck || !jo.fastmem)
  else if (jo.memcheck)
  {
    gpr.Unlock(ARM64Reg::W0);
  }
@@ -167,9 +161,8 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
  const int i = indexed ? inst.Ix : inst.I;
  const int w = indexed ? inst.Wx : inst.W;

  fpr.Lock(ARM64Reg::Q0);
  if (!js.assumeNoPairedQuantize)
    fpr.Lock(ARM64Reg::Q1);
    fpr.Lock(ARM64Reg::Q0, ARM64Reg::Q1);

  const bool have_single = fpr.IsSingle(inst.RS);

@@ -205,15 +198,18 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
    }
  }

  gpr.Lock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
  if (!js.assumeNoPairedQuantize || !jo.fastmem)
    gpr.Lock(ARM64Reg::W0);
  if (!js.assumeNoPairedQuantize && !jo.fastmem)
    gpr.Lock(ARM64Reg::W3);
  gpr.Lock(ARM64Reg::W30);
  if (!js.assumeNoPairedQuantize)
  {
    gpr.Lock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2);
    if (!jo.fastmem)
      gpr.Lock(ARM64Reg::W3);
  }

  constexpr ARM64Reg type_reg = ARM64Reg::W0;
  constexpr ARM64Reg scale_reg = ARM64Reg::W1;
  constexpr ARM64Reg addr_reg = ARM64Reg::W2;
  const auto addr_reg = js.assumeNoPairedQuantize ? gpr.GetScopedRegWithPreference(ARM64Reg::W2) :
                                                    Arm64RegCache::ScopedARM64Reg(ARM64Reg::W2);

  if (inst.RA || update)  // Always uses the register on update
  {
@@ -239,22 +235,17 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)

  if (js.assumeNoPairedQuantize)
  {
    BitSet32 gprs_in_use = gpr.GetCallerSavedUsed();
    BitSet32 fprs_in_use = fpr.GetCallerSavedUsed();
    BitSet32 scratch_gprs;
    BitSet32 scratch_fprs;

    // Wipe the registers we are using as temporaries
    gprs_in_use[DecodeReg(ARM64Reg::W1)] = false;
    if (!update || early_update)
      gprs_in_use[DecodeReg(ARM64Reg::W2)] = false;
    if (!jo.fastmem)
      gprs_in_use[DecodeReg(ARM64Reg::W0)] = false;
    scratch_gprs[DecodeReg(addr_reg)] = true;

    u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;
    if (!w)
      flags |= BackPatchInfo::FLAG_PAIR;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, EncodeRegTo64(addr_reg), gprs_in_use,
                         fprs_in_use);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, VS, addr_reg, scratch_gprs, scratch_fprs);
  }
  else
  {
@@ -280,12 +271,12 @@ void JitArm64::psq_stXX(UGeckoInstruction inst)
    MOV(gpr.R(inst.RA), addr_reg);
  }

  gpr.Unlock(ARM64Reg::W1, ARM64Reg::W2, ARM64Reg::W30);
  fpr.Unlock(ARM64Reg::Q0);
  if (!js.assumeNoPairedQuantize || !jo.fastmem)
    gpr.Unlock(ARM64Reg::W0);
  if (!js.assumeNoPairedQuantize && !jo.fastmem)
    gpr.Unlock(ARM64Reg::W3);
  gpr.Unlock(ARM64Reg::W30);
  if (!js.assumeNoPairedQuantize)
    fpr.Unlock(ARM64Reg::Q1);
  {
    gpr.Unlock(ARM64Reg::W0, ARM64Reg::W1, ARM64Reg::W2);
    if (!jo.fastmem)
      gpr.Unlock(ARM64Reg::W3);
    fpr.Unlock(ARM64Reg::Q0, ARM64Reg::Q1);
  }
}
@@ -5,6 +5,7 @@

#include <algorithm>
#include <cstddef>
#include <ranges>
#include <vector>

#include "Common/Assert.h"

@@ -63,6 +64,30 @@ ARM64Reg Arm64RegCache::GetReg()
  return ARM64Reg::INVALID_REG;
}

ARM64Reg Arm64RegCache::GetRegWithPreference(Arm64Gen::ARM64Reg preferred)
{
  // In practice, the preferred register tends to be towards the end of m_host_registers,
  // so we scan through m_host_registers backwards
  for (auto& it : m_host_registers | std::views::reverse)
  {
    if (it.GetReg() == preferred)
    {
      if (it.IsLocked())
      {
        return GetReg();
      }
      else
      {
        it.Lock();
        return it.GetReg();
      }
    }
  }
  ASSERT_MSG(DYNA_REC, false, "Preferred register {:#x} is not in register cache",
             static_cast<int>(preferred));
  return ARM64Reg::INVALID_REG;
}

void Arm64RegCache::UpdateLastUsed(BitSet32 regs_used)
{
  for (size_t i = 0; i < m_guest_registers.size(); ++i)
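GetRegWithPreference degrades gracefully: if the preferred register is already locked it silently returns whatever GetReg() picks, so callers can always ask for the ABI-convenient register without a failure path. Usage sketch (shapes taken from the load/store call sites above):

    // Prefer W1 because the slow path wants the address there anyway; if W1
    // is busy we still get some unlocked temporary instead of asserting.
    const Arm64RegCache::ScopedARM64Reg addr_reg =
        gpr.GetScopedRegWithPreference(ARM64Reg::W1);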
@@ -183,13 +183,16 @@ public:
  // Returns a temporary register for use
  // Requires unlocking after done
  Arm64Gen::ARM64Reg GetReg();
  Arm64Gen::ARM64Reg GetRegWithPreference(Arm64Gen::ARM64Reg preferred);

  class ScopedARM64Reg
  {
  public:
    inline ScopedARM64Reg() = default;
    ScopedARM64Reg(const ScopedARM64Reg&) = delete;
    explicit inline ScopedARM64Reg(Arm64RegCache& cache) : m_reg(cache.GetReg()), m_gpr(&cache) {}
    inline ScopedARM64Reg(Arm64RegCache& cache, Arm64Gen::ARM64Reg reg) : m_reg(reg), m_gpr(&cache)
    {
    }
    inline ScopedARM64Reg(Arm64Gen::ARM64Reg reg) : m_reg(reg) {}
    inline ScopedARM64Reg(ScopedARM64Reg&& scoped_reg) { *this = std::move(scoped_reg); }
    inline ~ScopedARM64Reg() { Unlock(); }
@@ -235,7 +238,11 @@ public:

  // Returns a temporary register
  // Unlocking is implicitly handled through RAII
  inline ScopedARM64Reg GetScopedReg() { return ScopedARM64Reg(*this); }
  inline ScopedARM64Reg GetScopedReg() { return ScopedARM64Reg(*this, GetReg()); }
  inline ScopedARM64Reg GetScopedRegWithPreference(Arm64Gen::ARM64Reg preferred)
  {
    return ScopedARM64Reg(*this, GetRegWithPreference(preferred));
  }

  void UpdateLastUsed(BitSet32 regs_used);
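The new two-argument constructor is what lets GetScopedRegWithPreference wrap a register that was locked through a different path; the destructor still funnels through Unlock() exactly once. Minimal usage sketch (assumes a GPR cache named gpr, as in the JIT):

    {
      Arm64RegCache::ScopedARM64Reg tmp = gpr.GetScopedReg();  // locks a temp
      // ... emit code that uses tmp ...
    }  // ~ScopedARM64Reg runs here and unlocks the register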
@@ -524,12 +524,12 @@ void JitArm64::GenerateQuantizedLoads()
  // Q0 is the return
  // Q1 is a temporary
  ARM64Reg temp_reg = ARM64Reg::X0;
  ARM64Reg addr_reg = ARM64Reg::X1;
  ARM64Reg addr_reg = ARM64Reg::W1;
  ARM64Reg scale_reg = ARM64Reg::X2;
  BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 3};
  BitSet32 scratch_gprs{0, 3};
  if (!jo.memcheck)
    gprs_to_push &= ~BitSet32{1};
  BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
    scratch_gprs[1] = true;
  BitSet32 scratch_fprs{0, 1};
  ARM64FloatEmitter float_emit(this);

  const u8* start = GetCodePtr();
@@ -541,7 +541,7 @@ void JitArm64::GenerateQuantizedLoads()
                          BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
                         gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true);
                         scratch_gprs | BitSet32{DecodeReg(scale_reg)}, scratch_fprs, true);

    RET(ARM64Reg::X30);
  }
@@ -550,8 +550,8 @@ void JitArm64::GenerateQuantizedLoads()
    constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
                          BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
    float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@@ -568,8 +568,8 @@ void JitArm64::GenerateQuantizedLoads()
    constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
                          BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
    float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@@ -586,8 +586,8 @@ void JitArm64::GenerateQuantizedLoads()
    constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
                          BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
    float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -603,8 +603,8 @@ void JitArm64::GenerateQuantizedLoads()
    constexpr u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT |
                          BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
    float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -622,7 +622,7 @@ void JitArm64::GenerateQuantizedLoads()
        BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg,
                         gprs_to_push & ~BitSet32{DecodeReg(scale_reg)}, fprs_to_push, true);
                         scratch_gprs | BitSet32{DecodeReg(scale_reg)}, scratch_fprs, true);

    RET(ARM64Reg::X30);
  }
@@ -631,8 +631,8 @@ void JitArm64::GenerateQuantizedLoads()
    constexpr u32 flags =
        BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    float_emit.UXTL(8, ARM64Reg::D0, ARM64Reg::D0);
    float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@@ -649,8 +649,8 @@ void JitArm64::GenerateQuantizedLoads()
    constexpr u32 flags =
        BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    float_emit.SXTL(8, ARM64Reg::D0, ARM64Reg::D0);
    float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
@@ -667,8 +667,8 @@ void JitArm64::GenerateQuantizedLoads()
    constexpr u32 flags =
        BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    float_emit.UXTL(16, ARM64Reg::D0, ARM64Reg::D0);
    float_emit.UCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -684,8 +684,8 @@ void JitArm64::GenerateQuantizedLoads()
    constexpr u32 flags =
        BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    float_emit.SXTL(16, ARM64Reg::D0, ARM64Reg::D0);
    float_emit.SCVTF(32, ARM64Reg::D0, ARM64Reg::D0);
@@ -735,13 +735,13 @@ void JitArm64::GenerateQuantizedStores()
  // Q1 is a temporary
  ARM64Reg temp_reg = ARM64Reg::X0;
  ARM64Reg scale_reg = ARM64Reg::X1;
  ARM64Reg addr_reg = ARM64Reg::X2;
  BitSet32 gprs_to_push = CALLER_SAVED_GPRS & ~BitSet32{0, 1};
  ARM64Reg addr_reg = ARM64Reg::W2;
  BitSet32 scratch_gprs{0, 1};
  if (!jo.memcheck)
    gprs_to_push &= ~BitSet32{2};
    scratch_gprs[2] = true;
  if (!jo.fastmem)
    gprs_to_push &= ~BitSet32{3};
  BitSet32 fprs_to_push = BitSet32(0xFFFFFFFF) & ~BitSet32{0, 1};
    scratch_gprs[3] = true;
  BitSet32 scratch_fprs{0, 1};
  ARM64FloatEmitter float_emit(this);

  const u8* start = GetCodePtr();
@@ -752,8 +752,8 @@ void JitArm64::GenerateQuantizedStores()
    constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
                          BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_32;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    RET(ARM64Reg::X30);
  }
@@ -771,8 +771,8 @@ void JitArm64::GenerateQuantizedStores()
    constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
                          BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    RET(ARM64Reg::X30);
  }
@@ -790,8 +790,8 @@ void JitArm64::GenerateQuantizedStores()
    constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
                          BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_8;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    RET(ARM64Reg::X30);
  }
@@ -808,8 +808,8 @@ void JitArm64::GenerateQuantizedStores()
    constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
                          BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    RET(ARM64Reg::X30);
  }
@@ -826,8 +826,8 @@ void JitArm64::GenerateQuantizedStores()
    constexpr u32 flags = BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT |
                          BackPatchInfo::FLAG_PAIR | BackPatchInfo::FLAG_SIZE_16;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    RET(ARM64Reg::X30);
  }
@@ -837,8 +837,8 @@ void JitArm64::GenerateQuantizedStores()
    constexpr u32 flags =
        BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_32;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    RET(ARM64Reg::X30);
  }
@@ -856,8 +856,8 @@ void JitArm64::GenerateQuantizedStores()
    constexpr u32 flags =
        BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    RET(ARM64Reg::X30);
  }
@@ -875,8 +875,8 @@ void JitArm64::GenerateQuantizedStores()
    constexpr u32 flags =
        BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_8;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    RET(ARM64Reg::X30);
  }
@@ -893,8 +893,8 @@ void JitArm64::GenerateQuantizedStores()
    constexpr u32 flags =
        BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    RET(ARM64Reg::X30);
  }
@@ -911,8 +911,8 @@ void JitArm64::GenerateQuantizedStores()
    constexpr u32 flags =
        BackPatchInfo::FLAG_STORE | BackPatchInfo::FLAG_FLOAT | BackPatchInfo::FLAG_SIZE_16;

    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, gprs_to_push,
                         fprs_to_push, true);
    EmitBackpatchRoutine(flags, MemAccessMode::Auto, ARM64Reg::D0, addr_reg, scratch_gprs,
                         scratch_fprs, true);

    RET(ARM64Reg::X30);
  }
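In these routines emitting_routine is true, so EmitBackpatchRoutine derives its push sets from fixed constants rather than the register cache, and the scratch sets built above are simply subtracted out. The governing lines from the EmitBackpatchRoutine implementation earlier in this commit are worth keeping in mind when reading the scratch_gprs initializers:

    BitSet32 gprs_to_push =
        (emitting_routine ? CALLER_SAVED_GPRS : gpr.GetCallerSavedUsed()) & ~scratch_gprs;
    BitSet32 fprs_to_push =
        (emitting_routine ? BitSet32(0xFFFFFFFF) : fpr.GetCallerSavedUsed()) & ~scratch_fprs;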