From 5eb8876bacd2712f15b28f7f022bb64fa47f5fd7 Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Fri, 19 Dec 2014 20:45:55 -0600
Subject: [PATCH 1/7] [AArch64] Removes ARMv8 as a generic target.

The generic flag disables the ability to have backpatching support.
Also compiles Dolphin with CRC instructions enabled.
---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2039279edc..6df8a5bcb0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -164,7 +164,7 @@ if(NOT ENABLE_GENERIC)
 set(_M_ARM 1)
 set(_M_ARM_64 1)
 add_definitions(-D_M_ARM=1 -D_M_ARM_64=1)
- set(ENABLE_GENERIC 1)
+ add_definitions(-march=armv8-a+crc)
 else()
 set(ENABLE_GENERIC 1)
 endif()

From eaf17b7d7bb6cd22f368e347bd3d59c42753abc2 Mon Sep 17 00:00:00 2001
From: Ryan Houdek
Date: Fri, 19 Dec 2014 20:49:44 -0600
Subject: [PATCH 2/7] [AArch64] Register cache improvements.

Adds the ability to flush the cache while maintaining state.
Adds the BindToRegister ability.
Sorts register usage so callee-saved registers are used first, which
reduces how many registers need to be dumped when jumping to external
routines/the interpreter.
Adds a function to store a register, for use when flushing a register
that won't be used during the rest of a block.
---
 .../PowerPC/JitArm64/JitArm64_RegCache.cpp | 81 ++++++++++++++-----
 .../Core/PowerPC/JitArm64/JitArm64_RegCache.h | 19 ++++-
 2 files changed, 80 insertions(+), 20 deletions(-)

diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
index d8d9e7ca07..fc7fc952d6 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
@@ -29,7 +29,7 @@ ARM64Reg Arm64RegCache::GetReg()
 }
 // Holy cow, how did you run out of registers?
 // We can't return anything reasonable in this case.
 // Return INVALID_REG and watch the failure happen
- _assert_msg_(_DYNA_REC_, false, "All available registers are locked dumb dumb");
+ WARN_LOG(DYNA_REC, "All available registers are locked dumb dumb");
 return INVALID_REG;
 }

@@ -45,18 +45,14 @@ u32 Arm64RegCache::GetUnlockedRegisterCount()
 void Arm64RegCache::LockRegister(ARM64Reg host_reg)
 {
 auto reg = std::find(m_host_registers.begin(), m_host_registers.end(), host_reg);
- if (reg == m_host_registers.end())
- _assert_msg_(DYNA_REC, false, "Don't try locking a register that isn't in the cache");
- _assert_msg_(DYNA_REC, !reg->IsLocked(), "This register is already locked");
+ _assert_msg_(DYNA_REC, reg != m_host_registers.end(), "Don't try locking a register that isn't in the cache");
 reg->Lock();
 }

 void Arm64RegCache::UnlockRegister(ARM64Reg host_reg)
 {
 auto reg = std::find(m_host_registers.begin(), m_host_registers.end(), host_reg);
- if (reg == m_host_registers.end())
- _assert_msg_(DYNA_REC, false, "Don't try unlocking a register that isn't in the cache");
- _assert_msg_(DYNA_REC, reg->IsLocked(), "This register is already unlocked");
+ _assert_msg_(DYNA_REC, reg != m_host_registers.end(), "Don't try unlocking a register that isn't in the cache");
 reg->Unlock();
 }

@@ -75,17 +71,19 @@ bool Arm64GPRCache::IsCalleeSaved(ARM64Reg reg)
 return std::find(callee_regs.begin(), callee_regs.end(), EncodeRegTo64(reg)) != callee_regs.end();
 }

-void Arm64GPRCache::FlushRegister(u32 preg)
+void Arm64GPRCache::FlushRegister(u32 preg, bool maintain_state)
 {
 OpArg& reg = m_guest_registers[preg];
 if (reg.GetType() == REG_REG)
 {
 ARM64Reg host_reg = reg.GetReg();
 m_emit->STR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
- UnlockRegister(host_reg);
- reg.Flush();
+ if (!maintain_state)
+ {
+ UnlockRegister(host_reg);
+ reg.Flush();
+ }
 }
 else if (reg.GetType() == REG_IMM)
 {
@@ -103,7 +101,8 @@ void Arm64GPRCache::FlushRegister(u32 preg)
 UnlockRegister(host_reg);
 }

- reg.Flush();
+ if (!maintain_state)
+ reg.Flush();
 }
 }

@@ -126,12 +125,12 @@ void Arm64GPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
 // Has to be flushed if it isn't in a callee saved register
 ARM64Reg host_reg = m_guest_registers[i].GetReg();
 if (flush || !IsCalleeSaved(host_reg))
- FlushRegister(i);
+ FlushRegister(i, mode == FLUSH_MAINTAIN_STATE);
 }
 else if (m_guest_registers[i].GetType() == REG_IMM)
 {
 if (flush)
- FlushRegister(i);
+ FlushRegister(i, mode == FLUSH_MAINTAIN_STATE);
 }
 }
 }
@@ -166,7 +165,7 @@ ARM64Reg Arm64GPRCache::R(u32 preg)
 }
 break;
 default:
- _dbg_assert_msg_(DYNA_REC, false, "Invalid OpArg Type!");
+ ERROR_LOG(DYNA_REC, "Invalid OpArg Type!");
 break;
 }
 // We've got an issue if we end up here
@@ -177,18 +176,35 @@ void Arm64GPRCache::SetImmediate(u32 preg, u32 imm)
 {
 OpArg& reg = m_guest_registers[preg];
 if (reg.GetType() == REG_REG)
- Unlock(reg.GetReg());
+ UnlockRegister(reg.GetReg());
 reg.LoadToImm(imm);
 }

+void Arm64GPRCache::BindToRegister(u32 preg, bool do_load)
+{
+ OpArg& reg = m_guest_registers[preg];
+
+ if (reg.GetType() == REG_NOTLOADED)
+ {
+ ARM64Reg host_reg = GetReg();
+ reg.LoadToReg(host_reg);
+ if (do_load)
+ m_emit->LDR(INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(gpr[preg]));
+ }
+}
+
 void Arm64GPRCache::GetAllocationOrder()
 {
 // Callee saved registers first in hopes that we will keep everything stored there first
 const std::vector<ARM64Reg> allocation_order =
 {
+ // Callee saved
 W28, W27, W26, W25, W24, W23, W22, W21, W20,
- W19, W0, W1, W2, W3, W4, W5, W6, W7, W8, W9,
- W10, W11, W12, W13, W14, W15, W16, W17, W18,
+ W19,
+
+ // Caller 
saved + W18, W17, W16, W15, W14, W13, W12, W11, W10, + W9, W8, W7, W6, W5, W4, W3, W2, W1, W0, W30, }; @@ -210,7 +226,29 @@ void Arm64GPRCache::FlushMostStaleRegister() most_stale_amount = last_used; } } - FlushRegister(most_stale_preg); + FlushRegister(most_stale_preg, false); +} + +BitSet32 Arm64GPRCache::GetCallerSavedUsed() +{ + BitSet32 registers(0); + for (auto& it : m_host_registers) + if (it.IsLocked() && !IsCalleeSaved(it.GetReg())) + registers[it.GetReg()] = 1; + return registers; +} + +void Arm64GPRCache::FlushByHost(ARM64Reg host_reg) +{ + for (int i = 0; i < 32; ++i) + { + OpArg& reg = m_guest_registers[i]; + if (reg.GetType() == REG_REG && reg.GetReg() == host_reg) + { + FlushRegister(i, false); + return; + } + } } // FPR Cache @@ -243,3 +281,8 @@ void Arm64FPRCache::FlushMostStaleRegister() // XXX: Flush a register } +void Arm64FPRCache::FlushByHost(ARM64Reg host_reg) +{ + // XXX: Scan guest registers and flush if found +} + diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h index 9f1c341624..d032a16f82 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h @@ -144,6 +144,7 @@ public: { for (T reg : {args...}) { + FlushByHost(reg); LockRegister(reg); } } @@ -155,6 +156,7 @@ public: { for (T reg : {args...}) { + FlushByHost(reg); UnlockRegister(reg); } } @@ -172,6 +174,9 @@ protected: // Unlock a register void UnlockRegister(ARM64Reg host_reg); + // Flushes a guest register by host provided + virtual void FlushByHost(ARM64Reg host_reg) = 0; + // Get available host registers u32 GetUnlockedRegisterCount(); @@ -208,6 +213,12 @@ public: // Gets the immediate that a register is set to u32 GetImm(u32 reg) { return m_guest_registers[reg].GetImm(); } + void BindToRegister(u32 preg, bool do_load); + + void StoreRegister(u32 preg) { FlushRegister(preg, false); } + + BitSet32 GetCallerSavedUsed(); + protected: // Get the order of the host registers void GetAllocationOrder(); @@ -215,6 +226,9 @@ protected: // Flushes the most stale register void FlushMostStaleRegister(); + // Flushes a guest register by host provided + void FlushByHost(ARM64Reg host_reg) override; + // Our guest GPRs // PowerPC has 32 GPRs OpArg m_guest_registers[32]; @@ -228,7 +242,7 @@ private: reg.IncrementLastUsed(); } - void FlushRegister(u32 preg); + void FlushRegister(u32 preg, bool maintain_state); }; class Arm64FPRCache : public Arm64RegCache @@ -249,6 +263,9 @@ protected: // Flushes the most stale register void FlushMostStaleRegister(); + // Flushes a guest register by host provided + void FlushByHost(ARM64Reg host_reg) override; + // Our guest FPRs // Gekko has 32 paired registers(32x2) OpArg m_guest_registers[32][2]; From edfbb6ab9a0f24b5890b738af801579088727cea Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Fri, 19 Dec 2014 20:52:57 -0600 Subject: [PATCH 3/7] [ARM] Adds a ArmCommon folder. Moves the backpatch flags and struct to a common location for ARM. I'm sure there will be more things that use this common ARM location in the future. 
--- Source/Core/Core/PowerPC/JitArm32/Jit.h | 21 ++------------- .../Core/PowerPC/JitArmCommon/BackPatch.h | 26 +++++++++++++++++++ 2 files changed, 28 insertions(+), 19 deletions(-) create mode 100644 Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.h b/Source/Core/Core/PowerPC/JitArm32/Jit.h index 98f6884e1c..2d961a20f9 100644 --- a/Source/Core/Core/PowerPC/JitArm32/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm32/Jit.h @@ -24,6 +24,7 @@ #include "Core/PowerPC/JitArm32/JitAsm.h" #include "Core/PowerPC/JitArm32/JitFPRCache.h" #include "Core/PowerPC/JitArm32/JitRegCache.h" +#include "Core/PowerPC/JitArmCommon/BackPatch.h" #include "Core/PowerPC/JitCommon/JitBase.h" #define PPCSTATE_OFF(elem) ((s32)STRUCT_OFF(PowerPC::ppcState, elem) - (s32)STRUCT_OFF(PowerPC::ppcState, spr[0])) @@ -48,26 +49,8 @@ private: ArmFPRCache fpr; PPCAnalyst::CodeBuffer code_buffer; - struct BackPatchInfo - { - enum - { - FLAG_STORE = (1 << 0), - FLAG_LOAD = (1 << 1), - FLAG_SIZE_8 = (1 << 2), - FLAG_SIZE_16 = (1 << 3), - FLAG_SIZE_32 = (1 << 4), - FLAG_SIZE_F32 = (1 << 5), - FLAG_SIZE_F64 = (1 << 6), - FLAG_REVERSE = (1 << 7), - FLAG_EXTEND = (1 << 8), - }; - u32 m_fastmem_size; - u32 m_fastmem_trouble_inst_offset; - u32 m_slowmem_size; - }; - // The key is the flags + // The key is the backpatch flags std::map m_backpatch_info; void DoDownCount(); diff --git a/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h b/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h new file mode 100644 index 0000000000..4ba3c4a703 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArmCommon/BackPatch.h @@ -0,0 +1,26 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. + +#pragma once +#include "Common/CommonTypes.h" + +struct BackPatchInfo +{ + enum + { + FLAG_STORE = (1 << 0), + FLAG_LOAD = (1 << 1), + FLAG_SIZE_8 = (1 << 2), + FLAG_SIZE_16 = (1 << 3), + FLAG_SIZE_32 = (1 << 4), + FLAG_SIZE_F32 = (1 << 5), + FLAG_SIZE_F64 = (1 << 6), + FLAG_REVERSE = (1 << 7), + FLAG_EXTEND = (1 << 8), + }; + + u32 m_fastmem_size; + u32 m_fastmem_trouble_inst_offset; + u32 m_slowmem_size; +}; From 32eb0a9d686fbda080120d3363108ec255f44f62 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Fri, 19 Dec 2014 20:57:51 -0600 Subject: [PATCH 4/7] [AArch64] Removes CODE_SIZE variable. This is in a global location now which conflicts with this one. --- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 97f0b00336..8367fcca4e 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -12,8 +12,6 @@ using namespace Arm64Gen; -static int CODE_SIZE = 1024*1024*32; - void JitArm64::Init() { AllocCodeSpace(CODE_SIZE); From 13b70c2fbd384f81376052f7e16ce138a98aea02 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Fri, 19 Dec 2014 20:59:37 -0600 Subject: [PATCH 5/7] [AArch64] Add an exception exit function with the exit location already in PC. This will be used with idle skipping later. 
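For illustration, here is roughly how the new argument-less overload is meant to
be used once something has already placed the exit address in ppcState.pc. This
is only a sketch, based on the idle-skipping path added later in this series;
argument setup for the helper call is omitted:

    // Flush the register caches but keep their state, call a helper that
    // raises the exception, then exit through the new overload, which reads
    // pc from ppcState, runs CheckExceptions, and returns to the dispatcher.
    gpr.Flush(FLUSH_MAINTAIN_STATE);
    fpr.Flush(FLUSH_MAINTAIN_STATE);

    ARM64Reg WA = gpr.GetReg();
    ARM64Reg XA = EncodeRegTo64(WA);
    MOVI2R(XA, (u64)&PowerPC::OnIdle);
    BLR(XA);
    gpr.Unlock(WA);

    WriteExceptionExit();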
--- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 19 +++++++++++++++++++ Source/Core/Core/PowerPC/JitArm64/Jit.h | 5 +++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 8367fcca4e..acb772d422 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -149,6 +149,25 @@ void JitArm64::WriteExceptionExit(ARM64Reg dest) BR(EncodeRegTo64(dest)); } +void JitArm64::WriteExceptionExit() +{ + DoDownCount(); + + ARM64Reg WA = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc)); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc)); + MOVI2R(XA, (u64)&PowerPC::CheckExceptions); + BLR(XA); + LDR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(npc)); + STR(INDEX_UNSIGNED, WA, X29, PPCSTATE_OFF(pc)); + + MOVI2R(XA, (u64)asm_routines.dispatcher); + BR(XA); + + gpr.Unlock(WA); +} + void JitArm64::WriteExitDestInR(ARM64Reg Reg) { STR(INDEX_UNSIGNED, Reg, X29, PPCSTATE_OFF(pc)); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index d52e7c5ba4..d8bde7dcfc 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -112,8 +112,9 @@ private: // Exits void WriteExit(u32 destination); - void WriteExceptionExit(ARM64Reg dest); - void WriteExitDestInR(ARM64Reg dest); + void WriteExceptionExit(Arm64Gen::ARM64Reg dest); + void WriteExceptionExit(); + void WriteExitDestInR(Arm64Gen::ARM64Reg dest); FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); From 17a4208fe54bea02e16fb6a4d05fe8d73b78b4e3 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Fri, 19 Dec 2014 21:08:25 -0600 Subject: [PATCH 6/7] [AArch64] Adds backpatching routines. Currently supports only integer loadstores. Floating point loadstores will come later. This system is semi based on the ARMv7 backpatching routine, where we need to initialize our backpatch routine sizes prior to actually using them so we know we won't be overwriting any memory. 
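Concretely, InitBackpatch emits both the slowmem and the fastmem variant of each
supported load/store into the code buffer once, records how many instructions
each takes (and which fastmem instruction can fault), then rewinds the code
pointer. EmitBackpatchRoutine can later pad the shorter variant with NOPs, so
the fault handler is free to overwrite a faulting fastmem access in place. A
simplified sketch of the measurement step for one flag combination (the real
code below repeats this for every combination):

    // Example: a 16-bit zero-extending load.
    u32 flags = BackPatchInfo::FLAG_LOAD | BackPatchInfo::FLAG_SIZE_16;
    BackPatchInfo info;
    u8* code_base = GetWritableCodePtr();

    // Emit the slowmem version, count its instructions, rewind.
    EmitBackpatchRoutine(this, flags, false, false, W0, X1);
    info.m_slowmem_size = (GetWritableCodePtr() - code_base) / 4;
    SetCodePtr(code_base);

    // Emit the fastmem version the same way, noting the faulting instruction.
    info.m_fastmem_trouble_inst_offset =
        EmitBackpatchRoutine(this, flags, true, false, W0, X1);
    info.m_fastmem_size = (GetWritableCodePtr() - code_base) / 4;
    SetCodePtr(code_base);

    m_backpatch_info[flags] = info;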
--- Source/Core/Core/CMakeLists.txt | 1 + Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 2 + Source/Core/Core/PowerPC/JitArm64/Jit.h | 15 +- .../PowerPC/JitArm64/JitArm64_BackPatch.cpp | 453 ++++++++++++++++++ 4 files changed, 468 insertions(+), 3 deletions(-) create mode 100644 Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 4824bc8b7e..c241a1c7ba 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -223,6 +223,7 @@ elseif(_M_ARM_64) PowerPC/JitArm64/JitAsm.cpp PowerPC/JitArm64/JitArm64Cache.cpp PowerPC/JitArm64/JitArm64_RegCache.cpp + PowerPC/JitArm64/JitArm64_BackPatch.cpp PowerPC/JitArm64/JitArm64_Branch.cpp PowerPC/JitArm64/JitArm64_Integer.cpp PowerPC/JitArm64/JitArm64_LoadStore.cpp diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index acb772d422..22d029030b 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -25,6 +25,7 @@ void JitArm64::Init() code_block.m_stats = &js.st; code_block.m_gpa = &js.gpa; code_block.m_fpa = &js.fpa; + InitBackpatch(); } void JitArm64::ClearCache() @@ -278,6 +279,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitB js.next_inst = ops[i + 1].inst; js.next_compilerPC = ops[i + 1].address; } + if (!ops[i].skip) { if (js.memcheck && (opinfo->flags & FL_USE_FPU)) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index d8bde7dcfc..b0b207dea3 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -4,6 +4,8 @@ #pragma once +#include + #include "Common/Arm64Emitter.h" #include "Core/PowerPC/CPUCoreBase.h" @@ -11,6 +13,7 @@ #include "Core/PowerPC/JitArm64/JitArm64_RegCache.h" #include "Core/PowerPC/JitArm64/JitArm64Cache.h" #include "Core/PowerPC/JitArm64/JitAsm.h" +#include "Core/PowerPC/JitArmCommon/BackPatch.h" #include "Core/PowerPC/JitCommon/JitBase.h" #define PPCSTATE_OFF(elem) ((s64)&PowerPC::ppcState.elem - (s64)&PowerPC::ppcState) @@ -31,11 +34,9 @@ public: JitBaseBlockCache *GetBlockCache() { return &blocks; } - const u8 *BackPatch(u8 *codePtr, u32 em_address, void *ctx) { return nullptr; } - bool IsInCodeSpace(u8 *ptr) { return IsInSpace(ptr); } - bool HandleFault(uintptr_t access_address, SContext* ctx) override { return false; } + bool HandleFault(uintptr_t access_address, SContext* ctx) override; void ClearCache(); @@ -106,6 +107,14 @@ private: PPCAnalyst::CodeBuffer code_buffer; + // The key is the backpatch flags + std::map m_backpatch_info; + + // Backpatching routines + bool DisasmLoadStore(const u8* ptr, u32* flags, Arm64Gen::ARM64Reg* reg); + void InitBackpatch(); + u32 EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem, bool do_padding, Arm64Gen::ARM64Reg RS, Arm64Gen::ARM64Reg addr); + const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b); void DoDownCount(); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp new file mode 100644 index 0000000000..0b813e4b72 --- /dev/null +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_BackPatch.cpp @@ -0,0 +1,453 @@ +// Copyright 2014 Dolphin Emulator Project +// Licensed under GPLv2 +// Refer to the license.txt file included. 
+ +#include + +#include "Common/CommonTypes.h" +#include "Common/StringUtil.h" + +#include "Core/HW/Memmap.h" +#include "Core/PowerPC/JitArm64/Jit.h" +#include "Core/PowerPC/JitArmCommon/BackPatch.h" + +using namespace Arm64Gen; + +static void DoBacktrace(uintptr_t access_address, SContext* ctx) +{ + for (int i = 0; i < 30; i += 2) + ERROR_LOG(DYNA_REC, "R%d: 0x%016llx\tR%d: 0x%016llx", i, ctx->CTX_REG(i), i + 1, ctx->CTX_REG(i + 1)); + + ERROR_LOG(DYNA_REC, "R30: 0x%016llx\tSP: 0x%016llx", ctx->CTX_REG(30), ctx->CTX_SP); + + ERROR_LOG(DYNA_REC, "Access Address: 0x%016lx", access_address); + ERROR_LOG(DYNA_REC, "PC: 0x%016llx", ctx->CTX_PC); + + ERROR_LOG(DYNA_REC, "Memory Around PC"); + + std::string pc_memory = ""; + for (u64 pc = (ctx->CTX_PC - 32); pc < (ctx->CTX_PC + 32); pc += 16) + { + pc_memory += StringFromFormat("%08x%08x%08x%08x", + *(u32*)pc, *(u32*)(pc + 4), *(u32*)(pc + 8), *(u32*)(pc + 12)); + + ERROR_LOG(DYNA_REC, "0x%016lx: %08x %08x %08x %08x", + pc, *(u32*)pc, *(u32*)(pc + 4), *(u32*)(pc + 8), *(u32*)(pc + 12)); + } + + ERROR_LOG(DYNA_REC, "Full block: %s", pc_memory.c_str()); +} + +bool JitArm64::DisasmLoadStore(const u8* ptr, u32* flags, ARM64Reg* reg) +{ + u32 inst = *(u32*)ptr; + u32 prev_inst = *(u32*)(ptr - 4); + u32 next_inst = *(u32*)(ptr + 4); + + u8 op = (inst >> 22) & 0xFF; + u8 size = (inst >> 30) & 0x3; + + if (size == 0) // 8-bit + *flags |= BackPatchInfo::FLAG_SIZE_8; + else if (size == 1) // 16-bit + *flags |= BackPatchInfo::FLAG_SIZE_16; + else // 32-bit + *flags |= BackPatchInfo::FLAG_SIZE_32; + + if (op == 0xE5) // Load + { + *flags |= BackPatchInfo::FLAG_LOAD; + *reg = (ARM64Reg)(inst & 0x1F); + if ((next_inst & 0x7FFFF000) != 0x5AC00000) // REV + *flags |= BackPatchInfo::FLAG_REVERSE; + if ((next_inst & 0x7F800000) == 0x13000000) // SXTH + *flags |= BackPatchInfo::FLAG_EXTEND; + return true; + } + else if (op == 0xE4) // Store + { + *flags |= BackPatchInfo::FLAG_STORE; + + if (size == 0) // 8-bit + *reg = (ARM64Reg)(inst & 0x1F); + else // 16-bit/32-bit register is in previous REV instruction + *reg = (ARM64Reg)((prev_inst >> 5) & 0x1F); + return true; + } + + return false; +} + +u32 JitArm64::EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem, bool do_padding, ARM64Reg RS, ARM64Reg addr) +{ + u32 trouble_offset = 0; + const u8* code_base = emit->GetCodePtr(); + + if (fastmem) + { + MOVK(addr, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32); + + if (flags & BackPatchInfo::FLAG_STORE && + flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) + { + } + else if (flags & BackPatchInfo::FLAG_LOAD && + flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) + { + } + else if (flags & BackPatchInfo::FLAG_STORE) + { + ARM64Reg temp = W0; + if (flags & BackPatchInfo::FLAG_SIZE_32) + emit->REV32(temp, RS); + else if (flags & BackPatchInfo::FLAG_SIZE_16) + emit->REV16(temp, RS); + + trouble_offset = (emit->GetCodePtr() - code_base) / 4; + + if (flags & BackPatchInfo::FLAG_SIZE_32) + emit->STR(INDEX_UNSIGNED, temp, addr, 0); + else if (flags & BackPatchInfo::FLAG_SIZE_16) + emit->STRH(INDEX_UNSIGNED, temp, addr, 0); + else + { + emit->STRB(INDEX_UNSIGNED, RS, addr, 0); + emit->HINT(HINT_NOP); + } + } + else + { + trouble_offset = (emit->GetCodePtr() - code_base) / 4; + + if (flags & BackPatchInfo::FLAG_SIZE_32) + emit->LDR(INDEX_UNSIGNED, RS, addr, 0); + else if (flags & BackPatchInfo::FLAG_SIZE_16) + emit->LDRH(INDEX_UNSIGNED, RS, addr, 0); + else if (flags & BackPatchInfo::FLAG_SIZE_8) + emit->LDRB(INDEX_UNSIGNED, 
RS, addr, 0); + + if (!(flags & BackPatchInfo::FLAG_REVERSE)) + { + if (flags & BackPatchInfo::FLAG_SIZE_32) + emit->REV32(RS, RS); + else if (flags & BackPatchInfo::FLAG_SIZE_16) + emit->REV16(RS, RS); + } + + if (flags & BackPatchInfo::FLAG_EXTEND) + emit->SXTH(RS, RS); + } + } + else + { + if (flags & BackPatchInfo::FLAG_STORE && + flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) + { + } + else if (flags & BackPatchInfo::FLAG_LOAD && + flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) + { + } + else if (flags & BackPatchInfo::FLAG_STORE) + { + emit->MOV(W0, RS); + + if (flags & BackPatchInfo::FLAG_SIZE_32) + emit->MOVI2R(X30, (u64)&Memory::Write_U32); + else if (flags & BackPatchInfo::FLAG_SIZE_16) + emit->MOVI2R(X30, (u64)&Memory::Write_U16); + else + emit->MOVI2R(X30, (u64)&Memory::Write_U8); + + emit->BLR(X30); + } + else + { + if (flags & BackPatchInfo::FLAG_SIZE_32) + emit->MOVI2R(X30, (u64)&Memory::Read_U32); + else if (flags & BackPatchInfo::FLAG_SIZE_16) + emit->MOVI2R(X30, (u64)&Memory::Read_U16); + else if (flags & BackPatchInfo::FLAG_SIZE_8) + emit->MOVI2R(X30, (u64)&Memory::Read_U8); + + emit->BLR(X30); + + if (!(flags & BackPatchInfo::FLAG_REVERSE)) + { + emit->MOV(RS, W0); + } + else + { + if (flags & BackPatchInfo::FLAG_SIZE_32) + emit->REV32(RS, W0); + else if (flags & BackPatchInfo::FLAG_SIZE_16) + emit->REV16(RS, W0); + } + + if (flags & BackPatchInfo::FLAG_EXTEND) + emit->SXTH(RS, RS); + } + } + + if (do_padding) + { + BackPatchInfo& info = m_backpatch_info[flags]; + u32 num_insts_max = std::max(info.m_fastmem_size, info.m_slowmem_size); + + u32 code_size = emit->GetCodePtr() - code_base; + code_size /= 4; + + for (u32 i = 0; i < (num_insts_max - code_size); ++i) + emit->HINT(HINT_NOP); + } + + return trouble_offset; +} + +bool JitArm64::HandleFault(uintptr_t access_address, SContext* ctx) +{ + if (access_address < (uintptr_t)Memory::base) + { + ERROR_LOG(DYNA_REC, "Exception handler - access below memory space. 
PC: 0x%016llx 0x%016lx < 0x%016lx", ctx->CTX_PC, access_address, (uintptr_t)Memory::base); + + DoBacktrace(access_address, ctx); + return false; + } + + if (!IsInSpace((u8*)ctx->CTX_PC)) + { + ERROR_LOG(DYNA_REC, "Backpatch location not within codespace 0x%016llx(0x%08x)", ctx->CTX_PC, Common::swap32(*(u32*)ctx->CTX_PC)); + + DoBacktrace(access_address, ctx); + return false; + } + + ARM64Reg reg = INVALID_REG; + u32 flags = 0; + + if (!DisasmLoadStore((const u8*)ctx->CTX_PC, &flags, ®)) + { + ERROR_LOG(DYNA_REC, "Error disassembling address 0x%016llx(0x%08x)", ctx->CTX_PC, Common::swap32(*(u32*)ctx->CTX_PC)); + + DoBacktrace(access_address, ctx); + return false; + } + + BackPatchInfo& info = m_backpatch_info[flags]; + ARM64XEmitter emitter((u8*)(ctx->CTX_PC - info.m_fastmem_trouble_inst_offset * 4)); + u64 new_pc = (u64)emitter.GetCodePtr(); + + // Slowmem routine doesn't need the address location + // It is already in the correct location + EmitBackpatchRoutine(&emitter, flags, false, true, reg, INVALID_REG); + + emitter.FlushIcache(); + ctx->CTX_PC = new_pc; + + // Wipe the top bits of the addr_register + if (flags & BackPatchInfo::FLAG_STORE) + ctx->CTX_REG(1) &= 0xFFFFFFFFUll; + else + ctx->CTX_REG(0) &= 0xFFFFFFFFUll; + return true; +} + +void JitArm64::InitBackpatch() +{ + u32 flags = 0; + BackPatchInfo info; + u8* code_base = GetWritableCodePtr(); + u8* code_end; + + // Loads + { + // 8bit + { + flags = + BackPatchInfo::FLAG_LOAD | + BackPatchInfo::FLAG_SIZE_8; + EmitBackpatchRoutine(this, flags, false, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 16bit + { + flags = + BackPatchInfo::FLAG_LOAD | + BackPatchInfo::FLAG_SIZE_16; + EmitBackpatchRoutine(this, flags, false, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 32bit + { + flags = + BackPatchInfo::FLAG_LOAD | + BackPatchInfo::FLAG_SIZE_32; + EmitBackpatchRoutine(this, flags, false, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 16bit - Extend + { + flags = + BackPatchInfo::FLAG_LOAD | + BackPatchInfo::FLAG_SIZE_16 | + BackPatchInfo::FLAG_EXTEND; + EmitBackpatchRoutine(this, flags, false, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 16bit - Reverse + { + flags = + BackPatchInfo::FLAG_LOAD 
| + BackPatchInfo::FLAG_SIZE_16 | + BackPatchInfo::FLAG_REVERSE; + EmitBackpatchRoutine(this, flags, false, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 32bit - Reverse + { + flags = + BackPatchInfo::FLAG_LOAD | + BackPatchInfo::FLAG_SIZE_32 | + BackPatchInfo::FLAG_REVERSE; + EmitBackpatchRoutine(this, flags, false, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + } + + // Stores + { + // 8bit + { + flags = + BackPatchInfo::FLAG_STORE | + BackPatchInfo::FLAG_SIZE_8; + EmitBackpatchRoutine(this, flags, false, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 16bit + { + flags = + BackPatchInfo::FLAG_STORE | + BackPatchInfo::FLAG_SIZE_16; + EmitBackpatchRoutine(this, flags, false, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + // 32bit + { + flags = + BackPatchInfo::FLAG_STORE | + BackPatchInfo::FLAG_SIZE_32; + EmitBackpatchRoutine(this, flags, false, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_slowmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + info.m_fastmem_trouble_inst_offset = + EmitBackpatchRoutine(this, flags, true, false, W0, X1); + code_end = GetWritableCodePtr(); + info.m_fastmem_size = (code_end - code_base) / 4; + + SetCodePtr(code_base); + + m_backpatch_info[flags] = info; + } + } +} + From 4247506c00ca5f5755372af77283002729841d81 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Fri, 19 Dec 2014 21:20:46 -0600 Subject: [PATCH 7/7] [AArch64] Implements loadstore instructions in the JIT recompiler. These instructions are all implemented with fastmem support. Currently loads with update are disabled due to an issue that I've yet to figure out. I'm sure I'll figure that out later. 
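To give an idea of what gets emitted: a plain lwz ends up with
FLAG_LOAD | FLAG_SIZE_32, and the fastmem path of EmitBackpatchRoutine reduces
to roughly the following (simplified sketch; the slowmem fallback calls
Memory::Read_U32 instead, and is what HandleFault patches in when the access
faults):

    // addr holds the 32-bit guest address; MOVK splices in the upper bits of
    // the host mapping so the load goes straight through Memory::base.
    emit->MOVK(addr, ((u64)Memory::base >> 32) & 0xFFFF, SHIFT_32);
    emit->LDR(INDEX_UNSIGNED, RS, addr, 0); // the instruction that may fault
    emit->REV32(RS, RS);                    // guest memory is big-endian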
--- Source/Core/Core/PowerPC/JitArm64/Jit.h | 5 + .../PowerPC/JitArm64/JitArm64_LoadStore.cpp | 410 ++++++++++++++++++ .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 28 +- 3 files changed, 429 insertions(+), 14 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index b0b207dea3..95bf4c35f8 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -97,6 +97,8 @@ public: // LoadStore void icbi(UGeckoInstruction inst); + void lXX(UGeckoInstruction inst); + void stX(UGeckoInstruction inst); private: Arm64GPRCache gpr; @@ -114,6 +116,9 @@ private: bool DisasmLoadStore(const u8* ptr, u32* flags, Arm64Gen::ARM64Reg* reg); void InitBackpatch(); u32 EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem, bool do_padding, Arm64Gen::ARM64Reg RS, Arm64Gen::ARM64Reg addr); + // Loadstore routines + void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update); + void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset); const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp index 783a9ce78b..58c1523897 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStore.cpp @@ -17,6 +17,416 @@ using namespace Arm64Gen; void JitArm64::icbi(UGeckoInstruction inst) { + gpr.Flush(FlushMode::FLUSH_ALL); + fpr.Flush(FlushMode::FLUSH_ALL); + FallBackToInterpreter(inst); WriteExit(js.compilerPC + 4); } + +void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update) +{ + // We want to make sure to not get LR as a temp register + gpr.Lock(W0, W30); + + gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg); + ARM64Reg dest_reg = gpr.R(dest); + ARM64Reg up_reg = INVALID_REG; + ARM64Reg off_reg = INVALID_REG; + + if (addr != -1 && !gpr.IsImm(addr)) + up_reg = gpr.R(addr); + + if (offsetReg != -1 && !gpr.IsImm(offsetReg)) + off_reg = gpr.R(offsetReg); + + BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); + BitSet32 ignore_mask(0); + regs_in_use[W0] = 0; + regs_in_use[W30] = 0; + ignore_mask[dest_reg] = 1; + + ARM64Reg addr_reg = W0; + u32 imm_addr = 0; + bool is_immediate = false; + + if (offsetReg == -1) + { + if (addr != -1) + { + if (gpr.IsImm(addr)) + { + is_immediate = true; + imm_addr = gpr.GetImm(addr) + offset; + } + else + { + MOVI2R(addr_reg, offset); + ADD(addr_reg, addr_reg, up_reg); + } + } + else + { + is_immediate = true; + imm_addr = offset; + } + } + else + { + if (addr != -1) + { + if (gpr.IsImm(addr) && gpr.IsImm(offsetReg)) + { + is_immediate = true; + imm_addr = gpr.GetImm(addr) + gpr.GetImm(offsetReg); + } + else if (gpr.IsImm(addr) && !gpr.IsImm(offsetReg)) + { + MOVI2R(addr_reg, gpr.GetImm(addr)); + ADD(addr_reg, addr_reg, off_reg); + } + else if (!gpr.IsImm(addr) && gpr.IsImm(offsetReg)) + { + MOVI2R(addr_reg, gpr.GetImm(offsetReg)); + ADD(addr_reg, addr_reg, up_reg); + } + else + { + ADD(addr_reg, up_reg, off_reg); + } + } + else + { + if (gpr.IsImm(offsetReg)) + { + is_immediate = true; + imm_addr = gpr.GetImm(offsetReg); + } + else + { + MOV(addr_reg, off_reg); + } + } + } + + ARM64Reg XA = EncodeRegTo64(addr_reg); + + if (is_immediate) + MOVI2R(XA, imm_addr); + + if (is_immediate && Memory::IsRAMAddress(imm_addr)) + { + EmitBackpatchRoutine(this, flags, true, 
false, dest_reg, XA); + + if (update) + MOVI2R(up_reg, imm_addr); + } + else + { + if (update) + MOV(up_reg, addr_reg); + + // Has a chance of being backpatched which will destroy our state + // push and pop everything in this instance + ABI_PushRegisters(regs_in_use); + EmitBackpatchRoutine(this, flags, + SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, + SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, + dest_reg, XA); + ABI_PopRegisters(regs_in_use, ignore_mask); + } + + gpr.Unlock(W0, W30); +} + +void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset) +{ + // We want to make sure to not get LR as a temp register + gpr.Lock(W0, W1, W30); + + ARM64Reg RS = gpr.R(value); + + ARM64Reg reg_dest = INVALID_REG; + ARM64Reg reg_off = INVALID_REG; + + if (regOffset != -1 && !gpr.IsImm(regOffset)) + reg_off = gpr.R(regOffset); + if (dest != -1 && !gpr.IsImm(dest)) + reg_dest = gpr.R(dest); + + BitSet32 regs_in_use = gpr.GetCallerSavedUsed(); + regs_in_use[W0] = 0; + regs_in_use[W1] = 0; + regs_in_use[W30] = 0; + + ARM64Reg addr_reg = W1; + + u32 imm_addr = 0; + bool is_immediate = false; + + if (regOffset == -1) + { + if (dest != -1) + { + if (gpr.IsImm(dest)) + { + is_immediate = true; + imm_addr = gpr.GetImm(dest) + offset; + } + else + { + MOVI2R(addr_reg, offset); + ADD(addr_reg, addr_reg, reg_dest); + } + } + else + { + is_immediate = true; + imm_addr = offset; + } + } + else + { + if (dest != -1) + { + if (gpr.IsImm(dest) && gpr.IsImm(regOffset)) + { + is_immediate = true; + imm_addr = gpr.GetImm(dest) + gpr.GetImm(regOffset); + } + else if (gpr.IsImm(dest) && !gpr.IsImm(regOffset)) + { + MOVI2R(addr_reg, gpr.GetImm(dest)); + ADD(addr_reg, addr_reg, reg_off); + } + else if (!gpr.IsImm(dest) && gpr.IsImm(regOffset)) + { + MOVI2R(addr_reg, gpr.GetImm(regOffset)); + ADD(addr_reg, addr_reg, reg_dest); + } + else + { + ADD(addr_reg, reg_dest, reg_off); + } + } + else + { + if (gpr.IsImm(regOffset)) + { + is_immediate = true; + imm_addr = gpr.GetImm(regOffset); + } + else + { + MOV(addr_reg, reg_off); + } + } + } + + ARM64Reg XA = EncodeRegTo64(addr_reg); + + if (is_immediate) + MOVI2R(XA, imm_addr); + + if (is_immediate && Memory::IsRAMAddress(imm_addr)) + { + EmitBackpatchRoutine(this, flags, true, false, RS, XA); + } + else + { + // Has a chance of being backpatched which will destroy our state + // push and pop everything in this instance + ABI_PushRegisters(regs_in_use); + EmitBackpatchRoutine(this, flags, + SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, + SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, + RS, XA); + ABI_PopRegisters(regs_in_use); + } + + gpr.Unlock(W0, W1, W30); +} + +void JitArm64::lXX(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITLoadStoreOff); + + u32 a = inst.RA, b = inst.RB, d = inst.RD; + s32 offset = inst.SIMM_16; + s32 offsetReg = -1; + u32 flags = BackPatchInfo::FLAG_LOAD; + bool update = false; + + switch (inst.OPCD) + { + case 31: + switch (inst.SUBOP10) + { + case 55: // lwzux + update = true; + case 23: // lwzx + flags |= BackPatchInfo::FLAG_SIZE_32; + offsetReg = b; + break; + case 119: //lbzux + update = true; + case 87: // lbzx + flags |= BackPatchInfo::FLAG_SIZE_8; + offsetReg = b; + break; + case 311: // lhzux + update = true; + case 279: // lhzx + flags |= BackPatchInfo::FLAG_SIZE_16; + offsetReg = b; + break; + case 375: // lhaux + update = true; + case 343: // lhax + flags |= BackPatchInfo::FLAG_EXTEND | + BackPatchInfo::FLAG_SIZE_16; + offsetReg = b; 
+ break; + case 534: // lwbrx + flags |= BackPatchInfo::FLAG_REVERSE | + BackPatchInfo::FLAG_SIZE_32; + break; + case 790: // lhbrx + flags |= BackPatchInfo::FLAG_REVERSE | + BackPatchInfo::FLAG_SIZE_16; + break; + } + break; + case 33: // lwzu + update = true; + case 32: // lwz + flags |= BackPatchInfo::FLAG_SIZE_32; + break; + case 35: // lbzu + update = true; + case 34: // lbz + flags |= BackPatchInfo::FLAG_SIZE_8; + break; + case 41: // lhzu + update = true; + case 40: // lhz + flags |= BackPatchInfo::FLAG_SIZE_16; + break; + case 43: // lhau + update = true; + case 42: // lha + flags |= BackPatchInfo::FLAG_EXTEND | + BackPatchInfo::FLAG_SIZE_16; + break; + } + + FALLBACK_IF(update); + + SafeLoadToReg(d, update ? a : (a ? a : -1), offsetReg, flags, offset, update); + + // LWZ idle skipping + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle && + inst.OPCD == 32 && + (inst.hex & 0xFFFF0000) == 0x800D0000 && + (Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 || + (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) && + Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8) + { + // if it's still 0, we can wait until the next event + FixupBranch noIdle = CBNZ(gpr.R(d)); + + gpr.Flush(FLUSH_MAINTAIN_STATE); + fpr.Flush(FLUSH_MAINTAIN_STATE); + + ARM64Reg WA = gpr.GetReg(); + ARM64Reg XA = EncodeRegTo64(WA); + + MOVI2R(XA, (u64)&PowerPC::OnIdle); + MOVI2R(W0, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16); + BLR(XA); + + gpr.Unlock(WA); + WriteExceptionExit(); + + SetJumpTarget(noIdle); + + //js.compilerPC += 8; + return; + } +} + +void JitArm64::stX(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITLoadStoreOff); + + u32 a = inst.RA, b = inst.RB, s = inst.RS; + s32 offset = inst.SIMM_16; + s32 regOffset = -1; + u32 flags = BackPatchInfo::FLAG_STORE; + bool update = false; + switch (inst.OPCD) + { + case 31: + switch (inst.SUBOP10) + { + case 183: // stwux + update = true; + case 151: // stwx + flags |= BackPatchInfo::FLAG_SIZE_32; + regOffset = b; + break; + case 247: // stbux + update = true; + case 215: // stbx + flags |= BackPatchInfo::FLAG_SIZE_8; + regOffset = b; + break; + case 439: // sthux + update = true; + case 407: // sthx + flags |= BackPatchInfo::FLAG_SIZE_16; + regOffset = b; + break; + } + break; + case 37: // stwu + update = true; + case 36: // stw + flags |= BackPatchInfo::FLAG_SIZE_32; + break; + case 39: // stbu + update = true; + case 38: // stb + flags |= BackPatchInfo::FLAG_SIZE_8; + break; + case 45: // sthu + update = true; + case 44: // sth + flags |= BackPatchInfo::FLAG_SIZE_16; + break; + + } + + SafeStoreFromReg(update ? a : (a ? 
a : -1), s, regOffset, flags, offset); + + if (update) + { + ARM64Reg WA = gpr.GetReg(); + ARM64Reg RB; + ARM64Reg RA = gpr.R(a); + if (regOffset != -1) + RB = gpr.R(regOffset); + if (regOffset == -1) + { + MOVI2R(WA, offset); + ADD(RA, RA, WA); + } + else + { + ADD(RA, RA, RB); + } + gpr.Unlock(WA); + } +} diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 336bf47640..6e27274511 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -65,21 +65,21 @@ static GekkoOPTemplate primarytable[] = {28, &JitArm64::arith_imm}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, {29, &JitArm64::arith_imm}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, - {32, &JitArm64::FallBackToInterpreter}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {33, &JitArm64::FallBackToInterpreter}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {34, &JitArm64::FallBackToInterpreter}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {35, &JitArm64::FallBackToInterpreter}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {40, &JitArm64::FallBackToInterpreter}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {41, &JitArm64::FallBackToInterpreter}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {42, &JitArm64::FallBackToInterpreter}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, - {43, &JitArm64::FallBackToInterpreter}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {32, &JitArm64::lXX}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {33, &JitArm64::lXX}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {34, &JitArm64::lXX}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {35, &JitArm64::lXX}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {40, &JitArm64::lXX}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {41, &JitArm64::lXX}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, + {42, &JitArm64::lXX}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, + {43, &JitArm64::lXX}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, - {44, &JitArm64::FallBackToInterpreter}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, - {45, &JitArm64::FallBackToInterpreter}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, - {36, &JitArm64::FallBackToInterpreter}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, - {37, &JitArm64::FallBackToInterpreter}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, - {38, &JitArm64::FallBackToInterpreter}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, - {39, &JitArm64::FallBackToInterpreter}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, + {44, &JitArm64::stX}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, + {45, &JitArm64::stX}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, + {36, &JitArm64::stX}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, + {37, &JitArm64::stX}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, + {38, &JitArm64::stX}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, + {39, &JitArm64::stX}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, {46, &JitArm64::FallBackToInterpreter}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, {47, &JitArm64::FallBackToInterpreter}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}},