From 34549445a84f07c8d3c608e7142d72d051161e1d Mon Sep 17 00:00:00 2001 From: kd-11 Date: Sun, 4 Aug 2024 18:33:28 +0300 Subject: [PATCH] Migrate PPU to post-process execution model --- rpcs3/Emu/CPU/Backends/AArch64JIT.cpp | 4 +- rpcs3/Emu/CPU/Backends/AArch64JIT.h | 8 +- rpcs3/Emu/Cell/PPUThread.cpp | 7 ++ rpcs3/Emu/Cell/PPUTranslator.cpp | 108 +++++++++++--------------- rpcs3/Emu/Cell/PPUTranslator.h | 3 - 5 files changed, 58 insertions(+), 72 deletions(-) diff --git a/rpcs3/Emu/CPU/Backends/AArch64JIT.cpp b/rpcs3/Emu/CPU/Backends/AArch64JIT.cpp index e2a2e0ef93..47ee1a6232 100644 --- a/rpcs3/Emu/CPU/Backends/AArch64JIT.cpp +++ b/rpcs3/Emu/CPU/Backends/AArch64JIT.cpp @@ -23,7 +23,7 @@ namespace aarch64 using function_info_t = GHC_frame_preservation_pass::function_info_t; GHC_frame_preservation_pass::GHC_frame_preservation_pass( - gprs base_reg, + gpr base_reg, u32 hv_ctx_offset, std::function exclusion_callback) { @@ -226,7 +226,7 @@ namespace aarch64 "add x30, x%u, x30;\n" // Add to base register "ldr x30, [x30];\n", // Load x30 execution_context.hypervisor_context_offset, - execution_context.base_register); + static_cast(execution_context.base_register)); if (function_info.stack_frame_size > 0) { diff --git a/rpcs3/Emu/CPU/Backends/AArch64JIT.h b/rpcs3/Emu/CPU/Backends/AArch64JIT.h index 77ec184184..5cfe5eafd8 100644 --- a/rpcs3/Emu/CPU/Backends/AArch64JIT.h +++ b/rpcs3/Emu/CPU/Backends/AArch64JIT.h @@ -11,7 +11,7 @@ namespace aarch64 { - enum gprs : s32 + enum gpr : s32 { x0 = 0, x1, x2, x3, x4, x5, x6, x7, x8, x9, @@ -21,7 +21,7 @@ namespace aarch64 // On non-x86 architectures GHC runs stackless. SP is treated as a pointer to scratchpad memory. // This pass keeps this behavior intact while preserving the expectations of the host's C++ ABI. 
- class GHC_frame_preservation_pass : translator_pass + class GHC_frame_preservation_pass : public translator_pass { public: struct function_info_t @@ -46,7 +46,7 @@ namespace aarch64 struct { - gprs base_register; + gpr base_register; u32 hypervisor_context_offset; } execution_context; @@ -60,7 +60,7 @@ namespace aarch64 public: GHC_frame_preservation_pass( - gprs base_reg, + gpr base_reg, u32 hv_ctx_offset, std::function exclusion_callback = {}); ~GHC_frame_preservation_pass() = default; diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 61471ad2d4..595103173d 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -291,6 +291,13 @@ const auto ppu_gateway = build_function_asm("ppu_gateway", // and https://developer.arm.com/documentation/den0024/a/The-ABI-for-ARM-64-bit-Architecture/Register-use-in-the-AArch64-Procedure-Call-Standard/Parameters-in-general-purpose-registers // for AArch64 calling convention + // PPU function argument layout: + // x19 = m_exec + // x20 = m_thread + // x21 = seg0 + // x22 = m_base + // x23 - x25 = gpr[0] - gpr[2] + // Push callee saved registers to the hv context // Assume our LLVM compiled code is unsafe and can clobber our stack. GHC on aarch64 treats stack as scratch. // We also want to store the register context at a fixed place so we can read the hypervisor state from any lcoation. 
diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 165955fe88..1ac44770d1 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -16,6 +16,10 @@ #include #include +#ifdef ARCH_ARM64 +#include "Emu/CPU/Backends/AArch64JIT.h" +#endif + using namespace llvm; const ppu_decoder s_ppu_decoder; @@ -30,6 +34,14 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo // Bind context cpu_translator::initialize(context, engine); + // Initialize transform passes +#ifdef ARCH_ARM64 + std::unique_ptr ghc_fixup_pass = std::make_unique( + aarch64::x20, ::offset32(&ppu_thread::hv_ctx)); + + register_transform_pass(ghc_fixup_pass); +#endif + // Thread context struct (TODO: safer member access) const u32 off0 = offset32(&ppu_thread::state); const u32 off1 = offset32(&ppu_thread::gpr); @@ -208,7 +220,8 @@ Function* PPUTranslator::Translate(const ppu_function& info) m_ir->CreateAtomicRMW(llvm::AtomicRMWInst::Or, ptr, m_ir->getInt32((+cpu_flag::wait).operator u32()), llvm::MaybeAlign{4}, llvm::AtomicOrdering::AcquireRelease); // Create tail call to the check function - VMEscape(Call(GetType(), "__check", m_thread, GetAddr())); + Call(GetType(), "__check", m_thread, GetAddr())->setTailCall(); + m_ir->CreateRetVoid(); } else { @@ -269,7 +282,7 @@ Function* PPUTranslator::Translate(const ppu_function& info) } } - run_transforms(*m_function); + replace_intrinsics(*m_function); return m_function; } @@ -320,8 +333,8 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info) if (vec_addrs.empty()) { // Possible special case for no functions (allowing the do-while optimization) - m_ir->CreateRetVoid(); // FIXME: Aarch64. It should work fine as long as there is no callchain beyond this function with a ret path. 
- run_transforms(*m_function); + m_ir->CreateRetVoid(); + replace_intrinsics(*m_function); return m_function; } @@ -377,9 +390,9 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info) // Set insertion point to afterloop_block m_ir->SetInsertPoint(after_loop); - m_ir->CreateRetVoid(); // FIXME: Aarch64 - Should be ok as long as no ret-based callchain proceeds from here + m_ir->CreateRetVoid(); - run_transforms(*m_function); + replace_intrinsics(*m_function); return m_function; } @@ -481,8 +494,8 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect) if (_target >= u32{umax}) { - auto c = Call(GetType(), "__error", m_thread, GetAddr(), m_ir->getInt32(*ensure(m_info.get_ptr(::narrow(m_addr + base))))); - VMEscape(c); + Call(GetType(), "__error", m_thread, GetAddr(), m_ir->getInt32(*ensure(m_info.get_ptr(::narrow(m_addr + base))))); + m_ir->CreateRetVoid(); return; } else if (_target >= caddr && _target <= cend) @@ -564,7 +577,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect) const auto c = m_ir->CreateCall(callee, {m_exec, m_thread, seg0, m_base, GetGpr(0), GetGpr(1), GetGpr(2)}); c->setTailCallKind(llvm::CallInst::TCK_Tail); c->setCallingConv(CallingConv::GHC); - VMEscape(c); + m_ir->CreateRetVoid(); } Value* PPUTranslator::RegInit(Value*& local) @@ -778,8 +791,8 @@ void PPUTranslator::TestAborted() m_ir->SetInsertPoint(vcheck); // Create tail call to the check function - auto c = Call(GetType(), "__check", m_thread, GetAddr()); - VMEscape(c); + Call(GetType(), "__check", m_thread, GetAddr())->setTailCall(); + m_ir->CreateRetVoid(); m_ir->SetInsertPoint(body); } @@ -2205,14 +2218,16 @@ void PPUTranslator::SC(ppu_opcode_t op) if (index < 1024) { - auto c = Call(GetType(), fmt::format("%s", ppu_syscall_code(index)), m_thread); - VMEscape(c, true); + Call(GetType(), fmt::format("%s", ppu_syscall_code(index)), m_thread); + //Call(GetType(), "__escape", m_thread)->setTailCall(); + m_ir->CreateRetVoid(); return; } } - auto c = 
Call(GetType(), op.lev ? "__lv1call" : "__syscall", m_thread, num); - VMEscape(c, true); + Call(GetType(), op.lev ? "__lv1call" : "__syscall", m_thread, num); + //Call(GetType(), "__escape", m_thread)->setTailCall(); + m_ir->CreateRetVoid(); } void PPUTranslator::B(ppu_opcode_t op) @@ -2773,9 +2788,9 @@ void PPUTranslator::LWARX(ppu_opcode_t op) { RegStore(Trunc(GetAddr()), m_cia); FlushRegisters(); - - auto inst = Call(GetType(), "__resinterp", m_thread); - VMEscape(inst, true); + Call(GetType(), "__resinterp", m_thread); + //Call(GetType(), "__escape", m_thread)->setTailCall(); + m_ir->CreateRetVoid(); return; } @@ -2925,9 +2940,9 @@ void PPUTranslator::LDARX(ppu_opcode_t op) { RegStore(Trunc(GetAddr()), m_cia); FlushRegisters(); - - auto inst = Call(GetType(), "__resinterp", m_thread); - VMEscape(inst, true); + Call(GetType(), "__resinterp", m_thread); + //Call(GetType(), "__escape", m_thread)->setTailCall(); + m_ir->CreateRetVoid(); return; } @@ -4995,8 +5010,9 @@ void PPUTranslator::FCFID(ppu_opcode_t op) void PPUTranslator::UNK(ppu_opcode_t op) { FlushRegisters(); - auto c = Call(GetType(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode)); - VMEscape(c, true); + Call(GetType(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode)); + //Call(GetType(), "__escape", m_thread)->setTailCall(); + m_ir->CreateRetVoid(); } @@ -5275,8 +5291,9 @@ Value* PPUTranslator::CheckTrapCondition(u32 to, Value* left, Value* right) void PPUTranslator::Trap() { - auto c = Call(GetType(), "__trap", m_thread, GetAddr()); - VMEscape(c); + Call(GetType(), "__trap", m_thread, GetAddr()); + //Call(GetType(), "__escape", m_thread)->setTailCall(); + m_ir->CreateRetVoid(); } Value* PPUTranslator::CheckBranchCondition(u32 bo, u32 bi) @@ -5323,42 +5340,6 @@ MDNode* PPUTranslator::CheckBranchProbability(u32 bo) return nullptr; } -void PPUTranslator::VMEscape([[maybe_unused]] llvm::CallInst* tail_call, [[maybe_unused]] bool skip_flush) -{ - //if (!skip_flush) - { - // Flush - 
FlushRegisters(); - } - -#ifdef ARCH_X64 - // Optionally flag last call as a tail - if (tail_call) - { - tail_call->setTailCall(); - } - - // This is actually AMD64 specific but good enough for now - m_ir->CreateRetVoid(); -#else - - // Validation. Make sure we're escaping from a correct context. Only guest JIT should ever go through the "escape" gate. - const auto bb = m_ir->GetInsertPoint(); - const auto arg = llvm::dyn_cast(m_thread); - ensure(bb->getParent()->getName().str() == arg->getParent()->getName().str()); - - const u32 hv_register_array_offset = ::offset32(&ppu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs); - const std::string asm_ = fmt::format( - "ldr x20, $0;\n" - "ldr x30, [x20, #%u];\n", - hv_register_array_offset); - - LLVM_ASM(asm_, std::array{ m_thread }, "m", m_ir, m_function->getContext()); - m_ir->CreateRetVoid(); - -#endif -} - void PPUTranslator::build_interpreter() { #define BUILD_VEC_INST(i) { \ @@ -5374,8 +5355,9 @@ void PPUTranslator::build_interpreter() op.vb = 2; \ op.vc = 3; \ this->i(op); \ - VMEscape(); \ - run_transforms(*m_function); \ + FlushRegisters(); \ + m_ir->CreateRetVoid(); \ + replace_intrinsics(*m_function); \ } BUILD_VEC_INST(VADDCUW); diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index f854297b3d..a71e42a033 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -150,9 +150,6 @@ public: // Emit function call void CallFunction(u64 target, llvm::Value* indirect = nullptr); - // Emit escape sequence back to hypervisor - void VMEscape(llvm::CallInst* tail_call = nullptr, bool skip_flush = false); - // Emit state check mid-block void TestAborted();