mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 19:45:20 +00:00
Migrate PPU to post-process execution model
This commit is contained in:
parent
a976ac3353
commit
34549445a8
5 changed files with 58 additions and 72 deletions
|
@ -23,7 +23,7 @@ namespace aarch64
|
|||
using function_info_t = GHC_frame_preservation_pass::function_info_t;
|
||||
|
||||
GHC_frame_preservation_pass::GHC_frame_preservation_pass(
|
||||
gprs base_reg,
|
||||
gpr base_reg,
|
||||
u32 hv_ctx_offset,
|
||||
std::function<bool(const std::string&)> exclusion_callback)
|
||||
{
|
||||
|
@ -226,7 +226,7 @@ namespace aarch64
|
|||
"add x30, x%u, x30;\n" // Add to base register
|
||||
"ldr x30, [x30];\n", // Load x30
|
||||
execution_context.hypervisor_context_offset,
|
||||
execution_context.base_register);
|
||||
static_cast<u32>(execution_context.base_register));
|
||||
|
||||
if (function_info.stack_frame_size > 0)
|
||||
{
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
|
||||
namespace aarch64
|
||||
{
|
||||
enum gprs : s32
|
||||
enum gpr : s32
|
||||
{
|
||||
x0 = 0,
|
||||
x1, x2, x3, x4, x5, x6, x7, x8, x9,
|
||||
|
@ -21,7 +21,7 @@ namespace aarch64
|
|||
|
||||
// On non-x86 architectures GHC runs stackless. SP is treated as a pointer to scratchpad memory.
|
||||
// This pass keeps this behavior intact while preserving the expectations of the host's C++ ABI.
|
||||
class GHC_frame_preservation_pass : translator_pass
|
||||
class GHC_frame_preservation_pass : public translator_pass
|
||||
{
|
||||
public:
|
||||
struct function_info_t
|
||||
|
@ -46,7 +46,7 @@ namespace aarch64
|
|||
|
||||
struct
|
||||
{
|
||||
gprs base_register;
|
||||
gpr base_register;
|
||||
u32 hypervisor_context_offset;
|
||||
} execution_context;
|
||||
|
||||
|
@ -60,7 +60,7 @@ namespace aarch64
|
|||
public:
|
||||
|
||||
GHC_frame_preservation_pass(
|
||||
gprs base_reg,
|
||||
gpr base_reg,
|
||||
u32 hv_ctx_offset,
|
||||
std::function<bool(const std::string&)> exclusion_callback = {});
|
||||
~GHC_frame_preservation_pass() = default;
|
||||
|
|
|
@ -291,6 +291,13 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
|
|||
// and https://developer.arm.com/documentation/den0024/a/The-ABI-for-ARM-64-bit-Architecture/Register-use-in-the-AArch64-Procedure-Call-Standard/Parameters-in-general-purpose-registers
|
||||
// for AArch64 calling convention
|
||||
|
||||
// PPU function argument layout:
|
||||
// x19 = m_exec
|
||||
// x20 = m_thread
|
||||
// x21 = seg0
|
||||
// x22 = m_base
|
||||
// x23 - x25 = gpr[0] - gpr[2]
|
||||
|
||||
// Push callee saved registers to the hv context
|
||||
// Assume our LLVM compiled code is unsafe and can clobber our stack. GHC on aarch64 treats stack as scratch.
|
||||
// We also want to store the register context at a fixed place so we can read the hypervisor state from any location.
|
||||
|
|
|
@ -16,6 +16,10 @@
|
|||
#include <unordered_set>
|
||||
#include <span>
|
||||
|
||||
#ifdef ARCH_ARM64
|
||||
#include "Emu/CPU/Backends/AArch64JIT.h"
|
||||
#endif
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
const ppu_decoder<PPUTranslator> s_ppu_decoder;
|
||||
|
@ -30,6 +34,14 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo
|
|||
// Bind context
|
||||
cpu_translator::initialize(context, engine);
|
||||
|
||||
// Initialize transform passes
|
||||
#ifdef ARCH_ARM64
|
||||
std::unique_ptr<translator_pass> ghc_fixup_pass = std::make_unique<aarch64::GHC_frame_preservation_pass>(
|
||||
aarch64::x20, ::offset32(&ppu_thread::hv_ctx));
|
||||
|
||||
register_transform_pass(ghc_fixup_pass);
|
||||
#endif
|
||||
|
||||
// Thread context struct (TODO: safer member access)
|
||||
const u32 off0 = offset32(&ppu_thread::state);
|
||||
const u32 off1 = offset32(&ppu_thread::gpr);
|
||||
|
@ -208,7 +220,8 @@ Function* PPUTranslator::Translate(const ppu_function& info)
|
|||
m_ir->CreateAtomicRMW(llvm::AtomicRMWInst::Or, ptr, m_ir->getInt32((+cpu_flag::wait).operator u32()), llvm::MaybeAlign{4}, llvm::AtomicOrdering::AcquireRelease);
|
||||
|
||||
// Create tail call to the check function
|
||||
VMEscape(Call(GetType<void>(), "__check", m_thread, GetAddr()));
|
||||
Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -269,7 +282,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
|
|||
}
|
||||
}
|
||||
|
||||
run_transforms(*m_function);
|
||||
replace_intrinsics(*m_function);
|
||||
return m_function;
|
||||
}
|
||||
|
||||
|
@ -320,8 +333,8 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info)
|
|||
if (vec_addrs.empty())
|
||||
{
|
||||
// Possible special case for no functions (allowing the do-while optimization)
|
||||
m_ir->CreateRetVoid(); // FIXME: Aarch64. It should work fine as long as there is no callchain beyond this function with a ret path.
|
||||
run_transforms(*m_function);
|
||||
m_ir->CreateRetVoid();
|
||||
replace_intrinsics(*m_function);
|
||||
return m_function;
|
||||
}
|
||||
|
||||
|
@ -377,9 +390,9 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info)
|
|||
// Set insertion point to afterloop_block
|
||||
m_ir->SetInsertPoint(after_loop);
|
||||
|
||||
m_ir->CreateRetVoid(); // FIXME: Aarch64 - Should be ok as long as no ret-based callchain proceeds from here
|
||||
m_ir->CreateRetVoid();
|
||||
|
||||
run_transforms(*m_function);
|
||||
replace_intrinsics(*m_function);
|
||||
return m_function;
|
||||
}
|
||||
|
||||
|
@ -481,8 +494,8 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
|
|||
|
||||
if (_target >= u32{umax})
|
||||
{
|
||||
auto c = Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(*ensure(m_info.get_ptr<u32>(::narrow<u32>(m_addr + base)))));
|
||||
VMEscape(c);
|
||||
Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(*ensure(m_info.get_ptr<u32>(::narrow<u32>(m_addr + base)))));
|
||||
m_ir->CreateRetVoid();
|
||||
return;
|
||||
}
|
||||
else if (_target >= caddr && _target <= cend)
|
||||
|
@ -564,7 +577,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
|
|||
const auto c = m_ir->CreateCall(callee, {m_exec, m_thread, seg0, m_base, GetGpr(0), GetGpr(1), GetGpr(2)});
|
||||
c->setTailCallKind(llvm::CallInst::TCK_Tail);
|
||||
c->setCallingConv(CallingConv::GHC);
|
||||
VMEscape(c);
|
||||
m_ir->CreateRetVoid();
|
||||
}
|
||||
|
||||
Value* PPUTranslator::RegInit(Value*& local)
|
||||
|
@ -778,8 +791,8 @@ void PPUTranslator::TestAborted()
|
|||
m_ir->SetInsertPoint(vcheck);
|
||||
|
||||
// Create tail call to the check function
|
||||
auto c = Call(GetType<void>(), "__check", m_thread, GetAddr());
|
||||
VMEscape(c);
|
||||
Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
m_ir->SetInsertPoint(body);
|
||||
}
|
||||
|
||||
|
@ -2205,14 +2218,16 @@ void PPUTranslator::SC(ppu_opcode_t op)
|
|||
|
||||
if (index < 1024)
|
||||
{
|
||||
auto c = Call(GetType<void>(), fmt::format("%s", ppu_syscall_code(index)), m_thread);
|
||||
VMEscape(c, true);
|
||||
Call(GetType<void>(), fmt::format("%s", ppu_syscall_code(index)), m_thread);
|
||||
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
auto c = Call(GetType<void>(), op.lev ? "__lv1call" : "__syscall", m_thread, num);
|
||||
VMEscape(c, true);
|
||||
Call(GetType<void>(), op.lev ? "__lv1call" : "__syscall", m_thread, num);
|
||||
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
}
|
||||
|
||||
void PPUTranslator::B(ppu_opcode_t op)
|
||||
|
@ -2773,9 +2788,9 @@ void PPUTranslator::LWARX(ppu_opcode_t op)
|
|||
{
|
||||
RegStore(Trunc(GetAddr()), m_cia);
|
||||
FlushRegisters();
|
||||
|
||||
auto inst = Call(GetType<void>(), "__resinterp", m_thread);
|
||||
VMEscape(inst, true);
|
||||
Call(GetType<void>(), "__resinterp", m_thread);
|
||||
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -2925,9 +2940,9 @@ void PPUTranslator::LDARX(ppu_opcode_t op)
|
|||
{
|
||||
RegStore(Trunc(GetAddr()), m_cia);
|
||||
FlushRegisters();
|
||||
|
||||
auto inst = Call(GetType<void>(), "__resinterp", m_thread);
|
||||
VMEscape(inst, true);
|
||||
Call(GetType<void>(), "__resinterp", m_thread);
|
||||
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -4995,8 +5010,9 @@ void PPUTranslator::FCFID(ppu_opcode_t op)
|
|||
void PPUTranslator::UNK(ppu_opcode_t op)
|
||||
{
|
||||
FlushRegisters();
|
||||
auto c = Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode));
|
||||
VMEscape(c, true);
|
||||
Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode));
|
||||
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
}
|
||||
|
||||
|
||||
|
@ -5275,8 +5291,9 @@ Value* PPUTranslator::CheckTrapCondition(u32 to, Value* left, Value* right)
|
|||
|
||||
void PPUTranslator::Trap()
|
||||
{
|
||||
auto c = Call(GetType<void>(), "__trap", m_thread, GetAddr());
|
||||
VMEscape(c);
|
||||
Call(GetType<void>(), "__trap", m_thread, GetAddr());
|
||||
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
|
||||
m_ir->CreateRetVoid();
|
||||
}
|
||||
|
||||
Value* PPUTranslator::CheckBranchCondition(u32 bo, u32 bi)
|
||||
|
@ -5323,42 +5340,6 @@ MDNode* PPUTranslator::CheckBranchProbability(u32 bo)
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
void PPUTranslator::VMEscape([[maybe_unused]] llvm::CallInst* tail_call, [[maybe_unused]] bool skip_flush)
|
||||
{
|
||||
//if (!skip_flush)
|
||||
{
|
||||
// Flush
|
||||
FlushRegisters();
|
||||
}
|
||||
|
||||
#ifdef ARCH_X64
|
||||
// Optionally flag last call as a tail
|
||||
if (tail_call)
|
||||
{
|
||||
tail_call->setTailCall();
|
||||
}
|
||||
|
||||
// This is actually AMD64 specific but good enough for now
|
||||
m_ir->CreateRetVoid();
|
||||
#else
|
||||
|
||||
// Validation. Make sure we're escaping from a correct context. Only guest JIT should ever go through the "escape" gate.
|
||||
const auto bb = m_ir->GetInsertPoint();
|
||||
const auto arg = llvm::dyn_cast<llvm::Argument>(m_thread);
|
||||
ensure(bb->getParent()->getName().str() == arg->getParent()->getName().str());
|
||||
|
||||
const u32 hv_register_array_offset = ::offset32(&ppu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs);
|
||||
const std::string asm_ = fmt::format(
|
||||
"ldr x20, $0;\n"
|
||||
"ldr x30, [x20, #%u];\n",
|
||||
hv_register_array_offset);
|
||||
|
||||
LLVM_ASM(asm_, std::array{ m_thread }, "m", m_ir, m_function->getContext());
|
||||
m_ir->CreateRetVoid();
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void PPUTranslator::build_interpreter()
|
||||
{
|
||||
#define BUILD_VEC_INST(i) { \
|
||||
|
@ -5374,8 +5355,9 @@ void PPUTranslator::build_interpreter()
|
|||
op.vb = 2; \
|
||||
op.vc = 3; \
|
||||
this->i(op); \
|
||||
VMEscape(); \
|
||||
run_transforms(*m_function); \
|
||||
FlushRegisters(); \
|
||||
m_ir->CreateRetVoid(); \
|
||||
replace_intrinsics(*m_function); \
|
||||
}
|
||||
|
||||
BUILD_VEC_INST(VADDCUW);
|
||||
|
|
|
@ -150,9 +150,6 @@ public:
|
|||
// Emit function call
|
||||
void CallFunction(u64 target, llvm::Value* indirect = nullptr);
|
||||
|
||||
// Emit escape sequence back to hypervisor
|
||||
void VMEscape(llvm::CallInst* tail_call = nullptr, bool skip_flush = false);
|
||||
|
||||
// Emit state check mid-block
|
||||
void TestAborted();
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue