Migrate PPU to post-process execution model

kd-11 2024-08-04 18:33:28 +03:00 committed by kd-11
parent a976ac3353
commit 34549445a8
5 changed files with 58 additions and 72 deletions

View file

@@ -23,7 +23,7 @@ namespace aarch64
using function_info_t = GHC_frame_preservation_pass::function_info_t;
GHC_frame_preservation_pass::GHC_frame_preservation_pass(
gprs base_reg,
gpr base_reg,
u32 hv_ctx_offset,
std::function<bool(const std::string&)> exclusion_callback)
{
@@ -226,7 +226,7 @@ namespace aarch64
"add x30, x%u, x30;\n" // Add to base register
"ldr x30, [x30];\n", // Load x30
execution_context.hypervisor_context_offset,
execution_context.base_register);
static_cast<u32>(execution_context.base_register));
if (function_info.stack_frame_size > 0)
{
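Editor's note: the static_cast added above matters because base_register is an enumerator being fed through fmt::format's printf-style %u specifier; an enum does not reliably arrive as the unsigned 32-bit value the format string promises. A minimal standalone sketch of the same pattern (names hypothetical):

    #include <cstdio>

    enum gpr : int { x0 = 0, x20 = 20 };

    int main()
    {
        gpr base = x20;
        // Without a cast the enum goes through varargs as int, mismatching
        // %u; the explicit cast pins the unsigned width the format expects.
        std::printf("base register: x%u\n", static_cast<unsigned>(base));
        return 0;
    }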

View file

@@ -11,7 +11,7 @@
namespace aarch64
{
enum gprs : s32
enum gpr : s32
{
x0 = 0,
x1, x2, x3, x4, x5, x6, x7, x8, x9,
@@ -21,7 +21,7 @@ namespace aarch64
// On non-x86 architectures GHC runs stackless. SP is treated as a pointer to scratchpad memory.
// This pass keeps this behavior intact while preserving the expectations of the host's C++ ABI.
class GHC_frame_preservation_pass : translator_pass
class GHC_frame_preservation_pass : public translator_pass
{
public:
struct function_info_t
@@ -46,7 +46,7 @@ namespace aarch64
struct
{
gprs base_register;
gpr base_register;
u32 hypervisor_context_offset;
} execution_context;
@@ -60,7 +60,7 @@ namespace aarch64
public:
GHC_frame_preservation_pass(
gprs base_reg,
gpr base_reg,
u32 hv_ctx_offset,
std::function<bool(const std::string&)> exclusion_callback = {});
~GHC_frame_preservation_pass() = default;
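Editor's note: the switch from class-default (private) to public inheritance above is load-bearing — the translator holds these passes behind the translator_pass base, which requires an accessible derived-to-base conversion. A sketch of the shape involved; the real translator_pass interface is not part of this diff, so its member names here are assumptions:

    namespace llvm { class Function; }

    // Assumed base interface; the actual declaration lives elsewhere.
    struct translator_pass
    {
        virtual ~translator_pass() = default;
        virtual void run(llvm::Function& f) = 0; // hypothetical entry point
    };

    // Public inheritance makes the upcast accessible, so a
    // std::unique_ptr<translator_pass> can own and invoke this pass.
    class GHC_frame_preservation_pass : public translator_pass
    {
    public:
        void run(llvm::Function& f) override
        {
            // rewrite prologue/epilogue to keep the GHC frame intact
        }
    };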

View file

@@ -291,6 +291,13 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
// and https://developer.arm.com/documentation/den0024/a/The-ABI-for-ARM-64-bit-Architecture/Register-use-in-the-AArch64-Procedure-Call-Standard/Parameters-in-general-purpose-registers
// for AArch64 calling convention
// PPU function argument layout:
// x19 = m_exec
// x20 = m_thread
// x21 = seg0
// x22 = m_base
// x23 - x25 = gpr[0] - gpr[2]
// Push callee-saved registers to the hv context
// Assume our LLVM-compiled code is unsafe and can clobber our stack. GHC on aarch64 treats stack as scratch.
// We also want to store the register context at a fixed place so we can read the hypervisor state from any location.
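Editor's note: read together with the CallFunction change later in this commit (which emits CreateCall(callee, {m_exec, m_thread, seg0, m_base, GetGpr(0), GetGpr(1), GetGpr(2)})), the layout above pins the seven GHC-convention arguments to x19-x25. As an illustrative C++ signature only; the real callees are JIT-generated:

    // Illustrative only: how a JIT-compiled PPU function receives context
    // under the GHC calling convention on AArch64 (x19..x25, in order).
    using ppu_jit_fn = void (*)(
        void* exec,              // x19 = m_exec
        ppu_thread* thread,      // x20 = m_thread
        u64 seg0,                // x21 = seg0
        void* base,              // x22 = m_base
        u64 g0, u64 g1, u64 g2); // x23 - x25 = gpr[0] - gpr[2]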

View file

@@ -16,6 +16,10 @@
#include <unordered_set>
#include <span>
#ifdef ARCH_ARM64
#include "Emu/CPU/Backends/AArch64JIT.h"
#endif
using namespace llvm;
const ppu_decoder<PPUTranslator> s_ppu_decoder;
@@ -30,6 +34,14 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_mo
// Bind context
cpu_translator::initialize(context, engine);
// Initialize transform passes
#ifdef ARCH_ARM64
std::unique_ptr<translator_pass> ghc_fixup_pass = std::make_unique<aarch64::GHC_frame_preservation_pass>(
aarch64::x20, ::offset32(&ppu_thread::hv_ctx));
register_transform_pass(ghc_fixup_pass);
#endif
// Thread context struct (TODO: safer member access)
const u32 off0 = offset32(&ppu_thread::state);
const u32 off1 = offset32(&ppu_thread::gpr);
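Editor's note: registering the pass here only queues it. Note that the Translate and GetSymbolResolver paths below now end with replace_intrinsics rather than run_transforms — under the post-process model the registered passes are applied to a function after translation, not inline with it. A sketch of the plumbing this implies; the container and signatures are assumptions, the real code lives in cpu_translator:

    // Hypothetical plumbing consistent with the calls in this commit.
    std::vector<std::unique_ptr<translator_pass>> m_transform_passes;

    void register_transform_pass(std::unique_ptr<translator_pass>& pass)
    {
        m_transform_passes.push_back(std::move(pass)); // take ownership
    }

    void run_transforms(llvm::Function& f)
    {
        for (const auto& pass : m_transform_passes)
            pass->run(f); // e.g. the GHC frame preservation fixup
    }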
@@ -208,7 +220,8 @@ Function* PPUTranslator::Translate(const ppu_function& info)
m_ir->CreateAtomicRMW(llvm::AtomicRMWInst::Or, ptr, m_ir->getInt32((+cpu_flag::wait).operator u32()), llvm::MaybeAlign{4}, llvm::AtomicOrdering::AcquireRelease);
// Create tail call to the check function
VMEscape(Call(GetType<void>(), "__check", m_thread, GetAddr()));
Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCall();
m_ir->CreateRetVoid();
}
else
{
@@ -269,7 +282,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
}
}
run_transforms(*m_function);
replace_intrinsics(*m_function);
return m_function;
}
@@ -320,8 +333,8 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info)
if (vec_addrs.empty())
{
// Possible special case for no functions (allowing the do-while optimization)
m_ir->CreateRetVoid(); // FIXME: Aarch64. It should work fine as long as there is no callchain beyond this function with a ret path.
run_transforms(*m_function);
m_ir->CreateRetVoid();
replace_intrinsics(*m_function);
return m_function;
}
@@ -377,9 +390,9 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module& info)
// Set insertion point to afterloop_block
m_ir->SetInsertPoint(after_loop);
m_ir->CreateRetVoid(); // FIXME: Aarch64 - Should be ok as long as no ret-based callchain proceeds from here
m_ir->CreateRetVoid();
run_transforms(*m_function);
replace_intrinsics(*m_function);
return m_function;
}
@@ -481,8 +494,8 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
if (_target >= u32{umax})
{
auto c = Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(*ensure(m_info.get_ptr<u32>(::narrow<u32>(m_addr + base)))));
VMEscape(c);
Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(*ensure(m_info.get_ptr<u32>(::narrow<u32>(m_addr + base)))));
m_ir->CreateRetVoid();
return;
}
else if (_target >= caddr && _target <= cend)
@@ -564,7 +577,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
const auto c = m_ir->CreateCall(callee, {m_exec, m_thread, seg0, m_base, GetGpr(0), GetGpr(1), GetGpr(2)});
c->setTailCallKind(llvm::CallInst::TCK_Tail);
c->setCallingConv(CallingConv::GHC);
VMEscape(c);
m_ir->CreateRetVoid();
}
Value* PPUTranslator::RegInit(Value*& local)
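Editor's note: the ret void emitted right after a GHC tail call above is not dead weight at the IR level — LLVM requires every basic block to end in a terminator, and the return folds away once the tail call is lowered to a branch. The pattern, as it now appears at every escape site, with explanatory comments added:

    const auto c = m_ir->CreateCall(callee, args);
    c->setTailCallKind(llvm::CallInst::TCK_Tail); // lowered to a plain branch
    c->setCallingConv(CallingConv::GHC);          // callee expects GHC registers
    m_ir->CreateRetVoid(); // mandatory terminator; dead after tail-call lowering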
@@ -778,8 +791,8 @@ void PPUTranslator::TestAborted()
m_ir->SetInsertPoint(vcheck);
// Create tail call to the check function
auto c = Call(GetType<void>(), "__check", m_thread, GetAddr());
VMEscape(c);
Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCall();
m_ir->CreateRetVoid();
m_ir->SetInsertPoint(body);
}
@@ -2205,14 +2218,16 @@ void PPUTranslator::SC(ppu_opcode_t op)
if (index < 1024)
{
auto c = Call(GetType<void>(), fmt::format("%s", ppu_syscall_code(index)), m_thread);
VMEscape(c, true);
Call(GetType<void>(), fmt::format("%s", ppu_syscall_code(index)), m_thread);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
return;
}
}
auto c = Call(GetType<void>(), op.lev ? "__lv1call" : "__syscall", m_thread, num);
VMEscape(c, true);
Call(GetType<void>(), op.lev ? "__lv1call" : "__syscall", m_thread, num);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
}
void PPUTranslator::B(ppu_opcode_t op)
@@ -2773,9 +2788,9 @@ void PPUTranslator::LWARX(ppu_opcode_t op)
{
RegStore(Trunc(GetAddr()), m_cia);
FlushRegisters();
auto inst = Call(GetType<void>(), "__resinterp", m_thread);
VMEscape(inst, true);
Call(GetType<void>(), "__resinterp", m_thread);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
return;
}
@@ -2925,9 +2940,9 @@ void PPUTranslator::LDARX(ppu_opcode_t op)
{
RegStore(Trunc(GetAddr()), m_cia);
FlushRegisters();
auto inst = Call(GetType<void>(), "__resinterp", m_thread);
VMEscape(inst, true);
Call(GetType<void>(), "__resinterp", m_thread);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
return;
}
@@ -4995,8 +5010,9 @@ void PPUTranslator::FCFID(ppu_opcode_t op)
void PPUTranslator::UNK(ppu_opcode_t op)
{
FlushRegisters();
auto c = Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode));
VMEscape(c, true);
Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode));
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
}
@@ -5275,8 +5291,9 @@ Value* PPUTranslator::CheckTrapCondition(u32 to, Value* left, Value* right)
void PPUTranslator::Trap()
{
auto c = Call(GetType<void>(), "__trap", m_thread, GetAddr());
VMEscape(c);
Call(GetType<void>(), "__trap", m_thread, GetAddr());
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid();
}
Value* PPUTranslator::CheckBranchCondition(u32 bo, u32 bi)
@@ -5323,42 +5340,6 @@ MDNode* PPUTranslator::CheckBranchProbability(u32 bo)
return nullptr;
}
void PPUTranslator::VMEscape([[maybe_unused]] llvm::CallInst* tail_call, [[maybe_unused]] bool skip_flush)
{
//if (!skip_flush)
{
// Flush
FlushRegisters();
}
#ifdef ARCH_X64
// Optionally flag last call as a tail
if (tail_call)
{
tail_call->setTailCall();
}
// This is actually AMD64 specific but good enough for now
m_ir->CreateRetVoid();
#else
// Validation. Make sure we're escaping from a correct context. Only guest JIT should ever go through the "escape" gate.
const auto bb = m_ir->GetInsertPoint();
const auto arg = llvm::dyn_cast<llvm::Argument>(m_thread);
ensure(bb->getParent()->getName().str() == arg->getParent()->getName().str());
const u32 hv_register_array_offset = ::offset32(&ppu_thread::hv_ctx, &rpcs3::hypervisor_context_t::regs);
const std::string asm_ = fmt::format(
"ldr x20, $0;\n"
"ldr x30, [x20, #%u];\n",
hv_register_array_offset);
LLVM_ASM(asm_, std::array{ m_thread }, "m", m_ir, m_function->getContext());
m_ir->CreateRetVoid();
#endif
}
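Editor's note: what replaces the inline escape deleted above is the epilogue fixup from the first file of this commit — instead of each call site emitting its own asm, the GHC_frame_preservation_pass rewrites functions as a post-process so that x30 is reloaded from the hypervisor context (anchored at x20, the ppu_thread pointer) before any return. Roughly, with the mov line assumed from the format arguments (only the add/ldr lines appear verbatim in the hunk above):

    // Approximation of the escape fixup the pass injects before a ret.
    const std::string asm_ = fmt::format(
        "mov x30, #%u;\n"      // hv context offset within ppu_thread
        "add x30, x%u, x30;\n" // add to base register (x20 = m_thread)
        "ldr x30, [x30];\n",   // reload the saved host x30 before ret
        execution_context.hypervisor_context_offset,
        static_cast<u32>(execution_context.base_register));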
void PPUTranslator::build_interpreter()
{
#define BUILD_VEC_INST(i) { \
@@ -5374,8 +5355,9 @@ void PPUTranslator::build_interpreter()
op.vb = 2; \
op.vc = 3; \
this->i(op); \
VMEscape(); \
run_transforms(*m_function); \
FlushRegisters(); \
m_ir->CreateRetVoid(); \
replace_intrinsics(*m_function); \
}
BUILD_VEC_INST(VADDCUW);

View file

@@ -150,9 +150,6 @@ public:
// Emit function call
void CallFunction(u64 target, llvm::Value* indirect = nullptr);
// Emit escape sequence back to hypervisor
void VMEscape(llvm::CallInst* tail_call = nullptr, bool skip_flush = false);
// Emit state check mid-block
void TestAborted();