mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-19 19:15:26 +00:00
Separate segment and function addresses in PPU LLVM
This commit is contained in:
parent
d1b047b5f1
commit
9d2acf3b1c
4 changed files with 54 additions and 37 deletions
|
@ -225,13 +225,13 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
|
|||
c.mov(x86::rbp, args[0]);
|
||||
c.mov(x86::edx, x86::dword_ptr(x86::rbp, ::offset32(&ppu_thread::cia))); // Load PC
|
||||
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::r13, x86::edx, 1, 0)); // Load call target
|
||||
c.mov(x86::rdx, x86::rax);
|
||||
c.shl(x86::rax, 16);
|
||||
c.shr(x86::rax, 16);
|
||||
c.shr(x86::rdx, 48);
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::r13, x86::rdx, 1, 0)); // Load call target
|
||||
c.movabs(x86::r12, vm::g_exec_addr_seg_offset);
|
||||
c.add(x86::r12, x86::r13);
|
||||
c.shr(x86::edx, 1);
|
||||
c.mov(x86::edx, x86::word_ptr(x86::r12, x86::edx)); // Load relocation base
|
||||
c.shl(x86::edx, 13);
|
||||
c.mov(x86::r12d, x86::edx); // Load relocation base
|
||||
c.mov(x86::r12d, x86::edx); // Set relocation base
|
||||
|
||||
c.movabs(x86::rbx, reinterpret_cast<u64>(&vm::g_base_addr));
|
||||
c.mov(x86::rbx, x86::qword_ptr(x86::rbx));
|
||||
|
@ -348,14 +348,11 @@ const auto ppu_gateway = build_function_asm<void(*)(ppu_thread*)>("ppu_gateway",
|
|||
c.ldr(call_target, arm::Mem(a64::x19, pc));
|
||||
// Compute REG_Hp
|
||||
const arm::GpX reg_hp = a64::x21;
|
||||
c.mov(reg_hp, call_target);
|
||||
c.lsr(reg_hp, reg_hp, 48);
|
||||
c.mov(reg_hp, Imm(vm::g_exec_addr_seg_offset));
|
||||
c.add(reg_hp, reg_hp, pc, arm::Shift(arm::ShiftOp::kLSR, 2));
|
||||
c.ldrh(reg_hp.w(), arm::Mem(a64::x19, reg_hp));
|
||||
c.lsl(reg_hp.w(), reg_hp.w(), 13);
|
||||
|
||||
// Zero top 16 bits of call target
|
||||
c.lsl(call_target, call_target, Imm(16));
|
||||
c.lsr(call_target, call_target, Imm(16));
|
||||
|
||||
// Load registers
|
||||
c.mov(a64::x22, Imm(reinterpret_cast<u64>(&vm::g_base_addr)));
|
||||
c.ldr(a64::x22, arm::Mem(a64::x22));
|
||||
|
@ -475,6 +472,11 @@ static inline u8* ppu_ptr(u32 addr)
|
|||
return vm::g_exec_addr + u64{addr} * 2;
|
||||
}
|
||||
|
||||
// Returns the address of the segment-data slot associated with guest address `addr`.
// The segment data starts vm::g_exec_addr_seg_offset bytes past vm::g_exec_addr and is
// indexed by (addr >> 1); callers store one u16 value per 4-byte guest instruction there.
static inline u8* ppu_seg_ptr(u32 addr)
{
	u8* const seg_table = vm::g_exec_addr + vm::g_exec_addr_seg_offset;
	const u32 slot_offset = addr >> 1;
	return seg_table + slot_offset;
}
|
||||
|
||||
static inline ppu_intrp_func_t ppu_read(u32 addr)
|
||||
{
|
||||
return read_from_ptr<ppu_intrp_func_t>(ppu_ptr(addr));
|
||||
|
@ -520,7 +522,7 @@ void ppu_recompiler_fallback(ppu_thread& ppu)
|
|||
|
||||
while (true)
|
||||
{
|
||||
if (uptr func = uptr(ppu_read(ppu.cia)); (func << 16 >> 16) != reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc))
|
||||
if (uptr func = uptr(ppu_read(ppu.cia)); func != reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc))
|
||||
{
|
||||
// We found a recompiler function at cia, return
|
||||
break;
|
||||
|
@ -775,6 +777,9 @@ extern void ppu_register_range(u32 addr, u32 size)
|
|||
utils::memory_commit(ppu_ptr(addr), u64{size} * 2, utils::protection::rw);
|
||||
ensure(vm::page_protect(addr, size, 0, vm::page_executable));
|
||||
|
||||
// Segment data
|
||||
utils::memory_commit(ppu_seg_ptr(addr), size >> 1, utils::protection::rw);
|
||||
|
||||
if (g_cfg.core.ppu_debug)
|
||||
{
|
||||
utils::memory_commit(vm::g_stat_addr + addr, size);
|
||||
|
@ -787,12 +792,13 @@ extern void ppu_register_range(u32 addr, u32 size)
|
|||
if (g_cfg.core.ppu_decoder == ppu_decoder_type::llvm)
|
||||
{
|
||||
// Assume addr is the start of first segment of PRX
|
||||
const uptr entry_value = reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc) | (seg_base << (32 + 3));
|
||||
write_to_ptr<uptr>(ppu_ptr(addr), entry_value);
|
||||
write_to_ptr<uptr>(ppu_ptr(addr), std::bit_cast<uptr>(ppu_recompiler_fallback_ghc));
|
||||
write_to_ptr<u16>(ppu_seg_ptr(addr), static_cast<u16>(seg_base >> 13));
|
||||
}
|
||||
else
|
||||
{
|
||||
write_to_ptr<ppu_intrp_func_t>(ppu_ptr(addr), ppu_fallback);
|
||||
write_to_ptr<u16>(ppu_seg_ptr(addr), 0);
|
||||
}
|
||||
|
||||
addr += 4;
|
||||
|
@ -807,7 +813,7 @@ extern void ppu_register_function_at(u32 addr, u32 size, ppu_intrp_func_t ptr =
|
|||
// Initialize specific function
|
||||
if (ptr)
|
||||
{
|
||||
write_to_ptr<uptr>(ppu_ptr(addr), (reinterpret_cast<uptr>(ptr) & 0xffff'ffff'ffffu) | (uptr(ppu_read(addr)) & ~0xffff'ffff'ffffu));
|
||||
write_to_ptr<uptr>(ppu_ptr(addr), std::bit_cast<uptr>(ptr));
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -5087,14 +5093,14 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
c.mov(x86::rax, x86::qword_ptr(x86::rax));
|
||||
c.mov(x86::dword_ptr(x86::rbp, ::offset32(&ppu_thread::cia)), x86::edx);
|
||||
|
||||
c.mov(x86::rax, x86::qword_ptr(x86::rax, x86::rdx, 1, 0)); // Load call target
|
||||
c.mov(x86::rdx, x86::rax);
|
||||
c.shl(x86::rax, 16);
|
||||
c.shr(x86::rax, 16);
|
||||
c.shr(x86::rdx, 48);
|
||||
c.mov(x86::rcx, x86::qword_ptr(x86::rax, x86::rdx, 1, 0)); // Load call target
|
||||
c.movabs(x86::r12, vm::g_exec_addr_seg_offset);
|
||||
c.add(x86::rax, x86::r12);
|
||||
c.shr(x86::edx, 1);
|
||||
c.mov(x86::edx, x86::word_ptr(x86::rax, x86::edx)); // Load relocation base
|
||||
c.shl(x86::edx, 13);
|
||||
c.mov(x86::r12d, x86::edx); // Load relocation base
|
||||
c.jmp(x86::rax);
|
||||
c.mov(x86::r12d, x86::edx); // Set relocation base
|
||||
c.jmp(x86::rcx);
|
||||
#else
|
||||
// Load REG_Base - use absolute jump target to bypass rel jmp range limits
|
||||
// X19 contains vm::g_exec_addr
|
||||
|
@ -5130,14 +5136,11 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
|
||||
// Compute REG_Hp
|
||||
const arm::GpX reg_hp = a64::x21;
|
||||
c.mov(reg_hp, call_target);
|
||||
c.lsr(reg_hp, reg_hp, 48);
|
||||
c.mov(reg_hp, Imm(vm::g_exec_addr_seg_offset));
|
||||
c.add(reg_hp, reg_hp, pc, arm::Shift(arm::ShiftOp::kLSR, 2));
|
||||
c.ldrh(reg_hp.w(), arm::Mem(exec_addr, reg_hp));
|
||||
c.lsl(reg_hp.w(), reg_hp.w(), 13);
|
||||
|
||||
// Zero top 16 bits of call target
|
||||
c.lsl(call_target, call_target, 16);
|
||||
c.lsr(call_target, call_target, 16);
|
||||
|
||||
// Execute LLE call
|
||||
c.br(call_target);
|
||||
#endif
|
||||
|
@ -5405,7 +5408,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
settings += ppu_settings::contains_symbol_resolver; // Avoid invalidating all modules for this purpose
|
||||
|
||||
// Write version, hash, CPU, settings
|
||||
fmt::append(obj_name, "v6-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
|
||||
fmt::append(obj_name, "v7-kusa-%s-%s-%s.obj", fmt::base57(output, 16), fmt::base57(settings), jit_compiler::cpu(g_cfg.core.llvm_cpu));
|
||||
}
|
||||
|
||||
if (cpu ? cpu->state.all_of(cpu_flag::exit) : Emu.IsStopped())
|
||||
|
@ -5717,7 +5720,7 @@ bool ppu_initialize(const ppu_module<lv2_obj>& info, bool check_only, u64 file_s
|
|||
|
||||
for (u32 addr = info.segs[0].addr; addr < info.segs[0].addr + info.segs[0].size; addr += 4, inst_ptr++)
|
||||
{
|
||||
if (*inst_ptr == ppu_instructions::BLR() && (reinterpret_cast<uptr>(ppu_read(addr)) << 16 >> 16) == reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc))
|
||||
if (*inst_ptr == ppu_instructions::BLR() && reinterpret_cast<uptr>(ppu_read(addr)) == reinterpret_cast<uptr>(ppu_recompiler_fallback_ghc))
|
||||
{
|
||||
write_to_ptr<ppu_intrp_func_t>(ppu_ptr(addr), BLR_func);
|
||||
}
|
||||
|
|
|
@ -411,12 +411,19 @@ Function* PPUTranslator::GetSymbolResolver(const ppu_module<lv2_obj>& info)
|
|||
|
||||
const auto faddr = m_ir->CreateLoad(ptr_inst->getResultElementType(), ptr_inst);
|
||||
const auto faddr_int = m_ir->CreatePtrToInt(faddr, get_type<uptr>());
|
||||
const auto fval = m_ir->CreateOr(m_ir->CreateShl(m_seg0, 32 + 3), faddr_int);
|
||||
const auto pos = m_ir->CreateShl(m_reloc ? m_ir->CreateAdd(func_pc, m_seg0) : func_pc, 1);
|
||||
const auto pos_32 = m_reloc ? m_ir->CreateAdd(func_pc, m_seg0) : func_pc;
|
||||
const auto pos = m_ir->CreateShl(pos_32, 1);
|
||||
const auto ptr = dyn_cast<GetElementPtrInst>(m_ir->CreateGEP(get_type<u8>(), m_exec, pos));
|
||||
|
||||
const auto seg_base_ptr = m_ir->CreateIntToPtr(m_ir->CreateAdd(
|
||||
m_ir->CreatePtrToInt(m_exec, get_type<u64>()), m_ir->getInt64(vm::g_exec_addr_seg_offset)), m_exec->getType());
|
||||
const auto seg_pos = m_ir->CreateLShr(pos_32, 1);
|
||||
const auto seg_ptr = dyn_cast<GetElementPtrInst>(m_ir->CreateGEP(get_type<u8>(), seg_base_ptr, seg_pos));
|
||||
const auto seg_val = m_ir->CreateTrunc(m_ir->CreateLShr(m_seg0, 13), get_type<u16>());
|
||||
|
||||
// Store to jumptable
|
||||
m_ir->CreateStore(fval, ptr);
|
||||
m_ir->CreateStore(faddr_int, ptr);
|
||||
m_ir->CreateStore(seg_val, seg_ptr);
|
||||
|
||||
// Increment index and branch back to loop
|
||||
const auto post_add = m_ir->CreateAdd(index_value, m_ir->getInt64(1));
|
||||
|
@ -605,10 +612,15 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
|
|||
const auto pos = m_ir->CreateShl(indirect, 1);
|
||||
const auto ptr = dyn_cast<GetElementPtrInst>(m_ir->CreateGEP(get_type<u8>(), m_exec, pos));
|
||||
const auto val = m_ir->CreateLoad(get_type<u64>(), ptr);
|
||||
callee = FunctionCallee(type, m_ir->CreateIntToPtr(m_ir->CreateAnd(val, 0xffff'ffff'ffff), type->getPointerTo()));
|
||||
callee = FunctionCallee(type, m_ir->CreateIntToPtr(val, type->getPointerTo()));
|
||||
|
||||
// Load new segment address
|
||||
seg0 = m_ir->CreateShl(m_ir->CreateLShr(val, 48), 13);
|
||||
const auto seg_base_ptr = m_ir->CreateIntToPtr(m_ir->CreateAdd(
|
||||
m_ir->CreatePtrToInt(m_exec, get_type<u64>()), m_ir->getInt64(vm::g_exec_addr_seg_offset)), m_exec->getType());
|
||||
const auto seg_pos = m_ir->CreateLShr(indirect, 1);
|
||||
const auto seg_ptr = dyn_cast<GetElementPtrInst>(m_ir->CreateGEP(get_type<u8>(), seg_base_ptr, seg_pos));
|
||||
const auto seg_val = m_ir->CreateZExt(m_ir->CreateLoad(get_type<u16>(), seg_ptr), get_type<u64>());
|
||||
seg0 = m_ir->CreateShl(seg_val, 13);
|
||||
}
|
||||
|
||||
m_ir->SetInsertPoint(block);
|
||||
|
|
|
@ -47,7 +47,7 @@ namespace vm
|
|||
u8* const g_sudo_addr = g_base_addr + 0x1'0000'0000;
|
||||
|
||||
// Auxiliary virtual memory for executable areas
|
||||
u8* const g_exec_addr = memory_reserve_4GiB(g_sudo_addr, 0x200000000);
|
||||
u8* const g_exec_addr = memory_reserve_4GiB(g_sudo_addr, 0x300000000);
|
||||
|
||||
// Hooks for memory R/W interception (default: zero offset to some function with only ret instructions)
|
||||
u8* const g_hook_addr = memory_reserve_4GiB(g_exec_addr, 0x800000000);
|
||||
|
|
|
@ -34,6 +34,8 @@ namespace vm
|
|||
extern u8* const g_free_addr;
|
||||
extern u8 g_reservations[65536 / 128 * 64];
|
||||
|
||||
// Byte offset added to g_exec_addr to reach the segment data used by the PPU code
// (addressed as g_exec_addr + g_exec_addr_seg_offset + (addr >> 1)).
static constexpr u64 g_exec_addr_seg_offset = 0x2'0000'0000ULL;
|
||||
|
||||
struct writer_lock;
|
||||
|
||||
enum memory_location_t : uint
|
||||
|
|
Loading…
Add table
Reference in a new issue