mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 11:36:13 +00:00
PPU Analyser: compile certain functions on per-instruction basis
PPU LLVM: optimize small blocks
This commit is contained in:
parent
891ebd0cb1
commit
8a029159cd
2 changed files with 101 additions and 16 deletions
|
@ -633,7 +633,7 @@ void ppu_module::analyse(u32 lib_toc, u32 entry)
|
|||
{
|
||||
if (!_seg.addr) continue;
|
||||
|
||||
if (value >= _seg.addr && value < _seg.addr + _seg.size)
|
||||
if (value >= start && value < end)
|
||||
{
|
||||
addr_heap.emplace(value);
|
||||
break;
|
||||
|
@ -1527,6 +1527,25 @@ void ppu_module::analyse(u32 lib_toc, u32 entry)
|
|||
// Decompose functions to basic blocks
|
||||
for (auto&& [_, func] : as_rvalue(std::move(fmap)))
|
||||
{
|
||||
if (func.attr & ppu_attr::no_size && entry)
|
||||
{
|
||||
// Disabled for PRX for now
|
||||
const u32 lim = get_limit(func.addr);
|
||||
|
||||
ppu_log.warning("Function 0x%x will be compiled on per-instruction basis (next=0x%x)", func.addr, lim);
|
||||
|
||||
for (u32 addr = func.addr; addr < lim; addr += 4)
|
||||
{
|
||||
auto& block = fmap[addr];
|
||||
block.addr = addr;
|
||||
block.size = 4;
|
||||
block.toc = func.toc;
|
||||
block.attr = ppu_attr::no_size;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
for (auto [addr, size] : func.blocks)
|
||||
{
|
||||
if (!size)
|
||||
|
@ -1583,7 +1602,7 @@ void ppu_module::analyse(u32 lib_toc, u32 entry)
|
|||
case 109:
|
||||
case 110:
|
||||
{
|
||||
ppu_log.notice("Added block from reloc: 0x%x (0x%x, %u)", target, rel.addr, rel.type);
|
||||
ppu_log.trace("Added block from reloc: 0x%x (0x%x, %u) (heap=%d)", target, rel.addr, rel.type, addr_heap.count(target));
|
||||
block_queue.emplace_back(target, 0);
|
||||
block_set.emplace(target);
|
||||
continue;
|
||||
|
@ -1598,8 +1617,11 @@ void ppu_module::analyse(u32 lib_toc, u32 entry)
|
|||
u32 exp = start;
|
||||
u32 lim = end;
|
||||
|
||||
// Start with full scan
|
||||
block_queue.emplace_back(exp, lim);
|
||||
// Start with full scan (disabled for PRX for now)
|
||||
if (entry)
|
||||
{
|
||||
block_queue.emplace_back(exp, lim);
|
||||
}
|
||||
|
||||
// block_queue may grow
|
||||
for (usz i = 0; i < block_queue.size(); i++)
|
||||
|
@ -1731,6 +1753,11 @@ void ppu_module::analyse(u32 lib_toc, u32 entry)
|
|||
block.addr = exp;
|
||||
block.size = i_pos - exp;
|
||||
ppu_log.trace("Block __0x%x added (size=0x%x)", block.addr, block.size);
|
||||
|
||||
if (get_limit(exp) == end)
|
||||
{
|
||||
block.attr += ppu_attr::no_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1750,9 +1777,26 @@ void ppu_module::analyse(u32 lib_toc, u32 entry)
|
|||
}
|
||||
|
||||
// Convert map to vector (destructive)
|
||||
for (auto&& pair : as_rvalue(std::move(fmap)))
|
||||
for (auto&& [_, block] : as_rvalue(std::move(fmap)))
|
||||
{
|
||||
funcs.emplace_back(std::move(pair.second));
|
||||
if (block.attr & ppu_attr::no_size && block.size > 4 && entry)
|
||||
{
|
||||
// Disabled for PRX for now
|
||||
ppu_log.warning("Block 0x%x will be compiled on per-instruction basis (size=0x%x)", block.addr, block.size);
|
||||
|
||||
for (u32 addr = block.addr; addr < block.addr + block.size; addr += 4)
|
||||
{
|
||||
auto& i = funcs.emplace_back();
|
||||
i.addr = addr;
|
||||
i.size = 4;
|
||||
i.toc = block.toc;
|
||||
i.attr = ppu_attr::no_size;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
funcs.emplace_back(std::move(block));
|
||||
}
|
||||
|
||||
ppu_log.notice("Block analysis: %zu blocks (%zu enqueued)", funcs.size(), block_queue.size());
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
using namespace llvm;
|
||||
|
||||
const ppu_decoder<PPUTranslator> s_ppu_decoder;
|
||||
const ppu_decoder<ppu_itype> s_ppu_itype;
|
||||
const ppu_decoder<ppu_iname> s_ppu_iname;
|
||||
|
||||
PPUTranslator::PPUTranslator(LLVMContext& context, Module* _module, const ppu_module& info, ExecutionEngine& engine)
|
||||
|
@ -161,20 +162,60 @@ Function* PPUTranslator::Translate(const ppu_function& info)
|
|||
const u64 base = m_reloc ? m_reloc->addr : 0;
|
||||
m_addr = info.addr - base;
|
||||
|
||||
// Don't emit check in small blocks without terminator
|
||||
bool need_check = info.size >= 16;
|
||||
|
||||
for (u32 addr = m_addr; addr < m_addr + info.size; addr += 4)
|
||||
{
|
||||
const u32 op = vm::read32(vm::cast(addr + base));
|
||||
|
||||
switch (s_ppu_itype.decode(op))
|
||||
{
|
||||
case ppu_itype::UNK:
|
||||
case ppu_itype::ECIWX:
|
||||
case ppu_itype::ECOWX:
|
||||
case ppu_itype::TD:
|
||||
case ppu_itype::TDI:
|
||||
case ppu_itype::TW:
|
||||
case ppu_itype::TWI:
|
||||
case ppu_itype::B:
|
||||
case ppu_itype::BC:
|
||||
case ppu_itype::BCCTR:
|
||||
case ppu_itype::BCLR:
|
||||
case ppu_itype::SC:
|
||||
{
|
||||
need_check = true;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
m_thread = &*m_function->arg_begin();
|
||||
m_base_loaded = m_ir->CreateLoad(m_base);
|
||||
|
||||
const auto body = BasicBlock::Create(m_context, "__body", m_function);
|
||||
|
||||
// Check status register in the entry block
|
||||
const auto vstate = m_ir->CreateLoad(m_ir->CreateStructGEP(nullptr, m_thread, 1), true);
|
||||
const auto vcheck = BasicBlock::Create(m_context, "__test", m_function);
|
||||
m_ir->CreateCondBr(m_ir->CreateIsNull(vstate), body, vcheck, m_md_likely);
|
||||
if (need_check)
|
||||
{
|
||||
// Check status register in the entry block
|
||||
const auto vstate = m_ir->CreateLoad(m_ir->CreateStructGEP(nullptr, m_thread, 1), true);
|
||||
const auto vcheck = BasicBlock::Create(m_context, "__test", m_function);
|
||||
m_ir->CreateCondBr(m_ir->CreateIsNull(vstate), body, vcheck, m_md_likely);
|
||||
|
||||
// Create tail call to the check function
|
||||
m_ir->SetInsertPoint(vcheck);
|
||||
Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCallKind(llvm::CallInst::TCK_Tail);
|
||||
m_ir->CreateRetVoid();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_ir->CreateBr(body);
|
||||
}
|
||||
|
||||
// Create tail call to the check function
|
||||
m_ir->SetInsertPoint(vcheck);
|
||||
Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCallKind(llvm::CallInst::TCK_Tail);
|
||||
m_ir->CreateRetVoid();
|
||||
m_ir->SetInsertPoint(body);
|
||||
|
||||
// Process blocks
|
||||
|
@ -2990,7 +3031,7 @@ void PPUTranslator::EQV(ppu_opcode_t op)
|
|||
|
||||
// PPUTranslator handler for the ECIWX opcode.
// NOTE(review): this text is a rendered diff, not compilable source. The hunk
// header for this section reports 7 lines on both sides, so the two body
// statements below belong to different revisions rather than running in sequence.
// Presumably the "__eciwx" call is the removed line and UNK(op) is its
// replacement (removed lines normally precede added ones) - confirm against the repository.
void PPUTranslator::ECIWX(ppu_opcode_t op)
|
||||
{
|
||||
// Calls "__eciwx" with GetGpr(op.rb), or GetGpr(op.ra) + GetGpr(op.rb) when op.ra is non-zero, and stores the result via SetGpr(op.rd, ...).
SetGpr(op.rd, Call(GetType<u64>(), "__eciwx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb)));
|
||||
// Forwards the opcode to the UNK handler.
UNK(op);
|
||||
}
|
||||
|
||||
void PPUTranslator::LHZUX(ppu_opcode_t op)
|
||||
|
@ -3111,7 +3152,7 @@ void PPUTranslator::ORC(ppu_opcode_t op)
|
|||
|
||||
// PPUTranslator handler for the ECOWX opcode.
// NOTE(review): this text is a rendered diff, not compilable source. The hunk
// header for this section reports 7 lines on both sides, so the two body
// statements below belong to different revisions rather than running in sequence.
// Presumably the "__ecowx" call is the removed line and UNK(op) is its
// replacement (removed lines normally precede added ones) - confirm against the repository.
void PPUTranslator::ECOWX(ppu_opcode_t op)
|
||||
{
|
||||
// Calls "__ecowx" with GetGpr(op.rb), or GetGpr(op.ra) + GetGpr(op.rb) when op.ra is non-zero, plus GetGpr(op.rs, 32) as the second argument.
Call(GetType<void>(), "__ecowx", op.ra ? m_ir->CreateAdd(GetGpr(op.ra), GetGpr(op.rb)) : GetGpr(op.rb), GetGpr(op.rs, 32));
|
||||
// Forwards the opcode to the UNK handler.
UNK(op);
|
||||
}
|
||||
|
||||
void PPUTranslator::STHUX(ppu_opcode_t op)
|
||||
|
|
Loading…
Add table
Reference in a new issue