mirror of
https://github.com/RPCS3/rpcs3.git
synced 2025-04-20 11:36:13 +00:00
PPU LLVM: New "stack" strategy
Minor pessimization: all memory accesses are volatile. Special handling of prologues and epilogues. Minor optimizing assumption for SP.
This commit is contained in:
parent
712c04b2ad
commit
5c9f83c3a8
3 changed files with 47 additions and 65 deletions
|
@ -585,6 +585,32 @@ extern void ppu_initialize(const std::string& name, const std::vector<ppu_functi
|
|||
ReplaceInstWithInst(ci, CallInst::Create(f, {ci->getArgOperand(0)}));
|
||||
}
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (const auto li = dyn_cast<LoadInst>(inst))
|
||||
{
|
||||
// TODO: more careful check
|
||||
if (li->getNumUses() == 0)
|
||||
{
|
||||
// Remove unreferenced volatile loads
|
||||
li->eraseFromParent();
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (const auto si = dyn_cast<StoreInst>(inst))
|
||||
{
|
||||
// TODO: more careful check
|
||||
if (isa<UndefValue>(si->getOperand(0)) && si->getParent() == &func->getEntryBlock())
|
||||
{
|
||||
// Remove undef volatile stores
|
||||
si->eraseFromParent();
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -595,6 +621,7 @@ extern void ppu_initialize(const std::string& name, const std::vector<ppu_functi
|
|||
// Remove unused functions, structs, global variables, etc
|
||||
mpm.add(createStripDeadPrototypesPass());
|
||||
//mpm.add(createFunctionInliningPass());
|
||||
mpm.add(createDeadInstEliminationPass());
|
||||
mpm.run(*module);
|
||||
|
||||
std::string result;
|
||||
|
|
|
@ -116,7 +116,6 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t<u32>* bin,
|
|||
m_start_addr = info.addr;
|
||||
m_end_addr = info.addr + info.size;
|
||||
m_blocks.clear();
|
||||
m_value_usage.clear();
|
||||
std::fill(std::begin(m_globals), std::end(m_globals), nullptr);
|
||||
std::fill(std::begin(m_locals), std::end(m_locals), nullptr);
|
||||
|
||||
|
@ -129,9 +128,10 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t<u32>* bin,
|
|||
|
||||
// Non-volatile registers with special meaning (TODO)
|
||||
if (info.attr & ppu_attr::uses_r0) m_g_gpr[0] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 0, ".r0g");
|
||||
m_g_gpr[1] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 1, ".sp");
|
||||
m_g_gpr[1] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 1, ".spg");
|
||||
m_g_gpr[2] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 2, ".rtoc");
|
||||
m_g_gpr[13] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 13, ".tls");
|
||||
m_gpr[1] = m_ir->CreateAlloca(GetType<u64>(), nullptr, ".sp");
|
||||
|
||||
// Registers used for args or results (TODO)
|
||||
for (u32 i = 3; i <= 10; i++) m_g_gpr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + i, fmt::format(".r%u", i));
|
||||
|
@ -139,9 +139,9 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t<u32>* bin,
|
|||
for (u32 i = 2; i <= 13; i++) m_g_vr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 65 + i, fmt::format(".v%u", i));
|
||||
|
||||
/* Create local variables */
|
||||
for (u32 i = 0; i < 32; i++) m_gpr[i] = m_g_gpr[i] ? m_g_gpr[i] : m_ir->CreateAlloca(GetType<u64>(), nullptr, fmt::format(".r%d", i));
|
||||
for (u32 i = 0; i < 32; i++) m_fpr[i] = m_g_fpr[i] ? m_g_fpr[i] : m_ir->CreateAlloca(GetType<f64>(), nullptr, fmt::format(".f%d", i));
|
||||
for (u32 i = 0; i < 32; i++) m_vr[i] = m_g_vr[i] ? m_g_vr[i] : m_ir->Insert(new AllocaInst(GetType<u32[4]>(), nullptr, 16, fmt::format(".v%d", i)));
|
||||
for (u32 i = 0; i < 32; i++) if (!m_gpr[i]) m_gpr[i] = m_g_gpr[i] ? m_g_gpr[i] : m_ir->CreateAlloca(GetType<u64>(), nullptr, fmt::format(".r%d", i));
|
||||
for (u32 i = 0; i < 32; i++) if (!m_fpr[i]) m_fpr[i] = m_g_fpr[i] ? m_g_fpr[i] : m_ir->CreateAlloca(GetType<f64>(), nullptr, fmt::format(".f%d", i));
|
||||
for (u32 i = 0; i < 32; i++) if (!m_vr[i]) m_vr[i] = m_g_vr[i] ? m_g_vr[i] : m_ir->Insert(new AllocaInst(GetType<u32[4]>(), nullptr, 16, fmt::format(".v%d", i)));
|
||||
|
||||
for (u32 i = 0; i < 32; i++)
|
||||
{
|
||||
|
@ -201,6 +201,7 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t<u32>* bin,
|
|||
//m_fpscr_rnl = m_fpscr[31] = m_ir->CreateAlloca(GetType<bool>(), nullptr, "fpscr.rn.lsb");
|
||||
|
||||
/* Initialize local variables */
|
||||
m_ir->CreateStore(m_ir->CreateLoad(m_g_gpr[1]), m_gpr[1]); // SP
|
||||
m_ir->CreateStore(m_ir->getFalse(), m_xer_so); // XER.SO
|
||||
m_ir->CreateStore(m_ir->getFalse(), m_vscr_sat); // VSCR.SAT
|
||||
m_ir->CreateStore(m_ir->getTrue(), m_vscr_nj);
|
||||
|
@ -275,23 +276,6 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t<u32>* bin,
|
|||
CallFunction(0, true, _ctr);
|
||||
}
|
||||
|
||||
//for (auto i = inst_begin(*m_function), end = inst_end(*m_function); i != end;)
|
||||
//{
|
||||
// const auto inst = &*i++;
|
||||
|
||||
// // Remove unnecessary stores of global variables created by PrepareGlobalArguments() and similar functions
|
||||
// if (const auto si = dyn_cast<StoreInst>(inst))
|
||||
// {
|
||||
// const auto g = dyn_cast<GlobalVariable>(si->getOperand(1));
|
||||
|
||||
// if (g && m_value_usage[g] == 0)
|
||||
// {
|
||||
// si->eraseFromParent();
|
||||
// continue;
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
|
||||
return m_function;
|
||||
}
|
||||
|
||||
|
@ -572,12 +556,12 @@ Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align)
|
|||
{
|
||||
// Read, byteswap, bitcast
|
||||
const auto int_type = m_ir->getIntNTy(size);
|
||||
const auto value = m_ir->CreateAlignedLoad(GetMemory(addr, int_type), align, !IsStackAddr(addr));
|
||||
const auto value = m_ir->CreateAlignedLoad(GetMemory(addr, int_type), align, true);
|
||||
return m_ir->CreateBitCast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type);
|
||||
}
|
||||
|
||||
// Read normally
|
||||
return m_ir->CreateAlignedLoad(GetMemory(addr, type), align, !IsStackAddr(addr));
|
||||
return m_ir->CreateAlignedLoad(GetMemory(addr, type), align, true);
|
||||
}
|
||||
|
||||
void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align)
|
||||
|
@ -593,7 +577,7 @@ void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align
|
|||
}
|
||||
|
||||
// Write
|
||||
m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), align, !IsStackAddr(addr));
|
||||
m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), align, true);
|
||||
}
|
||||
|
||||
void PPUTranslator::CompilationError(const std::string& error)
|
||||
|
@ -3942,20 +3926,27 @@ void PPUTranslator::UNK(ppu_opcode_t op)
|
|||
|
||||
Value* PPUTranslator::GetGpr(u32 r, u32 num_bits)
|
||||
{
|
||||
m_value_usage[m_gpr[r]]++;
|
||||
return m_ir->CreateTrunc(m_ir->CreateLoad(m_gpr[r]), m_ir->getIntNTy(num_bits));
|
||||
}
|
||||
|
||||
void PPUTranslator::SetGpr(u32 r, Value* value)
|
||||
{
|
||||
m_ir->CreateStore(m_ir->CreateZExt(value, GetType<u64>()), m_gpr[r]);
|
||||
m_value_usage[m_gpr[r]]++;
|
||||
const auto i64_val = m_ir->CreateZExt(value, GetType<u64>());
|
||||
|
||||
if (true) // Update local: all regs
|
||||
{
|
||||
m_ir->CreateStore(i64_val, m_gpr[r]);
|
||||
}
|
||||
|
||||
if (r == 1) // Update global: SP
|
||||
{
|
||||
m_ir->CreateStore(i64_val, m_g_gpr[r]);
|
||||
}
|
||||
}
|
||||
|
||||
Value* PPUTranslator::GetFpr(u32 r, u32 bits, bool as_int)
|
||||
{
|
||||
const auto value = m_ir->CreateAlignedLoad(m_fpr[r], 8);
|
||||
m_value_usage[m_fpr[r]]++;
|
||||
|
||||
if (!as_int && bits == 64)
|
||||
{
|
||||
|
@ -3979,13 +3970,11 @@ void PPUTranslator::SetFpr(u32 r, Value* val)
|
|||
val->getType() == GetType<f32>() ? m_ir->CreateFPExt(val, GetType<f64>()) : val;
|
||||
|
||||
m_ir->CreateAlignedStore(f64_val, m_fpr[r], 8);
|
||||
m_value_usage[m_fpr[r]]++;
|
||||
}
|
||||
|
||||
Value* PPUTranslator::GetVr(u32 vr, VrType type)
|
||||
{
|
||||
const auto value = m_ir->CreateAlignedLoad(m_vr[vr], 16);
|
||||
m_value_usage[m_vr[vr]]++;
|
||||
|
||||
switch (type)
|
||||
{
|
||||
|
@ -4019,7 +4008,6 @@ void PPUTranslator::SetVr(u32 vr, Value* value)
|
|||
}
|
||||
|
||||
m_ir->CreateAlignedStore(m_ir->CreateBitCast(value, GetType<u32[4]>()), m_vr[vr], 16);
|
||||
m_value_usage[m_vr[vr]]++;
|
||||
}
|
||||
|
||||
Value* PPUTranslator::GetCrb(u32 crb)
|
||||
|
@ -4230,31 +4218,4 @@ Value* PPUTranslator::CheckBranchCondition(u32 bo, u32 bi)
|
|||
return use_ctr ? use_ctr : use_cond;
|
||||
}
|
||||
|
||||
// Reports whether `addr` appears to be computed from the stack pointer local.
// Returns true when `addr` is a load whose pointer operand is m_gpr[1] (the
// SP slot), when it is an Add/And/Or/Xor with at least one operand that
// qualifies (checked recursively), or when it is a Sub whose first operand
// qualifies. Any other value yields false.
bool PPUTranslator::IsStackAddr(Value* addr)
|
||||
{
|
||||
// Analyse various binary ops
|
||||
if (const auto bin_op = dyn_cast<BinaryOperator>(addr))
|
||||
{
|
||||
// For these operators either operand may carry the stack-derived value
if (bin_op->isBinaryOp(Instruction::Add) || bin_op->isBinaryOp(Instruction::And) || bin_op->isBinaryOp(Instruction::Or) || bin_op->isBinaryOp(Instruction::Xor))
|
||||
{
|
||||
return IsStackAddr(bin_op->getOperand(0)) || IsStackAddr(bin_op->getOperand(1));
|
||||
}
|
||||
|
||||
// For subtraction only the first operand (the minuend) is inspected
if (bin_op->isBinaryOp(Instruction::Sub))
|
||||
{
|
||||
return IsStackAddr(bin_op->getOperand(0));
|
||||
}
|
||||
|
||||
// TODO
// Other binary operators are not analysed and fall through to the checks below
|
||||
}
|
||||
|
||||
// Detect load instruction
// Base case: a direct load from the SP slot m_gpr[1]
|
||||
if (const auto load_op = dyn_cast<LoadInst>(addr))
|
||||
{
|
||||
return load_op->getOperand(0) == m_gpr[1];
|
||||
}
|
||||
|
||||
// Nothing matched: the address is not treated as a stack address
return false;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -149,9 +149,6 @@ class PPUTranslator final //: public CPUTranslator
|
|||
|
||||
/* Variables */
|
||||
|
||||
// Explicit register usage counter
|
||||
std::unordered_map<llvm::Value*, u64> m_value_usage;
|
||||
|
||||
// Memory base
|
||||
llvm::Value* m_base;
|
||||
|
||||
|
@ -386,9 +383,6 @@ public:
|
|||
// Branch to next instruction if condition failed, never branch on nullptr
|
||||
void UseCondition(llvm::Value* = nullptr);
|
||||
|
||||
// Check whether the address is stack
|
||||
bool IsStackAddr(llvm::Value* addr);
|
||||
|
||||
// Get memory pointer
|
||||
llvm::Value* GetMemory(llvm::Value* addr, llvm::Type* type);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue