diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index a3c628547c..7f2e6e106d 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -600,6 +600,12 @@ extern void sse_cellbe_stvrx(u64 addr, __m128i a); fmt::throw_exception("Unreachable! (0x%llx)", addr); } +static void ppu_check(ppu_thread& ppu, u64 addr) +{ /* Runtime helper exported as "__check": stores addr into ppu.cia, then runs ppu.check_state() */ + ppu.cia = addr; + ppu.check_state(); +} + static void ppu_trace(u64 addr) { LOG_NOTICE(PPU, "Trace: 0x%llx", addr); @@ -671,6 +677,7 @@ static void ppu_initialize() { "__cptr", (u64)&s_ppu_compiled }, { "__trap", (u64)&ppu_trap }, { "__end", (u64)&ppu_unreachable }, + { "__check", (u64)&ppu_check }, /* name the translator passes to Call() for its per-block state check */ { "__trace", (u64)&ppu_trace }, { "__hlecall", (u64)&ppu_execute_function }, { "__syscall", (u64)&ppu_execute_syscall }, diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 1bb05d25fe..5523186964 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -74,8 +74,12 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* module, u64 base) m_base = new GlobalVariable(*module, ArrayType::get(GetType(), 0x100000000)->getPointerTo(), true, GlobalValue::ExternalLinkage, 0, "__mptr"); // Thread context struct (TODO: safer member access) - std::vector thread_struct{ArrayType::get(GetType(), OFFSET_32(ppu_thread, gpr))}; - + const auto off0 = OFFSET_32(ppu_thread, state); + const auto off1 = OFFSET_32(ppu_thread, gpr); + std::vector thread_struct; + thread_struct.emplace_back(ArrayType::get(GetType(), off0)); /* struct index 0: filler sized by the offset of ppu_thread::state */ + thread_struct.emplace_back(GetType()); // state + thread_struct.emplace_back(ArrayType::get(GetType(), off1 - off0 - 4)); /* struct index 2: filler up to ppu_thread::gpr; the 4 is presumably the size of state - TODO confirm */ thread_struct.insert(thread_struct.end(), 32, GetType()); // gpr[0..31] thread_struct.insert(thread_struct.end(), 32, GetType()); // fpr[0..31] thread_struct.insert(thread_struct.end(), 32, GetType()); // vr[0..31] @@ -85,6 +89,14 @@ PPUTranslator::PPUTranslator(LLVMContext& context, Module* module, u64 base) // Callable m_call 
= new GlobalVariable(*module, ArrayType::get(GetType(), 0x40000000)->getPointerTo(), true, GlobalValue::ExternalLinkage, 0, "__cptr"); + + const auto md_name = MDString::get(m_context, "branch_weights"); + const auto md_low = ValueAsMetadata::get(ConstantInt::get(GetType(), 1)); + const auto md_high = ValueAsMetadata::get(ConstantInt::get(GetType(), 666)); + + // Metadata for branch weights + m_md_likely = MDTuple::get(m_context, {md_name, md_high, md_low}); /* weights 666 : 1 (first successor : second successor) */ + m_md_unlikely = MDTuple::get(m_context, {md_name, md_low, md_high}); /* weights 1 : 666 (first successor : second successor) */ } PPUTranslator::~PPUTranslator() @@ -128,16 +140,16 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t* bin, m_base_loaded = m_ir->CreateLoad(m_base); // Non-volatile registers with special meaning (TODO) - if (test(info.attr, ppu_attr::uses_r0)) m_g_gpr[0] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 0, ".r0g"); - m_g_gpr[1] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 1, ".spg"); - m_g_gpr[2] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 2, ".rtoc"); - m_g_gpr[13] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 13, ".tls"); + if (test(info.attr, ppu_attr::uses_r0)) m_g_gpr[0] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 3 + 0, ".r0g"); /* gpr fields start at struct index 3: filler, state and a second filler precede them */ + m_g_gpr[1] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 3 + 1, ".spg"); + m_g_gpr[2] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 3 + 2, ".rtoc"); + m_g_gpr[13] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 3 + 13, ".tls"); m_gpr[1] = m_ir->CreateAlloca(GetType(), nullptr, ".sp"); // Registers used for args or results (TODO) - for (u32 i = 3; i <= 10; i++) m_g_gpr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + i, fmt::format(".r%u", i)); - for (u32 i = 1; i <= 13; i++) m_g_fpr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 33 + i, fmt::format(".f%u", i)); - for (u32 i = 2; i <= 13; i++) m_g_vr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 65 + i, fmt::format(".v%u", i)); + for (u32 i = 3; i <= 10; i++) m_g_gpr[i] = 
m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 3 + i, fmt::format(".r%u", i)); + for (u32 i = 1; i <= 13; i++) m_g_fpr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 35 + i, fmt::format(".f%u", i)); /* 35 = 3 + 32: fpr fields follow the 32 gpr fields */ + for (u32 i = 2; i <= 13; i++) m_g_vr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 67 + i, fmt::format(".v%u", i)); /* 67 = 3 + 64: vr fields follow the gpr and fpr fields */ /* Create local variables */ for (u32 i = 0; i < 32; i++) if (!m_gpr[i]) m_gpr[i] = m_g_gpr[i] ? m_g_gpr[i] : m_ir->CreateAlloca(GetType(), nullptr, fmt::format(".r%d", i)); @@ -154,7 +166,7 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t* bin, "so", }; - //m_cr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 97 + i, fmt::format("cr%u.%s", i / 4, names[i % 4])); + //m_cr[i] = m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 99 + i, fmt::format("cr%u.%s", i / 4, names[i % 4])); m_cr[i] = m_ir->CreateAlloca(GetType(), 0, fmt::format("cr%u.%s", i / 4, names[i % 4])); } @@ -208,8 +220,8 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t* bin, m_ir->CreateStore(m_ir->getTrue(), m_vscr_nj); // TODO: only loaded r0 and r12 (r12 is extended argument for program initialization) - if (!m_g_gpr[0]) m_ir->CreateStore(m_ir->CreateLoad(m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 0)), m_gpr[0]); - if (!m_g_gpr[12]) m_ir->CreateStore(m_ir->CreateLoad(m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1 + 12)), m_gpr[12]); + if (!m_g_gpr[0]) m_ir->CreateStore(m_ir->CreateLoad(m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 3 + 0)), m_gpr[0]); + if (!m_g_gpr[12]) m_ir->CreateStore(m_ir->CreateLoad(m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 3 + 12)), m_gpr[12]); m_jtr = BasicBlock::Create(m_context, "__jtr", m_function); @@ -223,6 +235,20 @@ Function* PPUTranslator::TranslateToIR(const ppu_function& info, be_t* bin, // Preserve current address (m_current_addr may be changed by the decoder) const u64 addr = m_current_addr; + if (m_current_addr == m_start_addr || info.blocks.count(m_current_addr)) + { + // 
Bloat the beginning of each block: check state (a null state word is treated as the common case) + const auto vstate = m_ir->CreateLoad(m_ir->CreateConstGEP2_32(nullptr, m_thread, 0, 1)); /* struct index 1 is the state field of the thread context */ + const auto vblock = BasicBlock::Create(m_context, fmt::format("l0c_%llx", m_current_addr), m_function); + const auto vcheck = BasicBlock::Create(m_context, fmt::format("lcc_%llx", m_current_addr), m_function); + + m_ir->CreateCondBr(m_ir->CreateIsNull(vstate), vblock, vcheck, m_md_likely); /* null state -> vblock is the hot successor; the __check call in vcheck stays on the cold path */ + m_ir->SetInsertPoint(vcheck); + Call(GetType(), "__check", m_thread, m_ir->getInt64(m_current_addr)); /* slow path: pass the thread and the current address to the runtime helper */ + m_ir->CreateBr(vblock); + m_ir->SetInsertPoint(vblock); + } + // Translate opcode const u32 op = *(m_bin = bin + (addr - m_start_addr) / sizeof(u32)); (this->*(s_ppu_decoder.decode(op)))({op}); @@ -534,12 +560,12 @@ Value* PPUTranslator::Trunc(Value* value, Type* type) return m_ir->CreateTrunc(value, type ? type : ScaleType(value->getType(), -1)); } -void PPUTranslator::UseCondition(Value* cond) +void PPUTranslator::UseCondition(MDNode* hint, Value* cond) { if (cond) { const auto local = BasicBlock::Create(m_context, fmt::format("loc_%llx.cond", m_current_addr/* - m_start_addr*/), m_function); - m_ir->CreateCondBr(cond, local, GetBasicBlock(m_current_addr + 4)); + m_ir->CreateCondBr(cond, local, GetBasicBlock(m_current_addr + 4), hint); /* hint may be nullptr, in which case no branch weights are attached */ m_ir->SetInsertPoint(local); } } @@ -1659,13 +1685,13 @@ void PPUTranslator::VXOR(ppu_opcode_t op) void PPUTranslator::TDI(ppu_opcode_t op) { - UseCondition(CheckTrapCondition(op.bo, GetGpr(op.ra), m_ir->getInt64(op.simm16))); + UseCondition(m_md_unlikely, CheckTrapCondition(op.bo, GetGpr(op.ra), m_ir->getInt64(op.simm16))); /* the trap path is hinted as the cold successor */ Trap(m_current_addr); } void PPUTranslator::TWI(ppu_opcode_t op) { - UseCondition(CheckTrapCondition(op.bo, GetGpr(op.ra, 32), m_ir->getInt32(op.simm16))); + UseCondition(m_md_unlikely, CheckTrapCondition(op.bo, GetGpr(op.ra, 32), m_ir->getInt32(op.simm16))); Trap(m_current_addr); } @@ -1735,7 +1761,7 @@ void PPUTranslator::BC(ppu_opcode_t op) } else if (cond) { - 
m_ir->CreateCondBr(cond, GetBasicBlock(target), GetBasicBlock(m_current_addr + 4)); + m_ir->CreateCondBr(cond, GetBasicBlock(target), GetBasicBlock(m_current_addr + 4), CheckBranchProbability(op.bo)); /* weights come from the BO field; nullptr when it carries no hint */ return; } else @@ -1746,7 +1772,7 @@ void PPUTranslator::BC(ppu_opcode_t op) } // External branch - UseCondition(cond); + UseCondition(CheckBranchProbability(op.bo), cond); CallFunction(target, !op.lk); } @@ -1803,7 +1829,7 @@ void PPUTranslator::MCRF(ppu_opcode_t op) void PPUTranslator::BCLR(ppu_opcode_t op) { - UseCondition(CheckBranchCondition(op.bo, op.bi)); + UseCondition(CheckBranchProbability(op.bo), CheckBranchCondition(op.bo, op.bi)); if (op.lk) { @@ -1868,7 +1894,7 @@ void PPUTranslator::CROR(ppu_opcode_t op) void PPUTranslator::BCCTR(ppu_opcode_t op) { - UseCondition(CheckBranchCondition(op.bo | 0x4, op.bi)); + UseCondition(CheckBranchProbability(op.bo | 0x4), CheckBranchCondition(op.bo | 0x4, op.bi)); /* the same adjusted BO value feeds both the hint and the condition */ // Jumptable: sorted set of possible targets std::set targets; @@ -2234,7 +2260,7 @@ void PPUTranslator::CMP(ppu_opcode_t op) void PPUTranslator::TW(ppu_opcode_t op) { - UseCondition(CheckTrapCondition(op.bo, GetGpr(op.ra, 32), GetGpr(op.rb, 32))); + UseCondition(m_md_unlikely, CheckTrapCondition(op.bo, GetGpr(op.ra, 32), GetGpr(op.rb, 32))); Trap(m_current_addr); } @@ -2435,7 +2461,7 @@ void PPUTranslator::ANDC(ppu_opcode_t op) void PPUTranslator::TD(ppu_opcode_t op) { - UseCondition(CheckTrapCondition(op.bo, GetGpr(op.ra), GetGpr(op.rb))); + UseCondition(m_md_unlikely, CheckTrapCondition(op.bo, GetGpr(op.ra), GetGpr(op.rb))); Trap(m_current_addr); } @@ -4238,4 +4264,20 @@ Value* PPUTranslator::CheckBranchCondition(u32 bo, u32 bi) return use_ctr ? 
use_ctr : use_cond; } +MDNode* PPUTranslator::CheckBranchProbability(u32 bo) +{ /* Maps a BO value to branch-weight metadata; returns nullptr when no hint applies */ + const bool bo0 = (bo & 0x10) != 0; /* bo0..bo4 name the five BO bits from 0x10 down to 0x01 */ + const bool bo1 = (bo & 0x08) != 0; + const bool bo2 = (bo & 0x04) != 0; + const bool bo3 = (bo & 0x02) != 0; + const bool bo4 = (bo & 0x01) != 0; + + if ((bo0 && bo1) || (bo2 && bo3)) /* only these bit combinations yield a hint */ + { + return bo4 ? m_md_likely : m_md_unlikely; /* the 0x01 bit picks which weight set is returned */ + } + + return nullptr; +} + #endif diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index e38009bdd8..a27ef85894 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -145,6 +145,9 @@ class PPUTranslator final //: public CPUTranslator // JT resolver block llvm::BasicBlock* m_jtr; + llvm::MDNode* m_md_unlikely; /* branch-weight metadata nodes, assigned in the constructor */ + llvm::MDNode* m_md_likely; + // Current binary data be_t* m_bin{}; @@ -379,11 +382,14 @@ public: // Emit trap llvm::Value* Trap(u64 addr); - // Check condition for branch instructions + // Get condition for branch instructions llvm::Value* CheckBranchCondition(u32 bo, u32 bi); + // Get hint for branch instructions + llvm::MDNode* CheckBranchProbability(u32 bo); + // Branch to next instruction if condition failed, never branch on nullptr - void UseCondition(llvm::Value* = nullptr); + void UseCondition(llvm::MDNode* hint, llvm::Value* = nullptr); /* hint is forwarded to CreateCondBr as its branch-weights argument */ // Get memory pointer llvm::Value* GetMemory(llvm::Value* addr, llvm::Type* type);