From dac13baade0c2622aad7ded64e21506797387e00 Mon Sep 17 00:00:00 2001 From: JosJuice Date: Fri, 20 Jun 2025 09:16:55 +0200 Subject: [PATCH] Core: Postpone page table updates when DR isn't set Page table mappings are only used when DR is set, so if page tables are updated when DR isn't set, we can defer updating the page table mappings until DR gets set. This lets us batch page table updates in the Disney Trio of Destruction, improving performance when the games are loading data. It doesn't help much for GameCube games, because those run tlbie with DR set. The PowerPCState struct has had its members slightly reordered. I had to put pagetable_update_pending less than 4 KiB from the start so AArch64's LDRB (immediate) can access it, and that gave me an opportunity to move around some other members to cut down on padding. --- Source/Core/Core/Boot/Boot.cpp | 4 +-- Source/Core/Core/Boot/Boot.h | 2 +- Source/Core/Core/Boot/Boot_BS2Emu.cpp | 9 +++--- Source/Core/Core/FifoPlayer/FifoPlayer.cpp | 2 +- Source/Core/Core/IOS/MIOS.cpp | 2 +- Source/Core/Core/PowerPC/Expression.cpp | 2 +- Source/Core/Core/PowerPC/GDBStub.cpp | 2 +- .../Interpreter/Interpreter_Branch.cpp | 2 +- .../Interpreter_SystemRegisters.cpp | 2 +- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 23 +++++++++++++- .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 13 +++++--- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 31 ++++++++++++++++++- Source/Core/Core/PowerPC/MMU.cpp | 16 ++++++++-- Source/Core/Core/PowerPC/MMU.h | 1 + Source/Core/Core/PowerPC/PowerPC.cpp | 13 ++++++-- Source/Core/Core/PowerPC/PowerPC.h | 26 +++++++++------- .../DolphinQt/Debugger/RegisterWidget.cpp | 2 +- 17 files changed, 115 insertions(+), 37 deletions(-) diff --git a/Source/Core/Core/Boot/Boot.cpp b/Source/Core/Core/Boot/Boot.cpp index e43ffc52d4..7509324449 100644 --- a/Source/Core/Core/Boot/Boot.cpp +++ b/Source/Core/Core/Boot/Boot.cpp @@ -460,7 +460,7 @@ bool CBoot::Load_BS2(Core::System& system, const std::string& boot_rom_filename) 
ppc_state.pc = 0x81200150; - PowerPC::MSRUpdated(ppc_state); + PowerPC::MSRUpdated(system); return true; } @@ -530,7 +530,7 @@ bool CBoot::BootUp(Core::System& system, const Core::CPUThreadGuard& guard, auto& ppc_state = system.GetPPCState(); - SetupMSR(ppc_state); + SetupMSR(system); SetupHID(ppc_state, system.IsWii()); SetupBAT(system, system.IsWii()); CopyDefaultExceptionHandlers(system); diff --git a/Source/Core/Core/Boot/Boot.h b/Source/Core/Core/Boot/Boot.h index f892ee7c75..087245ee0f 100644 --- a/Source/Core/Core/Boot/Boot.h +++ b/Source/Core/Core/Boot/Boot.h @@ -169,7 +169,7 @@ private: static bool Boot_WiiWAD(Core::System& system, const DiscIO::VolumeWAD& wad); static bool BootNANDTitle(Core::System& system, u64 title_id); - static void SetupMSR(PowerPC::PowerPCState& ppc_state); + static void SetupMSR(Core::System& system); static void SetupHID(PowerPC::PowerPCState& ppc_state, bool is_wii); static void SetupBAT(Core::System& system, bool is_wii); static bool RunApploader(Core::System& system, const Core::CPUThreadGuard& guard, bool is_wii, diff --git a/Source/Core/Core/Boot/Boot_BS2Emu.cpp b/Source/Core/Core/Boot/Boot_BS2Emu.cpp index 884a11bd87..4c37069a2e 100644 --- a/Source/Core/Core/Boot/Boot_BS2Emu.cpp +++ b/Source/Core/Core/Boot/Boot_BS2Emu.cpp @@ -68,14 +68,15 @@ void CBoot::RunFunction(Core::System& system, u32 address) power_pc.SingleStep(); } -void CBoot::SetupMSR(PowerPC::PowerPCState& ppc_state) +void CBoot::SetupMSR(Core::System& system) { + PowerPC::PowerPCState& ppc_state = system.GetPPCState(); // 0x0002032 ppc_state.msr.RI = 1; ppc_state.msr.DR = 1; ppc_state.msr.IR = 1; ppc_state.msr.FP = 1; - PowerPC::MSRUpdated(ppc_state); + PowerPC::MSRUpdated(system); } void CBoot::SetupHID(PowerPC::PowerPCState& ppc_state, bool is_wii) @@ -286,7 +287,7 @@ bool CBoot::EmulatedBS2_GC(Core::System& system, const Core::CPUThreadGuard& gua auto& ppc_state = system.GetPPCState(); - SetupMSR(ppc_state); + SetupMSR(system); SetupHID(ppc_state, /*is_wii*/ 
false); SetupBAT(system, /*is_wii*/ false); @@ -593,7 +594,7 @@ bool CBoot::EmulatedBS2_Wii(Core::System& system, const Core::CPUThreadGuard& gu auto& ppc_state = system.GetPPCState(); - SetupMSR(ppc_state); + SetupMSR(system); SetupHID(ppc_state, /*is_wii*/ true); SetupBAT(system, /*is_wii*/ true); diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp index bb1ae839f5..e9cafcd6a6 100644 --- a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp +++ b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp @@ -650,7 +650,7 @@ void FifoPlayer::LoadMemory() HID4(ppc_state).SBE = 1; } - PowerPC::MSRUpdated(ppc_state); + PowerPC::MSRUpdated(m_system); auto& mmu = m_system.GetMMU(); mmu.DBATUpdated(); diff --git a/Source/Core/Core/IOS/MIOS.cpp b/Source/Core/Core/IOS/MIOS.cpp index d47400c9d2..fb0ce67f67 100644 --- a/Source/Core/Core/IOS/MIOS.cpp +++ b/Source/Core/Core/IOS/MIOS.cpp @@ -89,7 +89,7 @@ bool Load(Core::System& system) PowerPC::PowerPCState& ppc_state = power_pc.GetPPCState(); ppc_state.msr.Hex = 0; ppc_state.pc = 0x3400; - PowerPC::MSRUpdated(ppc_state); + PowerPC::MSRUpdated(system); NOTICE_LOG_FMT(IOS, "Loaded MIOS and bootstrapped PPC."); diff --git a/Source/Core/Core/PowerPC/Expression.cpp b/Source/Core/Core/PowerPC/Expression.cpp index bd97e8a8b1..315faa9104 100644 --- a/Source/Core/Core/PowerPC/Expression.cpp +++ b/Source/Core/Core/PowerPC/Expression.cpp @@ -469,7 +469,7 @@ void Expression::SynchronizeBindings(Core::System& system, SynchronizeDirection else { ppc_state.msr.Hex = static_cast(static_cast(v->value)); - PowerPC::MSRUpdated(ppc_state); + PowerPC::MSRUpdated(system); } break; } diff --git a/Source/Core/Core/PowerPC/GDBStub.cpp b/Source/Core/Core/PowerPC/GDBStub.cpp index 92204d8be9..a20090d9c9 100644 --- a/Source/Core/Core/PowerPC/GDBStub.cpp +++ b/Source/Core/Core/PowerPC/GDBStub.cpp @@ -664,7 +664,7 @@ static void WriteRegister() break; case 65: ppc_state.msr.Hex = re32hex(bufptr); - PowerPC::MSRUpdated(ppc_state); 
+ PowerPC::MSRUpdated(system); break; case 66: ppc_state.cr.Set(re32hex(bufptr)); diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp index 1d57d971b4..120482a4f6 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp @@ -161,7 +161,7 @@ void Interpreter::rfi(Interpreter& interpreter, UGeckoInstruction inst) // set NPC to saved offset and resume ppc_state.npc = SRR0(ppc_state); - PowerPC::MSRUpdated(ppc_state); + PowerPC::MSRUpdated(interpreter.m_system); interpreter.m_end_block = true; } diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index e32875cdef..26876537f4 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -181,7 +181,7 @@ void Interpreter::mtmsr(Interpreter& interpreter, UGeckoInstruction inst) ppc_state.msr.Hex = ppc_state.gpr[inst.RS]; - PowerPC::MSRUpdated(ppc_state); + PowerPC::MSRUpdated(interpreter.m_system); // FE0/FE1 may have been set CheckFPExceptions(ppc_state); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp index b09279d458..5199dc8a1e 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp @@ -506,6 +506,8 @@ void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg) { ASSERT(!msr.IsSimpleReg(scratch_reg)); + constexpr u32 dr_bit = 1 << UReg_MSR{}.DR.StartBit(); + // Update mem_ptr auto& memory = m_system.GetMemory(); if (msr.IsImm()) @@ -517,7 +519,7 @@ void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg) { MOV(64, R(RMEM), ImmPtr(memory.GetLogicalBase())); MOV(64, R(scratch_reg), ImmPtr(memory.GetPhysicalBase())); - TEST(32, msr, Imm32(1 << (31 - 27))); + TEST(32, msr, 
Imm32(dr_bit)); CMOVcc(64, RMEM, R(scratch_reg), CC_Z); } MOV(64, PPCSTATE(mem_ptr), R(RMEM)); @@ -541,6 +543,25 @@ void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg) OR(32, R(scratch_reg), Imm32(other_feature_flags)); MOV(32, PPCSTATE(feature_flags), R(scratch_reg)); } + + // Call PageTableUpdatedFromJit if needed + if (!msr.IsImm() || UReg_MSR(msr.Imm32()).DR) + { + gpr.Flush(); + fpr.Flush(); + FixupBranch dr_unset; + if (!msr.IsImm()) + { + TEST(32, msr, Imm32(dr_bit)); + dr_unset = J_CC(CC_Z); + } + CMP(8, PPCSTATE(pagetable_update_pending), Imm8(0)); + FixupBranch update_not_pending = J_CC(CC_E); + ABI_CallFunctionP(&PowerPC::MMU::PageTableUpdatedFromJit, &m_system.GetMMU()); + SetJumpTarget(update_not_pending); + if (!msr.IsImm()) + SetJumpTarget(dr_unset); + } } void Jit64::WriteExit(u32 destination, bool bl, u32 after) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 6331358359..c485e39517 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -436,11 +436,14 @@ void Jit64::mtmsr(UGeckoInstruction inst) FALLBACK_IF(jo.fp_exceptions); { - RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read); - RegCache::Realize(Rs); - MOV(32, PPCSTATE(msr), Rs); - - MSRUpdated(Rs, RSCRATCH2); + OpArg Rs_op_arg; + { + RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read); + RegCache::Realize(Rs); + MOV(32, PPCSTATE(msr), Rs); + Rs_op_arg = Rs; + } + MSRUpdated(Rs_op_arg, RSCRATCH2); } gpr.Flush(); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 65a452ceca..b648510672 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -445,10 +445,27 @@ void JitArm64::MSRUpdated(u32 msr) MOVI2R(WA, feature_flags); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags)); } + + // Call PageTableUpdatedFromJit if 
needed + if (UReg_MSR(msr).DR) + { + gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); + fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); + + auto WA = gpr.GetScopedReg(); + + static_assert(PPCSTATE_OFF(pagetable_update_pending) < 0x1000); + LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(pagetable_update_pending)); + FixupBranch update_not_pending = CBZ(WA); + ABI_CallFunction(&PowerPC::MMU::PageTableUpdatedFromJit, &m_system.GetMMU()); + SetJumpTarget(update_not_pending); + } } void JitArm64::MSRUpdated(ARM64Reg msr) { + constexpr LogicalImm dr_bit(1ULL << UReg_MSR{}.DR.StartBit(), GPRSize::B32); + auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); @@ -456,7 +473,7 @@ void JitArm64::MSRUpdated(ARM64Reg msr) auto& memory = m_system.GetMemory(); MOVP2R(MEM_REG, jo.fastmem ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase()); MOVP2R(XA, jo.fastmem ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase()); - TST(msr, LogicalImm(1 << (31 - 27), GPRSize::B32)); + TST(msr, dr_bit); CSEL(MEM_REG, MEM_REG, XA, CCFlags::CC_NEQ); STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr)); @@ -470,6 +487,18 @@ void JitArm64::MSRUpdated(ARM64Reg msr) if (other_feature_flags != 0) ORR(WA, WA, LogicalImm(other_feature_flags, GPRSize::B32)); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags)); + + // Call PageTableUpdatedFromJit if needed + MOV(WA, msr); + gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); + fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); + FixupBranch dr_unset = TBZ(WA, dr_bit); + static_assert(PPCSTATE_OFF(pagetable_update_pending) < 0x1000); + LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(pagetable_update_pending)); + FixupBranch update_not_pending = CBZ(WA); + ABI_CallFunction(&PowerPC::MMU::PageTableUpdatedFromJit, &m_system.GetMMU()); + SetJumpTarget(update_not_pending); + SetJumpTarget(dr_unset); } void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return, diff --git 
a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 63cfa0afa6..fc89187b4e 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -1346,7 +1346,10 @@ void MMU::SDRUpdated() void MMU::SRUpdated() { - PageTableUpdated(); + if (m_ppc_state.msr.DR) + PageTableUpdated(); + else + m_ppc_state.pagetable_update_pending = true; } enum class TLBLookupResult @@ -1437,11 +1440,15 @@ void MMU::InvalidateTLBEntry(u32 address) m_ppc_state.tlb[PowerPC::DATA_TLB_INDEX][entry_index].Invalidate(); m_ppc_state.tlb[PowerPC::INST_TLB_INDEX][entry_index].Invalidate(); - PageTableUpdated(); + if (m_ppc_state.msr.DR) + PageTableUpdated(); + else + m_ppc_state.pagetable_update_pending = true; } void MMU::PageTableUpdated() { + m_ppc_state.pagetable_update_pending = false; #ifndef _ARCH_32 m_page_mappings.clear(); @@ -1556,6 +1563,11 @@ void MMU::PageTableUpdated() #endif } +void MMU::PageTableUpdatedFromJit(MMU* mmu) +{ + mmu->PageTableUpdated(); +} + // Page Address Translation template MMU::TranslateAddressResult MMU::TranslatePageAddress(const EffectiveAddress address, bool* wi) diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index 0b7dd8a668..5293de56b0 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -247,6 +247,7 @@ public: void SRUpdated(); void InvalidateTLBEntry(u32 address); void PageTableUpdated(); + static void PageTableUpdatedFromJit(MMU* mmu); void DBATUpdated(); void IBATUpdated(); diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 255ad7d0be..ca83370577 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -97,6 +97,7 @@ void PowerPCManager::DoState(PointerWrap& p) p.DoArray(m_ppc_state.tlb); p.Do(m_ppc_state.pagetable_base); p.Do(m_ppc_state.pagetable_hashmask); + p.Do(m_ppc_state.pagetable_update_pending); p.Do(m_ppc_state.reserve); p.Do(m_ppc_state.reserve_address); @@ 
-283,6 +284,7 @@ void PowerPCManager::Reset() { m_ppc_state.pagetable_base = 0; m_ppc_state.pagetable_hashmask = 0; + m_ppc_state.pagetable_update_pending = false; m_ppc_state.tlb = {}; ResetRegisters(); @@ -576,7 +578,7 @@ void PowerPCManager::CheckExceptions() } m_system.GetJitInterface().UpdateMembase(); - MSRUpdated(m_ppc_state); + MSRUpdated(m_system); } void PowerPCManager::CheckExternalExceptions() @@ -629,7 +631,7 @@ void PowerPCManager::CheckExternalExceptions() ERROR_LOG_FMT(POWERPC, "Unknown EXTERNAL INTERRUPT exception: Exceptions == {:08x}", exceptions); } - MSRUpdated(m_ppc_state); + MSRUpdated(m_system); } m_system.GetJitInterface().UpdateMembase(); @@ -689,15 +691,20 @@ void RoundingModeUpdated(PowerPCState& ppc_state) Common::FPU::SetSIMDMode(ppc_state.fpscr.RN, ppc_state.fpscr.NI); } -void MSRUpdated(PowerPCState& ppc_state) +void MSRUpdated(Core::System& system) { static_assert(UReg_MSR{}.DR.StartBit() == 4); static_assert(UReg_MSR{}.IR.StartBit() == 5); static_assert(FEATURE_FLAG_MSR_DR == 1 << 0); static_assert(FEATURE_FLAG_MSR_IR == 1 << 1); + PowerPCState& ppc_state = system.GetPPCState(); + ppc_state.feature_flags = static_cast( (ppc_state.feature_flags & FEATURE_FLAG_PERFMON) | ((ppc_state.msr.Hex >> 4) & 0x3)); + + if (ppc_state.msr.DR && ppc_state.pagetable_update_pending) + system.GetMMU().PageTableUpdated(); } void MMCRUpdated(PowerPCState& ppc_state) diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 834b1fb432..7f931c2b8f 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -122,6 +122,9 @@ struct PowerPCState u32 pc = 0; // program counter u32 npc = 0; + // Storage for the stack pointer of the BLR optimization. 
+ u8* stored_stack_pointer = nullptr; + // gather pipe pointer for JIT access u8* gather_pipe_ptr = nullptr; u8* gather_pipe_base_ptr = nullptr; @@ -157,6 +160,14 @@ struct PowerPCState // lscbx u16 xer_stringctrl = 0; + // Reservation monitor for lwarx and its friend stwcxd. These two don't really need to be + // this early in the struct, but due to how the padding works out, they fit nicely here. + u32 reserve_address; + bool reserve; + + bool pagetable_update_pending = false; + bool m_enable_dcache = false; + #ifdef _M_X86_64 // This member exists only for the purpose of an assertion that its offset <= 0x100. std::tuple<> above_fits_in_first_0x100; @@ -171,22 +182,15 @@ struct PowerPCState // JitArm64 needs 64-bit alignment for SPR_TL. alignas(8) u32 spr[1024]{}; - // Storage for the stack pointer of the BLR optimization. - u8* stored_stack_pointer = nullptr; u8* mem_ptr = nullptr; - std::array, NUM_TLBS> tlb; - u32 pagetable_base = 0; u32 pagetable_hashmask = 0; - InstructionCache iCache; - bool m_enable_dcache = false; - Cache dCache; + std::array, NUM_TLBS> tlb; - // Reservation monitor for lwarx and its friend stwcxd. 
- bool reserve; - u32 reserve_address; + InstructionCache iCache; + Cache dCache; void UpdateCR1() { @@ -354,7 +358,7 @@ void CheckAndHandleBreakPointsFromJIT(PowerPCManager& power_pc); #define TU(ppc_state) (ppc_state).spr[SPR_TU] void RoundingModeUpdated(PowerPCState& ppc_state); -void MSRUpdated(PowerPCState& ppc_state); +void MSRUpdated(Core::System& system); void MMCRUpdated(PowerPCState& ppc_state); void RecalculateAllFeatureFlags(PowerPCState& ppc_state); diff --git a/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp b/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp index 774c58abfe..8ebfd0241e 100644 --- a/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp +++ b/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp @@ -453,7 +453,7 @@ void RegisterWidget::PopulateTable() 23, 5, RegisterType::msr, "MSR", [this] { return m_system.GetPPCState().msr.Hex; }, [this](u64 value) { m_system.GetPPCState().msr.Hex = value; - PowerPC::MSRUpdated(m_system.GetPPCState()); + PowerPC::MSRUpdated(m_system); }); // SRR 0-1