Core: Postpone page table updates when DR isn't set

Page table mappings are only used when DR is set, so if page tables are
updated while DR isn't set, we can defer updating the page table mappings
until DR gets set. This lets us batch page table updates in the Disney
Trio of Destruction, improving performance when the games are loading
data. It doesn't help much for GameCube games, because those run tlbie
with DR set.

The PowerPCState struct has had its members slightly reordered. I had to
put pagetable_update_pending less than 4 KiB from the start so AArch64's
LDRB (immediate) can access it, and that gave me an opportunity to move
around some other members to cut down on padding.
This commit is contained in:
JosJuice 2025-06-20 09:16:55 +02:00
commit dac13baade
17 changed files with 115 additions and 37 deletions

View file

@ -460,7 +460,7 @@ bool CBoot::Load_BS2(Core::System& system, const std::string& boot_rom_filename)
ppc_state.pc = 0x81200150;
PowerPC::MSRUpdated(ppc_state);
PowerPC::MSRUpdated(system);
return true;
}
@ -530,7 +530,7 @@ bool CBoot::BootUp(Core::System& system, const Core::CPUThreadGuard& guard,
auto& ppc_state = system.GetPPCState();
SetupMSR(ppc_state);
SetupMSR(system);
SetupHID(ppc_state, system.IsWii());
SetupBAT(system, system.IsWii());
CopyDefaultExceptionHandlers(system);

View file

@ -169,7 +169,7 @@ private:
static bool Boot_WiiWAD(Core::System& system, const DiscIO::VolumeWAD& wad);
static bool BootNANDTitle(Core::System& system, u64 title_id);
static void SetupMSR(PowerPC::PowerPCState& ppc_state);
static void SetupMSR(Core::System& system);
static void SetupHID(PowerPC::PowerPCState& ppc_state, bool is_wii);
static void SetupBAT(Core::System& system, bool is_wii);
static bool RunApploader(Core::System& system, const Core::CPUThreadGuard& guard, bool is_wii,

View file

@ -68,14 +68,15 @@ void CBoot::RunFunction(Core::System& system, u32 address)
power_pc.SingleStep();
}
void CBoot::SetupMSR(PowerPC::PowerPCState& ppc_state)
void CBoot::SetupMSR(Core::System& system)
{
PowerPC::PowerPCState& ppc_state = system.GetPPCState();
// 0x0002032
ppc_state.msr.RI = 1;
ppc_state.msr.DR = 1;
ppc_state.msr.IR = 1;
ppc_state.msr.FP = 1;
PowerPC::MSRUpdated(ppc_state);
PowerPC::MSRUpdated(system);
}
void CBoot::SetupHID(PowerPC::PowerPCState& ppc_state, bool is_wii)
@ -286,7 +287,7 @@ bool CBoot::EmulatedBS2_GC(Core::System& system, const Core::CPUThreadGuard& gua
auto& ppc_state = system.GetPPCState();
SetupMSR(ppc_state);
SetupMSR(system);
SetupHID(ppc_state, /*is_wii*/ false);
SetupBAT(system, /*is_wii*/ false);
@ -593,7 +594,7 @@ bool CBoot::EmulatedBS2_Wii(Core::System& system, const Core::CPUThreadGuard& gu
auto& ppc_state = system.GetPPCState();
SetupMSR(ppc_state);
SetupMSR(system);
SetupHID(ppc_state, /*is_wii*/ true);
SetupBAT(system, /*is_wii*/ true);

View file

@ -650,7 +650,7 @@ void FifoPlayer::LoadMemory()
HID4(ppc_state).SBE = 1;
}
PowerPC::MSRUpdated(ppc_state);
PowerPC::MSRUpdated(m_system);
auto& mmu = m_system.GetMMU();
mmu.DBATUpdated();

View file

@ -89,7 +89,7 @@ bool Load(Core::System& system)
PowerPC::PowerPCState& ppc_state = power_pc.GetPPCState();
ppc_state.msr.Hex = 0;
ppc_state.pc = 0x3400;
PowerPC::MSRUpdated(ppc_state);
PowerPC::MSRUpdated(system);
NOTICE_LOG_FMT(IOS, "Loaded MIOS and bootstrapped PPC.");

View file

@ -469,7 +469,7 @@ void Expression::SynchronizeBindings(Core::System& system, SynchronizeDirection
else
{
ppc_state.msr.Hex = static_cast<u32>(static_cast<s64>(v->value));
PowerPC::MSRUpdated(ppc_state);
PowerPC::MSRUpdated(system);
}
break;
}

View file

@ -664,7 +664,7 @@ static void WriteRegister()
break;
case 65:
ppc_state.msr.Hex = re32hex(bufptr);
PowerPC::MSRUpdated(ppc_state);
PowerPC::MSRUpdated(system);
break;
case 66:
ppc_state.cr.Set(re32hex(bufptr));

View file

@ -161,7 +161,7 @@ void Interpreter::rfi(Interpreter& interpreter, UGeckoInstruction inst)
// set NPC to saved offset and resume
ppc_state.npc = SRR0(ppc_state);
PowerPC::MSRUpdated(ppc_state);
PowerPC::MSRUpdated(interpreter.m_system);
interpreter.m_end_block = true;
}

View file

@ -181,7 +181,7 @@ void Interpreter::mtmsr(Interpreter& interpreter, UGeckoInstruction inst)
ppc_state.msr.Hex = ppc_state.gpr[inst.RS];
PowerPC::MSRUpdated(ppc_state);
PowerPC::MSRUpdated(interpreter.m_system);
// FE0/FE1 may have been set
CheckFPExceptions(ppc_state);

View file

@ -506,6 +506,8 @@ void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg)
{
ASSERT(!msr.IsSimpleReg(scratch_reg));
constexpr u32 dr_bit = 1 << UReg_MSR{}.DR.StartBit();
// Update mem_ptr
auto& memory = m_system.GetMemory();
if (msr.IsImm())
@ -517,7 +519,7 @@ void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg)
{
MOV(64, R(RMEM), ImmPtr(memory.GetLogicalBase()));
MOV(64, R(scratch_reg), ImmPtr(memory.GetPhysicalBase()));
TEST(32, msr, Imm32(1 << (31 - 27)));
TEST(32, msr, Imm32(dr_bit));
CMOVcc(64, RMEM, R(scratch_reg), CC_Z);
}
MOV(64, PPCSTATE(mem_ptr), R(RMEM));
@ -541,6 +543,25 @@ void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg)
OR(32, R(scratch_reg), Imm32(other_feature_flags));
MOV(32, PPCSTATE(feature_flags), R(scratch_reg));
}
// Call PageTableUpdatedFromJit if needed
if (!msr.IsImm() || UReg_MSR(msr.Imm32()).DR)
{
gpr.Flush();
fpr.Flush();
FixupBranch dr_unset;
if (!msr.IsImm())
{
TEST(32, msr, Imm32(dr_bit));
dr_unset = J_CC(CC_Z);
}
CMP(8, PPCSTATE(pagetable_update_pending), Imm8(0));
FixupBranch update_not_pending = J_CC(CC_E);
ABI_CallFunctionP(&PowerPC::MMU::PageTableUpdatedFromJit, &m_system.GetMMU());
SetJumpTarget(update_not_pending);
if (!msr.IsImm())
SetJumpTarget(dr_unset);
}
}
void Jit64::WriteExit(u32 destination, bool bl, u32 after)

View file

@ -436,11 +436,14 @@ void Jit64::mtmsr(UGeckoInstruction inst)
FALLBACK_IF(jo.fp_exceptions);
{
RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read);
RegCache::Realize(Rs);
MOV(32, PPCSTATE(msr), Rs);
MSRUpdated(Rs, RSCRATCH2);
OpArg Rs_op_arg;
{
RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read);
RegCache::Realize(Rs);
MOV(32, PPCSTATE(msr), Rs);
Rs_op_arg = Rs;
}
MSRUpdated(Rs_op_arg, RSCRATCH2);
}
gpr.Flush();

View file

@ -445,10 +445,27 @@ void JitArm64::MSRUpdated(u32 msr)
MOVI2R(WA, feature_flags);
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags));
}
// Call PageTableUpdatedFromJit if needed
if (UReg_MSR(msr).DR)
{
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
auto WA = gpr.GetScopedReg();
static_assert(PPCSTATE_OFF(pagetable_update_pending) < 0x1000);
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(pagetable_update_pending));
FixupBranch update_not_pending = CBZ(WA);
ABI_CallFunction(&PowerPC::MMU::PageTableUpdatedFromJit, &m_system.GetMMU());
SetJumpTarget(update_not_pending);
}
}
void JitArm64::MSRUpdated(ARM64Reg msr)
{
constexpr LogicalImm dr_bit(1ULL << UReg_MSR{}.DR.StartBit(), GPRSize::B32);
auto WA = gpr.GetScopedReg();
ARM64Reg XA = EncodeRegTo64(WA);
@ -456,7 +473,7 @@ void JitArm64::MSRUpdated(ARM64Reg msr)
auto& memory = m_system.GetMemory();
MOVP2R(MEM_REG, jo.fastmem ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase());
MOVP2R(XA, jo.fastmem ? memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase());
TST(msr, LogicalImm(1 << (31 - 27), GPRSize::B32));
TST(msr, dr_bit);
CSEL(MEM_REG, MEM_REG, XA, CCFlags::CC_NEQ);
STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr));
@ -470,6 +487,18 @@ void JitArm64::MSRUpdated(ARM64Reg msr)
if (other_feature_flags != 0)
ORR(WA, WA, LogicalImm(other_feature_flags, GPRSize::B32));
STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags));
// Call PageTableUpdatedFromJit if needed
MOV(WA, msr);
gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
FixupBranch dr_unset = TBZ(WA, dr_bit);
static_assert(PPCSTATE_OFF(pagetable_update_pending) < 0x1000);
LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(pagetable_update_pending));
FixupBranch update_not_pending = CBZ(WA);
ABI_CallFunction(&PowerPC::MMU::PageTableUpdatedFromJit, &m_system.GetMMU());
SetJumpTarget(update_not_pending);
SetJumpTarget(dr_unset);
}
void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return,

View file

@ -1346,7 +1346,10 @@ void MMU::SDRUpdated()
void MMU::SRUpdated()
{
PageTableUpdated();
if (m_ppc_state.msr.DR)
PageTableUpdated();
else
m_ppc_state.pagetable_update_pending = true;
}
enum class TLBLookupResult
@ -1437,11 +1440,15 @@ void MMU::InvalidateTLBEntry(u32 address)
m_ppc_state.tlb[PowerPC::DATA_TLB_INDEX][entry_index].Invalidate();
m_ppc_state.tlb[PowerPC::INST_TLB_INDEX][entry_index].Invalidate();
PageTableUpdated();
if (m_ppc_state.msr.DR)
PageTableUpdated();
else
m_ppc_state.pagetable_update_pending = true;
}
void MMU::PageTableUpdated()
{
m_ppc_state.pagetable_update_pending = false;
#ifndef _ARCH_32
m_page_mappings.clear();
@ -1556,6 +1563,11 @@ void MMU::PageTableUpdated()
#endif
}
// Static wrapper around PageTableUpdated() that takes the MMU as an explicit
// pointer argument and simply forwards to mmu->PageTableUpdated().
// NOTE(review): presumably exists so JIT-emitted code can call it through a
// plain function pointer rather than a member function - confirm against the
// JIT call sites.
void MMU::PageTableUpdatedFromJit(MMU* mmu)
{
mmu->PageTableUpdated();
}
// Page Address Translation
template <const XCheckTLBFlag flag>
MMU::TranslateAddressResult MMU::TranslatePageAddress(const EffectiveAddress address, bool* wi)

View file

@ -247,6 +247,7 @@ public:
void SRUpdated();
void InvalidateTLBEntry(u32 address);
void PageTableUpdated();
static void PageTableUpdatedFromJit(MMU* mmu);
void DBATUpdated();
void IBATUpdated();

View file

@ -97,6 +97,7 @@ void PowerPCManager::DoState(PointerWrap& p)
p.DoArray(m_ppc_state.tlb);
p.Do(m_ppc_state.pagetable_base);
p.Do(m_ppc_state.pagetable_hashmask);
p.Do(m_ppc_state.pagetable_update_pending);
p.Do(m_ppc_state.reserve);
p.Do(m_ppc_state.reserve_address);
@ -283,6 +284,7 @@ void PowerPCManager::Reset()
{
m_ppc_state.pagetable_base = 0;
m_ppc_state.pagetable_hashmask = 0;
m_ppc_state.pagetable_update_pending = false;
m_ppc_state.tlb = {};
ResetRegisters();
@ -576,7 +578,7 @@ void PowerPCManager::CheckExceptions()
}
m_system.GetJitInterface().UpdateMembase();
MSRUpdated(m_ppc_state);
MSRUpdated(m_system);
}
void PowerPCManager::CheckExternalExceptions()
@ -629,7 +631,7 @@ void PowerPCManager::CheckExternalExceptions()
ERROR_LOG_FMT(POWERPC, "Unknown EXTERNAL INTERRUPT exception: Exceptions == {:08x}",
exceptions);
}
MSRUpdated(m_ppc_state);
MSRUpdated(m_system);
}
m_system.GetJitInterface().UpdateMembase();
@ -689,15 +691,20 @@ void RoundingModeUpdated(PowerPCState& ppc_state)
Common::FPU::SetSIMDMode(ppc_state.fpscr.RN, ppc_state.fpscr.NI);
}
void MSRUpdated(PowerPCState& ppc_state)
void MSRUpdated(Core::System& system)
{
static_assert(UReg_MSR{}.DR.StartBit() == 4);
static_assert(UReg_MSR{}.IR.StartBit() == 5);
static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);
PowerPCState& ppc_state = system.GetPPCState();
ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(
(ppc_state.feature_flags & FEATURE_FLAG_PERFMON) | ((ppc_state.msr.Hex >> 4) & 0x3));
if (ppc_state.msr.DR && ppc_state.pagetable_update_pending)
system.GetMMU().PageTableUpdated();
}
void MMCRUpdated(PowerPCState& ppc_state)

View file

@ -122,6 +122,9 @@ struct PowerPCState
u32 pc = 0; // program counter
u32 npc = 0;
// Storage for the stack pointer of the BLR optimization.
u8* stored_stack_pointer = nullptr;
// gather pipe pointer for JIT access
u8* gather_pipe_ptr = nullptr;
u8* gather_pipe_base_ptr = nullptr;
@ -157,6 +160,14 @@ struct PowerPCState
// lscbx
u16 xer_stringctrl = 0;
// Reservation monitor for lwarx and its friend stwcxd. These two don't really need to be
// this early in the struct, but due to how the padding works out, they fit nicely here.
u32 reserve_address;
bool reserve;
bool pagetable_update_pending = false;
bool m_enable_dcache = false;
#ifdef _M_X86_64
// This member exists only for the purpose of an assertion that its offset <= 0x100.
std::tuple<> above_fits_in_first_0x100;
@ -171,22 +182,15 @@ struct PowerPCState
// JitArm64 needs 64-bit alignment for SPR_TL.
alignas(8) u32 spr[1024]{};
// Storage for the stack pointer of the BLR optimization.
u8* stored_stack_pointer = nullptr;
u8* mem_ptr = nullptr;
std::array<std::array<TLBEntry, TLB_SIZE / TLB_WAYS>, NUM_TLBS> tlb;
u32 pagetable_base = 0;
u32 pagetable_hashmask = 0;
InstructionCache iCache;
bool m_enable_dcache = false;
Cache dCache;
std::array<std::array<TLBEntry, TLB_SIZE / TLB_WAYS>, NUM_TLBS> tlb;
// Reservation monitor for lwarx and its friend stwcxd.
bool reserve;
u32 reserve_address;
InstructionCache iCache;
Cache dCache;
void UpdateCR1()
{
@ -354,7 +358,7 @@ void CheckAndHandleBreakPointsFromJIT(PowerPCManager& power_pc);
#define TU(ppc_state) (ppc_state).spr[SPR_TU]
void RoundingModeUpdated(PowerPCState& ppc_state);
void MSRUpdated(PowerPCState& ppc_state);
void MSRUpdated(Core::System& system);
void MMCRUpdated(PowerPCState& ppc_state);
void RecalculateAllFeatureFlags(PowerPCState& ppc_state);

View file

@ -453,7 +453,7 @@ void RegisterWidget::PopulateTable()
23, 5, RegisterType::msr, "MSR", [this] { return m_system.GetPPCState().msr.Hex; },
[this](u64 value) {
m_system.GetPPCState().msr.Hex = value;
PowerPC::MSRUpdated(m_system.GetPPCState());
PowerPC::MSRUpdated(m_system);
});
// SRR 0-1