From d6405669e312bbec09528ea37f3c43207b9f778b Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Fri, 29 Dec 2023 16:19:41 +0100
Subject: [PATCH 1/6] Common/MemArena: Add function for getting page size and
 alignment

---
 Source/Core/Common/MemArena.h          | 10 ++++++++++
 Source/Core/Common/MemArenaAndroid.cpp | 10 ++++++++++
 Source/Core/Common/MemArenaDarwin.cpp  | 12 ++++++++++++
 Source/Core/Common/MemArenaUnix.cpp    | 10 ++++++++++
 Source/Core/Common/MemArenaWin.cpp     | 20 ++++++++++++++++++++
 5 files changed, 62 insertions(+)

diff --git a/Source/Core/Common/MemArena.h b/Source/Core/Common/MemArena.h
index d9d472079c..32b8999e1f 100644
--- a/Source/Core/Common/MemArena.h
+++ b/Source/Core/Common/MemArena.h
@@ -115,6 +115,16 @@ public:
   ///
   void UnmapFromMemoryRegion(void* view, size_t size);
 
+  ///
+  /// Return the system's page size.
+  ///
+  size_t GetPageSize() const;
+
+  ///
+  /// Return the system's required page alignment.
+  ///
+  size_t GetPageAlignment() const;
+
 private:
 #ifdef _WIN32
   WindowsMemoryRegion* EnsureSplitRegionForMapping(void* address, size_t size);
diff --git a/Source/Core/Common/MemArenaAndroid.cpp b/Source/Core/Common/MemArenaAndroid.cpp
index bcba64e1e1..61aa6cd824 100644
--- a/Source/Core/Common/MemArenaAndroid.cpp
+++ b/Source/Core/Common/MemArenaAndroid.cpp
@@ -144,6 +144,16 @@ void MemArena::UnmapFromMemoryRegion(void* view, size_t size)
     NOTICE_LOG_FMT(MEMMAP, "mmap failed");
 }
 
+size_t MemArena::GetPageSize() const
+{
+  return sysconf(_SC_PAGESIZE);
+}
+
+size_t MemArena::GetPageAlignment() const
+{
+  return GetPageSize();
+}
+
 LazyMemoryRegion::LazyMemoryRegion() = default;
 
 LazyMemoryRegion::~LazyMemoryRegion()
diff --git a/Source/Core/Common/MemArenaDarwin.cpp b/Source/Core/Common/MemArenaDarwin.cpp
index c528e32799..b077a71f2e 100644
--- a/Source/Core/Common/MemArenaDarwin.cpp
+++ b/Source/Core/Common/MemArenaDarwin.cpp
@@ -3,6 +3,8 @@
 
 #include "Common/MemArena.h"
 
+#include <unistd.h>
+
 #include "Common/Assert.h"
 #include "Common/Logging/Log.h"
 
@@ -163,6 +165,16 @@ void MemArena::UnmapFromMemoryRegion(void* view, size_t size)
   }
 }
 
+size_t MemArena::GetPageSize() const
+{
+  return getpagesize();
+}
+
+size_t MemArena::GetPageAlignment() const
+{
+  return GetPageSize();
+}
+
 LazyMemoryRegion::LazyMemoryRegion() = default;
 
 LazyMemoryRegion::~LazyMemoryRegion()
diff --git a/Source/Core/Common/MemArenaUnix.cpp b/Source/Core/Common/MemArenaUnix.cpp
index 83026f76b8..04a81c7db8 100644
--- a/Source/Core/Common/MemArenaUnix.cpp
+++ b/Source/Core/Common/MemArenaUnix.cpp
@@ -110,6 +110,16 @@ void MemArena::UnmapFromMemoryRegion(void* view, size_t size)
     NOTICE_LOG_FMT(MEMMAP, "mmap failed");
 }
 
+size_t MemArena::GetPageSize() const
+{
+  return sysconf(_SC_PAGESIZE);
+}
+
+size_t MemArena::GetPageAlignment() const
+{
+  return GetPageSize();
+}
+
 LazyMemoryRegion::LazyMemoryRegion() = default;
 
 LazyMemoryRegion::~LazyMemoryRegion()
diff --git a/Source/Core/Common/MemArenaWin.cpp b/Source/Core/Common/MemArenaWin.cpp
index 7907979775..06afdb055a 100644
--- a/Source/Core/Common/MemArenaWin.cpp
+++ b/Source/Core/Common/MemArenaWin.cpp
@@ -438,6 +438,26 @@ void MemArena::UnmapFromMemoryRegion(void* view, size_t size)
     UnmapViewOfFile(view);
 }
 
+size_t MemArena::GetPageSize() const
+{
+  SYSTEM_INFO si;
+  GetSystemInfo(&si);
+  return si.dwPageSize;
+}
+
+size_t MemArena::GetPageAlignment() const
+{
+  SYSTEM_INFO si;
+  GetSystemInfo(&si);
+  if (!m_memory_functions.m_address_MapViewOfFile3)
+  {
+    // In this case, we can only map pages that are 64K aligned.
+    // See https://devblogs.microsoft.com/oldnewthing/20031008-00/?p=42223
+    return std::max<size_t>(si.dwPageSize, 64 * 1024);
+  }
+  return si.dwPageSize;
+}
+
 LazyMemoryRegion::LazyMemoryRegion()
 {
   InitWindowsMemoryFunctions(&m_memory_functions);

From ab19d714d2161fec4d191c0796ef7e12d9ad8521 Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Fri, 20 Jun 2025 08:56:15 +0200
Subject: [PATCH 2/6] Core: Detect SR updates

---
 Source/Core/Core/PowerPC/GDBStub.cpp          |  2 ++
 .../Interpreter_SystemRegisters.cpp           |  6 +++--
 Source/Core/Core/PowerPC/JitArm64/Jit.h       |  2 --
 .../JitArm64/JitArm64_SystemRegisters.cpp     | 26 -------------------
 .../Core/PowerPC/JitArm64/JitArm64_Tables.cpp | 24 ++++++++---------
 Source/Core/Core/PowerPC/MMU.cpp              |  4 +++
 Source/Core/Core/PowerPC/MMU.h                |  1 +
 Source/Core/Core/PowerPC/PowerPC.cpp          |  6 -----
 Source/Core/Core/PowerPC/PowerPC.h            |  2 --
 .../DolphinQt/Debugger/RegisterWidget.cpp     |  6 ++++-
 10 files changed, 28 insertions(+), 51 deletions(-)

diff --git a/Source/Core/Core/PowerPC/GDBStub.cpp b/Source/Core/Core/PowerPC/GDBStub.cpp
index b8b607d592..92204d8be9 100644
--- a/Source/Core/Core/PowerPC/GDBStub.cpp
+++ b/Source/Core/Core/PowerPC/GDBStub.cpp
@@ -36,6 +36,7 @@ typedef SSIZE_T ssize_t;
 #include "Core/Host.h"
 #include "Core/PowerPC/BreakPoints.h"
 #include "Core/PowerPC/Gekko.h"
+#include "Core/PowerPC/MMU.h"
 #include "Core/PowerPC/PPCCache.h"
 #include "Core/PowerPC/PowerPC.h"
 #include "Core/System.h"
@@ -648,6 +649,7 @@
   else if (id >= 71 && id < 87)
   {
     ppc_state.sr[id - 71] = re32hex(bufptr);
+    system.GetMMU().SRUpdated();
   }
   else if (id >= 88 && id < 104)
   {
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
index a28a968653..e32875cdef 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
@@ -203,7 +203,8 @@ void Interpreter::mtsr(Interpreter& interpreter, UGeckoInstruction inst)
   const u32 index = inst.SR;
   const u32 value = ppc_state.gpr[inst.RS];
 
-  ppc_state.SetSR(index, value);
+  ppc_state.sr[index] = value;
+  interpreter.m_system.GetMMU().SRUpdated();
 }
 
 void Interpreter::mtsrin(Interpreter& interpreter, UGeckoInstruction inst)
@@ -217,7 +218,8 @@ void Interpreter::mtsrin(Interpreter& interpreter, UGeckoInstruction inst)
   const u32 index = (ppc_state.gpr[inst.RB] >> 28) & 0xF;
   const u32 value = ppc_state.gpr[inst.RS];
 
-  ppc_state.SetSR(index, value);
+  ppc_state.sr[index] = value;
+  interpreter.m_system.GetMMU().SRUpdated();
 }
 
 void Interpreter::mftb(Interpreter& interpreter, UGeckoInstruction inst)
diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h
index fd5cc4e32e..794889df8e 100644
--- a/Source/Core/Core/PowerPC/JitArm64/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h
@@ -122,9 +122,7 @@ public:
   void mcrf(UGeckoInstruction inst);
   void mcrxr(UGeckoInstruction inst);
   void mfsr(UGeckoInstruction inst);
-  void mtsr(UGeckoInstruction inst);
   void mfsrin(UGeckoInstruction inst);
-  void mtsrin(UGeckoInstruction inst);
   void twx(UGeckoInstruction inst);
   void mfspr(UGeckoInstruction inst);
   void mftb(UGeckoInstruction inst);
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
index 54592951e4..21a0003887 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp
+++ 
b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -291,14 +291,6 @@ void JitArm64::mfsr(UGeckoInstruction inst) LDR(IndexType::Unsigned, gpr.R(inst.RD), PPC_REG, PPCSTATE_OFF_SR(inst.SR)); } -void JitArm64::mtsr(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - STR(IndexType::Unsigned, gpr.R(inst.RS), PPC_REG, PPCSTATE_OFF_SR(inst.SR)); -} - void JitArm64::mfsrin(UGeckoInstruction inst) { INSTRUCTION_START @@ -317,24 +309,6 @@ void JitArm64::mfsrin(UGeckoInstruction inst) LDR(RD, addr, ArithOption(EncodeRegTo64(index), true)); } -void JitArm64::mtsrin(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - u32 b = inst.RB, d = inst.RD; - gpr.BindToRegister(d, d == b); - - ARM64Reg RB = gpr.R(b); - ARM64Reg RD = gpr.R(d); - auto index = gpr.GetScopedReg(); - auto addr = gpr.GetScopedReg(); - - UBFM(index, RB, 28, 31); - ADDI2R(EncodeRegTo64(addr), PPC_REG, PPCSTATE_OFF_SR(0), EncodeRegTo64(addr)); - STR(RD, EncodeRegTo64(addr), ArithOption(EncodeRegTo64(index), true)); -} - void JitArm64::twx(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp index 0aac345296..1bb02eb5bf 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Tables.cpp @@ -266,18 +266,18 @@ constexpr std::array s_table31{{ {759, &JitArm64::stfXX}, // stfdux {983, &JitArm64::stfXX}, // stfiwx - {19, &JitArm64::mfcr}, // mfcr - {83, &JitArm64::mfmsr}, // mfmsr - {144, &JitArm64::mtcrf}, // mtcrf - {146, &JitArm64::mtmsr}, // mtmsr - {210, &JitArm64::mtsr}, // mtsr - {242, &JitArm64::mtsrin}, // mtsrin - {339, &JitArm64::mfspr}, // mfspr - {467, &JitArm64::mtspr}, // mtspr - {371, &JitArm64::mftb}, // mftb - {512, &JitArm64::mcrxr}, // mcrxr - {595, &JitArm64::mfsr}, // mfsr - {659, &JitArm64::mfsrin}, // mfsrin + {19, &JitArm64::mfcr}, // mfcr + {83, &JitArm64::mfmsr}, // mfmsr + {144, &JitArm64::mtcrf}, // mtcrf + {146, &JitArm64::mtmsr}, // mtmsr + {210, &JitArm64::FallBackToInterpreter}, // mtsr + {242, &JitArm64::FallBackToInterpreter}, // mtsrin + {339, &JitArm64::mfspr}, // mfspr + {467, &JitArm64::mtspr}, // mtspr + {371, &JitArm64::mftb}, // mftb + {512, &JitArm64::mcrxr}, // mcrxr + {595, &JitArm64::mfsr}, // mfsr + {659, &JitArm64::mfsrin}, // mfsrin {4, &JitArm64::twx}, // tw {598, &JitArm64::DoNothing}, // sync diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 7cb9b9ac52..e84765acb8 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -1325,6 +1325,10 @@ void MMU::SDRUpdated() m_ppc_state.pagetable_hashmask = ((htabmask << 10) | 0x3ff); } +void MMU::SRUpdated() +{ +} + enum class TLBLookupResult { Found, diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index cee7d3213b..4db2c68247 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -238,6 +238,7 @@ public: // TLB functions void SDRUpdated(); + void SRUpdated(); void InvalidateTLBEntry(u32 address); void DBATUpdated(); void IBATUpdated(); diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index 3cc1112bd7..1e75d7b5af 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -662,12 +662,6 @@ bool PowerPCManager::CheckAndHandleBreakPoints() return false; } -void PowerPCState::SetSR(u32 index, u32 
value) -{ - DEBUG_LOG_FMT(POWERPC, "{:08x}: MMU: Segment register {} set to {:08x}", pc, index, value); - sr[index] = value; -} - // FPSCR update functions void PowerPCState::UpdateFPRFDouble(double dvalue) diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 662507697e..834b1fb432 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -193,8 +193,6 @@ struct PowerPCState cr.SetField(1, (fpscr.FX << 3) | (fpscr.FEX << 2) | (fpscr.VX << 1) | fpscr.OX); } - void SetSR(u32 index, u32 value); - void SetCarry(u32 ca) { xer_ca = ca; } u32 GetCarry() const { return xer_ca; } diff --git a/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp b/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp index 01ff696d83..774c58abfe 100644 --- a/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp +++ b/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp @@ -15,6 +15,7 @@ #include "Core/Core.h" #include "Core/Debugger/CodeTrace.h" #include "Core/HW/ProcessorInterface.h" +#include "Core/PowerPC/MMU.h" #include "Core/PowerPC/PowerPC.h" #include "Core/System.h" #include "DolphinQt/Host.h" @@ -405,7 +406,10 @@ void RegisterWidget::PopulateTable() AddRegister( i, 7, RegisterType::sr, "SR" + std::to_string(i), [this, i] { return m_system.GetPPCState().sr[i]; }, - [this, i](u64 value) { m_system.GetPPCState().sr[i] = value; }); + [this, i](u64 value) { + m_system.GetPPCState().sr[i] = value; + m_system.GetMMU().SRUpdated(); + }); } // Special registers From 8d9ea7fb8316b2818d9083359a7bc22a742d1f4f Mon Sep 17 00:00:00 2001 From: JosJuice Date: Fri, 20 Jun 2025 09:17:21 +0200 Subject: [PATCH 3/6] Core: Create fastmem mappings for page address translation Previously we've only been setting up fastmem mappings for block address translation, but now we also do it for page address translation. This increases performance when games access memory using page tables, but decreases performance when games set up page tables. The tlbie instruction is used as an indication that the mappings need to be updated. There are some accuracy downsides: * The TLB is now effectively infinitely large, which matters if games don't use tlbie when modifying page tables. * The R and C bits for page table entries get set pessimistically rather than when the page is actually accessed. No games are known to be broken by these inaccuracies, but unfortunately the second inaccuracy causes a large performance regression in Rogue Squadron 3. You still get the old, more accurate behavior if Enable Write-Back Cache is on. 
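
For readers unfamiliar with the hashed page table this patch walks: a
PTEG's address is derived from the VSID and the page index, and the new
code inverts that derivation to recover logical addresses from table
slots. A rough sketch of the forward direction (illustrative names, not
the actual Dolphin API):

  // Each PTEG is 64 bytes: 8 PTEs of 8 bytes each.
  // The secondary hash (H=1) is the complement of the primary hash.
  u32 PtegAddress(u32 vsid, u32 page_index, bool secondary,
                  u32 pagetable_base, u32 pagetable_hashmask)
  {
    u32 hash = vsid ^ page_index;
    if (secondary)
      hash = ~hash;
    return pagetable_base | ((hash & pagetable_hashmask) << 6);
  }

PageTableUpdated() below runs this in reverse: for every valid PTE it
recomputes the page index from the slot's hash and the PTE's API field,
then records a logical-to-physical mapping that Memmap turns into a
host fastmem mapping.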
---
 Source/Core/Core/HW/Memmap.cpp       |  89 ++++++++++++++---
 Source/Core/Core/HW/Memmap.h         |  10 +-
 Source/Core/Core/PowerPC/MMU.cpp     | 141 ++++++++++++++++++++++++++-
 Source/Core/Core/PowerPC/MMU.h       |  11 +++
 Source/Core/Core/PowerPC/PowerPC.cpp |   9 +-
 Source/Core/Core/State.cpp           |   2 +-
 6 files changed, 245 insertions(+), 17 deletions(-)

diff --git a/Source/Core/Core/HW/Memmap.cpp b/Source/Core/Core/HW/Memmap.cpp
index 675e942009..f51780dfaa 100644
--- a/Source/Core/Core/HW/Memmap.cpp
+++ b/Source/Core/Core/HW/Memmap.cpp
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include <map>
 #include
 #include
 #include
@@ -41,7 +42,9 @@
 namespace Memory
 {
 
-MemoryManager::MemoryManager(Core::System& system) : m_system(system)
+MemoryManager::MemoryManager(Core::System& system)
+    : m_page_size(m_arena.GetPageSize()), m_page_alignment(m_arena.GetPageAlignment()),
+      m_system(system)
 {
 }
 
@@ -233,13 +236,19 @@ bool MemoryManager::InitFastmemArena()
   return true;
 }
 
-void MemoryManager::UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
+void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table)
 {
-  for (auto& entry : m_logical_mapped_entries)
+  for (auto& entry : m_dbat_mapped_entries)
   {
     m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
   }
-  m_logical_mapped_entries.clear();
+  m_dbat_mapped_entries.clear();
+
+  for (auto& entry : m_page_table_mapped_entries)
+  {
+    m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
+  }
+  m_page_table_mapped_entries.clear();
 
   m_logical_page_mappings.fill(nullptr);
 
@@ -288,13 +297,12 @@
       void* mapped_pointer = m_arena.MapInMemoryRegion(position, mapped_size, base);
       if (!mapped_pointer)
       {
-        PanicAlertFmt(
-            "Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} "
-            "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
-            intersection_start, mapped_size, logical_address);
-        exit(0);
+        PanicAlertFmt("Memory::UpdateDBATMappings(): Failed to map memory region at 0x{:08X} "
+                      "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
+                      intersection_start, mapped_size, logical_address);
+        continue;
       }
-      m_logical_mapped_entries.push_back({mapped_pointer, mapped_size});
+      m_dbat_mapped_entries.push_back({mapped_pointer, mapped_size});
     }
 
     m_logical_page_mappings[i] =
@@ -305,6 +313,57 @@
   }
 }
 
+void MemoryManager::UpdatePageTableMappings(const std::map<u32, u32>& page_mappings)
+{
+  if (m_page_size > PowerPC::HW_PAGE_SIZE)
+    return;
+
+  for (auto& entry : m_page_table_mapped_entries)
+  {
+    m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
+  }
+  m_page_table_mapped_entries.clear();
+
+  for (const auto [logical_address, translated_address] : page_mappings)
+  {
+    if (logical_address % m_page_alignment != 0)
+      continue;
+
+    constexpr u32 logical_size = PowerPC::HW_PAGE_SIZE;
+    for (const auto& physical_region : m_physical_regions)
+    {
+      if (!physical_region.active)
+        continue;
+
+      u32 mapping_address = physical_region.physical_address;
+      u32 mapping_end = mapping_address + physical_region.size;
+      u32 intersection_start = std::max(mapping_address, translated_address);
+      u32 intersection_end = std::min(mapping_end, translated_address + logical_size);
+      if (intersection_start < intersection_end)
+      {
+        // Found an overlapping region; map it.
+        if (m_is_fastmem_arena_initialized)
+        {
+          u32 position = physical_region.shm_position + intersection_start - mapping_address;
+          u8* base = m_logical_base + logical_address + intersection_start - translated_address;
+          u32 mapped_size = intersection_end - intersection_start;
+
+          void* mapped_pointer = m_arena.MapInMemoryRegion(position, mapped_size, base);
+          if (!mapped_pointer)
+          {
+            PanicAlertFmt(
+                "Memory::UpdatePageTableMappings(): Failed to map memory region at 0x{:08X} "
+                "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
+                intersection_start, mapped_size, logical_address);
+            continue;
+          }
+          m_page_table_mapped_entries.push_back({mapped_pointer, mapped_size});
+        }
+      }
+    }
+  }
+}
+
 void MemoryManager::DoState(PointerWrap& p)
 {
   const u32 current_ram_size = GetRamSize();
@@ -386,11 +445,17 @@ void MemoryManager::ShutdownFastmemArena()
     m_arena.UnmapFromMemoryRegion(base, region.size);
   }
 
-  for (auto& entry : m_logical_mapped_entries)
+  for (auto& entry : m_dbat_mapped_entries)
   {
     m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
   }
-  m_logical_mapped_entries.clear();
+  m_dbat_mapped_entries.clear();
+
+  for (auto& entry : m_page_table_mapped_entries)
+  {
+    m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
+  }
+  m_page_table_mapped_entries.clear();
 
   m_arena.ReleaseMemoryRegion();
 
diff --git a/Source/Core/Core/HW/Memmap.h b/Source/Core/Core/HW/Memmap.h
index e0708605db..8d224e1ca7 100644
--- a/Source/Core/Core/HW/Memmap.h
+++ b/Source/Core/Core/HW/Memmap.h
@@ -4,6 +4,7 @@
 #pragma once
 
 #include
+#include <map>
 #include
 #include
 #include
@@ -99,7 +100,8 @@ public:
   void ShutdownFastmemArena();
   void DoState(PointerWrap& p);
 
-  void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table);
+  void UpdateDBATMappings(const PowerPC::BatTable& dbat_table);
+  void UpdatePageTableMappings(const std::map<u32, u32>& page_mappings);
 
   void Clear();
 
@@ -207,6 +209,9 @@ private:
   // The MemArena class
   Common::MemArena m_arena;
 
+  const size_t m_page_size;
+  const size_t m_page_alignment;
+
   // Dolphin allocates memory to represent four regions:
   // - 32MB RAM (actually 24MB on hardware), available on GameCube and Wii
   // - 64MB "EXRAM", RAM only available on Wii
@@ -247,7 +252,8 @@
   // TODO: Do we want to handle the mirrors of the GC RAM?
   std::array m_physical_regions{};
 
-  std::vector<LogicalMemoryView> m_logical_mapped_entries;
+  std::vector<LogicalMemoryView> m_dbat_mapped_entries;
+  std::vector<LogicalMemoryView> m_page_table_mapped_entries;
 
   std::array m_physical_page_mappings{};
   std::array m_logical_page_mappings{};
 
diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp
index e84765acb8..63cfa0afa6 100644
--- a/Source/Core/Core/PowerPC/MMU.cpp
+++ b/Source/Core/Core/PowerPC/MMU.cpp
@@ -33,6 +33,7 @@
 #include "Common/Align.h"
 #include "Common/Assert.h"
 #include "Common/BitUtils.h"
+#include "Common/ChunkFile.h"
 #include "Common/CommonTypes.h"
 #include "Common/Logging/Log.h"
 
@@ -58,6 +59,22 @@ MMU::MMU(Core::System& system, Memory::MemoryManager& memory, PowerPC::PowerPCMa
 
 MMU::~MMU() = default;
 
+void MMU::Reset()
+{
+  m_page_mappings.clear();
+#ifndef _ARCH_32
+  m_memory.UpdatePageTableMappings(m_page_mappings);
+#endif
+}
+
+void MMU::DoState(PointerWrap& p)
+{
+  // Instead of storing m_page_mappings in savestates, we *could* recalculate it based on memory
+  // here in DoState, but this could lead to us getting a more up-to-date set of page mappings
+  // than we had when the savestate was created, which could be a problem for TAS determinism.
+ p.Do(m_page_mappings); +} + // Overloaded byteswap functions, for use within the templated functions below. [[maybe_unused]] static u8 bswap(u8 val) { @@ -1323,10 +1340,13 @@ void MMU::SDRUpdated() m_ppc_state.pagetable_base = htaborg << 16; m_ppc_state.pagetable_hashmask = ((htabmask << 10) | 0x3ff); + + PageTableUpdated(); } void MMU::SRUpdated() { + PageTableUpdated(); } enum class TLBLookupResult @@ -1416,6 +1436,124 @@ void MMU::InvalidateTLBEntry(u32 address) m_ppc_state.tlb[PowerPC::DATA_TLB_INDEX][entry_index].Invalidate(); m_ppc_state.tlb[PowerPC::INST_TLB_INDEX][entry_index].Invalidate(); + + PageTableUpdated(); +} + +void MMU::PageTableUpdated() +{ +#ifndef _ARCH_32 + m_page_mappings.clear(); + + if (m_ppc_state.m_enable_dcache) + { + // Because fastmem isn't in use when accurate dcache emulation is enabled, setting up mappings + // would be a waste of time. Skipping setting up mappings also comes with the bonus of skipping + // the inaccurate behavior of setting the R and C bits of PTE2 as soon as a page is mapped. + return; + } + + const u32 page_table_mask = m_ppc_state.pagetable_hashmask; + const u32 page_table_base = m_ppc_state.pagetable_base; + const u32 page_table_end = (page_table_base | (page_table_mask << 6)) + (1 << 6); + const u32 page_table_size = page_table_end - page_table_base; + + u8* page_table_view = m_system.GetMemory().GetPointerForRange(page_table_base, page_table_size); + if (!page_table_view) + { + WARN_LOG_FMT(POWERPC, "Failed to read page table at {:#010x}-{:#010x}", page_table_base, + page_table_end); + m_memory.UpdatePageTableMappings(m_page_mappings); + return; + } + + const auto read_page_table = [&](u32 H) { + for (u32 i = 0; i <= page_table_mask; ++i) + { + for (u32 j = 0; j < 8; ++j) + { + const u32 pte_addr = (page_table_base | ((i & page_table_mask) << 6)) + j * 8; + + UPTE_Lo pte1(Common::swap32(page_table_view + pte_addr - page_table_base)); + UPTE_Hi pte2(Common::swap32(page_table_view + pte_addr - page_table_base + 4)); + + if (!pte1.V) + continue; + + if (pte1.H != H) + continue; + + // There are quirks related to uncached memory that can't be correctly emulated by fast + // accesses, so we don't map uncached memory. (However, no software at all is known to + // trigger these quirks through page address translation, only through block address + // translation.) + const bool wi = (pte2.WIMG & 0b1100) != 0; + if (wi) + continue; + + // Due to hash masking, the upper bits of page_index_from_hash might not match the actual + // page index. But these bits fully overlap with the API (abbreviated page index), so we can + // overwrite these bits with the API from pte1 and thereby get the correct page index. + // + // In other words: logical_address.API must be written to after logical_address.page_index! + u32 page_index_from_hash = i ^ pte1.VSID; + if (pte1.H) + page_index_from_hash = ~page_index_from_hash; + EffectiveAddress logical_address; + logical_address.offset = 0; + logical_address.page_index = page_index_from_hash; + logical_address.API = pte1.API; + + for (u32 k = 0; k < std::size(m_ppc_state.sr); ++k) + { + const auto sr = UReg_SR{m_ppc_state.sr[k]}; + if (sr.VSID != pte1.VSID || sr.T != 0) + continue; + + logical_address.SR = k; + + // Block address translation takes priority over page address translation. 
+        if (m_dbat_table[logical_address.Hex >> PowerPC::BAT_INDEX_SHIFT] &
+            PowerPC::BAT_MAPPED_BIT)
+        {
+          continue;
+        }
+
+        // Fast accesses don't support memchecks, so force slow accesses by removing fastmem
+        // mappings for all overlapping virtual pages.
+        constexpr u32 logical_size = PowerPC::HW_PAGE_SIZE;
+        if (m_power_pc.GetMemChecks().OverlapsMemcheck(logical_address.Hex, logical_size))
+          continue;
+
+        const u32 physical_address = pte2.RPN << 12;
+
+        // Important: This doesn't overwrite anything already present in m_page_mappings.
+        m_page_mappings.emplace(logical_address.Hex, physical_address);
+
+        // HACK: We set R and C, which indicate whether a page has been read from and written to
+        // respectively, when a page is mapped rather than when it's actually accessed. The latter
+        // is probably possible using some fault handling logic, but for now it seems like more
+        // work than it's worth.
+        if (!pte2.R || !pte2.C)
+        {
+          pte2.R = 1;
+          pte2.C = 1;
+
+          const u32 pte2_swapped = Common::swap32(pte2.Hex);
+          std::memcpy(page_table_view + pte_addr - page_table_base + 4, &pte2_swapped,
+                      sizeof(pte2_swapped));
+        }
+      }
+    }
+  }
+  };
+
+  // We need to read all H=0 PTEs first, because H=0 takes priority over H=1.
+  read_page_table(0);
+  read_page_table(1);
+
+  m_memory.UpdatePageTableMappings(m_page_mappings);
+#endif
+}
diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h
index 4db2c68247..0b7dd8a668 100644
--- a/Source/Core/Core/PowerPC/MMU.h
+++ b/Source/Core/Core/PowerPC/MMU.h
@@ -5,12 +5,15 @@
 
 #include
 #include
+#include <map>
 #include
 #include
 
 #include "Common/BitField.h"
 #include "Common/CommonTypes.h"
 
+class PointerWrap;
+
 namespace Core
 {
 class CPUThreadGuard;
@@ -116,6 +119,9 @@ public:
   MMU& operator=(MMU&& other) = delete;
   ~MMU();
 
+  void Reset();
+  void DoState(PointerWrap& p);
+
   // Routines for debugger UI, cheats, etc. to access emulated memory from the
   // perspective of the CPU. Not for use by core emulation routines.
   // Use "Host" prefix.
@@ -240,6 +246,7 @@
   // TLB functions
   void SDRUpdated();
   void SRUpdated();
   void InvalidateTLBEntry(u32 address);
+  void PageTableUpdated();
   void DBATUpdated();
   void IBATUpdated();
 
@@ -326,6 +333,10 @@
   PowerPC::PowerPCManager& m_power_pc;
   PowerPC::PowerPCState& m_ppc_state;
 
+  // STATE_TO_SAVE
+  std::map<u32, u32> m_page_mappings;
+  // END STATE_TO_SAVE
+
   BatTable m_ibat_table;
   BatTable m_dbat_table;
 };
diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp
index 1e75d7b5af..255ad7d0be 100644
--- a/Source/Core/Core/PowerPC/PowerPC.cpp
+++ b/Source/Core/Core/PowerPC/PowerPC.cpp
@@ -105,6 +105,9 @@ void PowerPCManager::DoState(PointerWrap& p)
   m_ppc_state.iCache.DoState(memory, p);
   m_ppc_state.dCache.DoState(memory, p);
 
+  auto& mmu = m_system.GetMMU();
+  mmu.DoState(p);
+
   if (p.IsReadMode())
   {
     if (!m_ppc_state.m_enable_dcache)
@@ -116,7 +119,6 @@
     RoundingModeUpdated(m_ppc_state);
     RecalculateAllFeatureFlags(m_ppc_state);
 
-    auto& mmu = m_system.GetMMU();
     mmu.IBATUpdated();
     mmu.DBATUpdated();
   }
@@ -253,6 +255,10 @@ void PowerPCManager::RefreshConfig()
     {
       INFO_LOG_FMT(POWERPC, "Flushing data cache");
       m_ppc_state.dCache.FlushAll(m_system.GetMemory());
+
+      // No page table mappings are created when accurate dcache emulation is enabled.
+      // If there are any that can be created, let's create them now.
+      m_system.GetMMU().PageTableUpdated();
     }
   }
 
@@ -282,6 +288,7 @@ void PowerPCManager::Reset()
   ResetRegisters();
   m_ppc_state.iCache.Reset(m_system.GetJitInterface());
   m_ppc_state.dCache.Reset();
+  m_system.GetMMU().Reset();
 }
 
 void PowerPCManager::ScheduleInvalidateCacheThreadSafe(u32 address)
diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp
index a947ad19a3..76bab5c634 100644
--- a/Source/Core/Core/State.cpp
+++ b/Source/Core/Core/State.cpp
@@ -95,7 +95,7 @@ static size_t s_state_writes_in_queue;
 static std::condition_variable s_state_write_queue_is_empty;
 
 // Don't forget to increase this after doing changes on the savestate system
-constexpr u32 STATE_VERSION = 175;  // Last changed in PR 13751
+constexpr u32 STATE_VERSION = 176;  // Last changed in PR 13768
 
 // Increase this if the StateExtendedHeader definition changes
 constexpr u32 EXTENDED_HEADER_VERSION = 1;  // Last changed in PR 12217

From dac13baade0c2622aad7ded64e21506797387e00 Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Fri, 20 Jun 2025 09:16:55 +0200
Subject: [PATCH 4/6] Core: Postpone page table updates when DR isn't set

Page table mappings are only used when DR is set, so if page tables are
updated when DR isn't set, we can defer updating the page table mappings
until DR gets set. This lets us batch page table updates in the Disney
Trio of Destruction, improving performance when the games are loading
data. It doesn't help much for GameCube games, because those run tlbie
with DR set.

The PowerPCState struct has had its members slightly reordered. I had to
put pagetable_update_pending less than 4 KiB from the start so AArch64's
LDRB (immediate) can access it, and that gave me an opportunity to move
around some other members to cut down on padding.
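
In short, the deferral is a dirty flag that gets flushed on the DR=0 to
DR=1 transition. A condensed sketch of the pattern (simplified from the
MMU and MSRUpdated changes below):

  // On tlbie or SR writes while DR=0: just mark the mappings stale.
  void OnPageTableWrite(PowerPC::PowerPCState& ppc, PowerPC::MMU& mmu)
  {
    if (ppc.msr.DR)
      mmu.PageTableUpdated();  // translation is active, update now
    else
      ppc.pagetable_update_pending = true;  // batch until DR is set
  }

  // On mtmsr/rfi/exceptions, once DR becomes 1: flush the batched work.
  void OnMSRUpdated(Core::System& system)
  {
    auto& ppc = system.GetPPCState();
    if (ppc.msr.DR && ppc.pagetable_update_pending)
      system.GetMMU().PageTableUpdated();
  }

As for the 4 KiB requirement: AArch64's LDRB (immediate, unsigned
offset) encodes a 12-bit byte offset (0 through 4095), so a
single-instruction load of the flag relative to the PPC state pointer
only works if the member sits within the first 0x1000 bytes of
PowerPCState; the JIT guards this with a static_assert.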
--- Source/Core/Core/Boot/Boot.cpp | 4 +-- Source/Core/Core/Boot/Boot.h | 2 +- Source/Core/Core/Boot/Boot_BS2Emu.cpp | 9 +++--- Source/Core/Core/FifoPlayer/FifoPlayer.cpp | 2 +- Source/Core/Core/IOS/MIOS.cpp | 2 +- Source/Core/Core/PowerPC/Expression.cpp | 2 +- Source/Core/Core/PowerPC/GDBStub.cpp | 2 +- .../Interpreter/Interpreter_Branch.cpp | 2 +- .../Interpreter_SystemRegisters.cpp | 2 +- Source/Core/Core/PowerPC/Jit64/Jit.cpp | 23 +++++++++++++- .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 13 +++++--- Source/Core/Core/PowerPC/JitArm64/Jit.cpp | 31 ++++++++++++++++++- Source/Core/Core/PowerPC/MMU.cpp | 16 ++++++++-- Source/Core/Core/PowerPC/MMU.h | 1 + Source/Core/Core/PowerPC/PowerPC.cpp | 13 ++++++-- Source/Core/Core/PowerPC/PowerPC.h | 26 +++++++++------- .../DolphinQt/Debugger/RegisterWidget.cpp | 2 +- 17 files changed, 115 insertions(+), 37 deletions(-) diff --git a/Source/Core/Core/Boot/Boot.cpp b/Source/Core/Core/Boot/Boot.cpp index e43ffc52d4..7509324449 100644 --- a/Source/Core/Core/Boot/Boot.cpp +++ b/Source/Core/Core/Boot/Boot.cpp @@ -460,7 +460,7 @@ bool CBoot::Load_BS2(Core::System& system, const std::string& boot_rom_filename) ppc_state.pc = 0x81200150; - PowerPC::MSRUpdated(ppc_state); + PowerPC::MSRUpdated(system); return true; } @@ -530,7 +530,7 @@ bool CBoot::BootUp(Core::System& system, const Core::CPUThreadGuard& guard, auto& ppc_state = system.GetPPCState(); - SetupMSR(ppc_state); + SetupMSR(system); SetupHID(ppc_state, system.IsWii()); SetupBAT(system, system.IsWii()); CopyDefaultExceptionHandlers(system); diff --git a/Source/Core/Core/Boot/Boot.h b/Source/Core/Core/Boot/Boot.h index f892ee7c75..087245ee0f 100644 --- a/Source/Core/Core/Boot/Boot.h +++ b/Source/Core/Core/Boot/Boot.h @@ -169,7 +169,7 @@ private: static bool Boot_WiiWAD(Core::System& system, const DiscIO::VolumeWAD& wad); static bool BootNANDTitle(Core::System& system, u64 title_id); - static void SetupMSR(PowerPC::PowerPCState& ppc_state); + static void SetupMSR(Core::System& system); static void SetupHID(PowerPC::PowerPCState& ppc_state, bool is_wii); static void SetupBAT(Core::System& system, bool is_wii); static bool RunApploader(Core::System& system, const Core::CPUThreadGuard& guard, bool is_wii, diff --git a/Source/Core/Core/Boot/Boot_BS2Emu.cpp b/Source/Core/Core/Boot/Boot_BS2Emu.cpp index 884a11bd87..4c37069a2e 100644 --- a/Source/Core/Core/Boot/Boot_BS2Emu.cpp +++ b/Source/Core/Core/Boot/Boot_BS2Emu.cpp @@ -68,14 +68,15 @@ void CBoot::RunFunction(Core::System& system, u32 address) power_pc.SingleStep(); } -void CBoot::SetupMSR(PowerPC::PowerPCState& ppc_state) +void CBoot::SetupMSR(Core::System& system) { + PowerPC::PowerPCState& ppc_state = system.GetPPCState(); // 0x0002032 ppc_state.msr.RI = 1; ppc_state.msr.DR = 1; ppc_state.msr.IR = 1; ppc_state.msr.FP = 1; - PowerPC::MSRUpdated(ppc_state); + PowerPC::MSRUpdated(system); } void CBoot::SetupHID(PowerPC::PowerPCState& ppc_state, bool is_wii) @@ -286,7 +287,7 @@ bool CBoot::EmulatedBS2_GC(Core::System& system, const Core::CPUThreadGuard& gua auto& ppc_state = system.GetPPCState(); - SetupMSR(ppc_state); + SetupMSR(system); SetupHID(ppc_state, /*is_wii*/ false); SetupBAT(system, /*is_wii*/ false); @@ -593,7 +594,7 @@ bool CBoot::EmulatedBS2_Wii(Core::System& system, const Core::CPUThreadGuard& gu auto& ppc_state = system.GetPPCState(); - SetupMSR(ppc_state); + SetupMSR(system); SetupHID(ppc_state, /*is_wii*/ true); SetupBAT(system, /*is_wii*/ true); diff --git a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp 
b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp
index bb1ae839f5..e9cafcd6a6 100644
--- a/Source/Core/Core/FifoPlayer/FifoPlayer.cpp
+++ b/Source/Core/Core/FifoPlayer/FifoPlayer.cpp
@@ -650,7 +650,7 @@ void FifoPlayer::LoadMemory()
     HID4(ppc_state).SBE = 1;
   }
 
-  PowerPC::MSRUpdated(ppc_state);
+  PowerPC::MSRUpdated(m_system);
 
   auto& mmu = m_system.GetMMU();
   mmu.DBATUpdated();
diff --git a/Source/Core/Core/IOS/MIOS.cpp b/Source/Core/Core/IOS/MIOS.cpp
index d47400c9d2..fb0ce67f67 100644
--- a/Source/Core/Core/IOS/MIOS.cpp
+++ b/Source/Core/Core/IOS/MIOS.cpp
@@ -89,7 +89,7 @@ bool Load(Core::System& system)
   PowerPC::PowerPCState& ppc_state = power_pc.GetPPCState();
   ppc_state.msr.Hex = 0;
   ppc_state.pc = 0x3400;
-  PowerPC::MSRUpdated(ppc_state);
+  PowerPC::MSRUpdated(system);
 
   NOTICE_LOG_FMT(IOS, "Loaded MIOS and bootstrapped PPC.");
diff --git a/Source/Core/Core/PowerPC/Expression.cpp b/Source/Core/Core/PowerPC/Expression.cpp
index bd97e8a8b1..315faa9104 100644
--- a/Source/Core/Core/PowerPC/Expression.cpp
+++ b/Source/Core/Core/PowerPC/Expression.cpp
@@ -469,7 +469,7 @@ void Expression::SynchronizeBindings(Core::System& system, SynchronizeDirection
     else
     {
       ppc_state.msr.Hex = static_cast<u32>(static_cast<u64>(v->value));
-      PowerPC::MSRUpdated(ppc_state);
+      PowerPC::MSRUpdated(system);
     }
     break;
   }
diff --git a/Source/Core/Core/PowerPC/GDBStub.cpp b/Source/Core/Core/PowerPC/GDBStub.cpp
index 92204d8be9..a20090d9c9 100644
--- a/Source/Core/Core/PowerPC/GDBStub.cpp
+++ b/Source/Core/Core/PowerPC/GDBStub.cpp
@@ -664,7 +664,7 @@ static void WriteRegister()
     break;
   case 65:
     ppc_state.msr.Hex = re32hex(bufptr);
-    PowerPC::MSRUpdated(ppc_state);
+    PowerPC::MSRUpdated(system);
     break;
   case 66:
     ppc_state.cr.Set(re32hex(bufptr));
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp
index 1d57d971b4..120482a4f6 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_Branch.cpp
@@ -161,7 +161,7 @@ void Interpreter::rfi(Interpreter& interpreter, UGeckoInstruction inst)
   // set NPC to saved offset and resume
   ppc_state.npc = SRR0(ppc_state);
 
-  PowerPC::MSRUpdated(ppc_state);
+  PowerPC::MSRUpdated(interpreter.m_system);
 
   interpreter.m_end_block = true;
 }
diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
index e32875cdef..26876537f4 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
@@ -181,7 +181,7 @@ void Interpreter::mtmsr(Interpreter& interpreter, UGeckoInstruction inst)
 
   ppc_state.msr.Hex = ppc_state.gpr[inst.RS];
 
-  PowerPC::MSRUpdated(ppc_state);
+  PowerPC::MSRUpdated(interpreter.m_system);
 
   // FE0/FE1 may have been set
   CheckFPExceptions(ppc_state);
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.cpp b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
index b09279d458..5199dc8a1e 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.cpp
@@ -506,6 +506,8 @@ void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg)
 {
   ASSERT(!msr.IsSimpleReg(scratch_reg));
 
+  constexpr u32 dr_bit = 1 << UReg_MSR{}.DR.StartBit();
+
   // Update mem_ptr
   auto& memory = m_system.GetMemory();
   if (msr.IsImm())
@@ -517,7 +519,7 @@
   {
     MOV(64, R(RMEM), ImmPtr(memory.GetLogicalBase()));
     MOV(64, R(scratch_reg), 
ImmPtr(memory.GetPhysicalBase())); - TEST(32, msr, Imm32(1 << (31 - 27))); + TEST(32, msr, Imm32(dr_bit)); CMOVcc(64, RMEM, R(scratch_reg), CC_Z); } MOV(64, PPCSTATE(mem_ptr), R(RMEM)); @@ -541,6 +543,25 @@ void Jit64::MSRUpdated(const OpArg& msr, X64Reg scratch_reg) OR(32, R(scratch_reg), Imm32(other_feature_flags)); MOV(32, PPCSTATE(feature_flags), R(scratch_reg)); } + + // Call PageTableUpdatedFromJit if needed + if (!msr.IsImm() || UReg_MSR(msr.Imm32()).DR) + { + gpr.Flush(); + fpr.Flush(); + FixupBranch dr_unset; + if (!msr.IsImm()) + { + TEST(32, msr, Imm32(dr_bit)); + dr_unset = J_CC(CC_Z); + } + CMP(8, PPCSTATE(pagetable_update_pending), Imm8(0)); + FixupBranch update_not_pending = J_CC(CC_E); + ABI_CallFunctionP(&PowerPC::MMU::PageTableUpdatedFromJit, &m_system.GetMMU()); + SetJumpTarget(update_not_pending); + if (!msr.IsImm()) + SetJumpTarget(dr_unset); + } } void Jit64::WriteExit(u32 destination, bool bl, u32 after) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 6331358359..c485e39517 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -436,11 +436,14 @@ void Jit64::mtmsr(UGeckoInstruction inst) FALLBACK_IF(jo.fp_exceptions); { - RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read); - RegCache::Realize(Rs); - MOV(32, PPCSTATE(msr), Rs); - - MSRUpdated(Rs, RSCRATCH2); + OpArg Rs_op_arg; + { + RCOpArg Rs = gpr.BindOrImm(inst.RS, RCMode::Read); + RegCache::Realize(Rs); + MOV(32, PPCSTATE(msr), Rs); + Rs_op_arg = Rs; + } + MSRUpdated(Rs_op_arg, RSCRATCH2); } gpr.Flush(); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 65a452ceca..b648510672 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -445,10 +445,27 @@ void JitArm64::MSRUpdated(u32 msr) MOVI2R(WA, feature_flags); STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags)); } + + // Call PageTableUpdatedFromJit if needed + if (UReg_MSR(msr).DR) + { + gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); + fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG); + + auto WA = gpr.GetScopedReg(); + + static_assert(PPCSTATE_OFF(pagetable_update_pending) < 0x1000); + LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(pagetable_update_pending)); + FixupBranch update_not_pending = CBZ(WA); + ABI_CallFunction(&PowerPC::MMU::PageTableUpdatedFromJit, &m_system.GetMMU()); + SetJumpTarget(update_not_pending); + } } void JitArm64::MSRUpdated(ARM64Reg msr) { + constexpr LogicalImm dr_bit(1ULL << UReg_MSR{}.DR.StartBit(), GPRSize::B32); + auto WA = gpr.GetScopedReg(); ARM64Reg XA = EncodeRegTo64(WA); @@ -456,7 +473,7 @@ void JitArm64::MSRUpdated(ARM64Reg msr) auto& memory = m_system.GetMemory(); MOVP2R(MEM_REG, jo.fastmem ? memory.GetLogicalBase() : memory.GetLogicalPageMappingsBase()); MOVP2R(XA, jo.fastmem ? 
memory.GetPhysicalBase() : memory.GetPhysicalPageMappingsBase());
-  TST(msr, LogicalImm(1 << (31 - 27), GPRSize::B32));
+  TST(msr, dr_bit);
   CSEL(MEM_REG, MEM_REG, XA, CCFlags::CC_NEQ);
   STR(IndexType::Unsigned, MEM_REG, PPC_REG, PPCSTATE_OFF(mem_ptr));
 
@@ -470,6 +487,18 @@
   if (other_feature_flags != 0)
     ORR(WA, WA, LogicalImm(other_feature_flags, GPRSize::B32));
   STR(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(feature_flags));
+
+  // Call PageTableUpdatedFromJit if needed
+  MOV(WA, msr);
+  gpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
+  fpr.Flush(FlushMode::All, ARM64Reg::INVALID_REG);
+  FixupBranch dr_unset = TBZ(WA, UReg_MSR{}.DR.StartBit());
+  static_assert(PPCSTATE_OFF(pagetable_update_pending) < 0x1000);
+  LDRB(IndexType::Unsigned, WA, PPC_REG, PPCSTATE_OFF(pagetable_update_pending));
+  FixupBranch update_not_pending = CBZ(WA);
+  ABI_CallFunction(&PowerPC::MMU::PageTableUpdatedFromJit, &m_system.GetMMU());
+  SetJumpTarget(update_not_pending);
+  SetJumpTarget(dr_unset);
 }
 
 void JitArm64::WriteExit(u32 destination, bool LK, u32 exit_address_after_return,
diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp
index 63cfa0afa6..fc89187b4e 100644
--- a/Source/Core/Core/PowerPC/MMU.cpp
+++ b/Source/Core/Core/PowerPC/MMU.cpp
@@ -1346,7 +1346,10 @@ void MMU::SDRUpdated()
 
 void MMU::SRUpdated()
 {
-  PageTableUpdated();
+  if (m_ppc_state.msr.DR)
+    PageTableUpdated();
+  else
+    m_ppc_state.pagetable_update_pending = true;
 }
 
 enum class TLBLookupResult
@@ -1437,11 +1440,15 @@ void MMU::InvalidateTLBEntry(u32 address)
   m_ppc_state.tlb[PowerPC::DATA_TLB_INDEX][entry_index].Invalidate();
   m_ppc_state.tlb[PowerPC::INST_TLB_INDEX][entry_index].Invalidate();
 
-  PageTableUpdated();
+  if (m_ppc_state.msr.DR)
+    PageTableUpdated();
+  else
+    m_ppc_state.pagetable_update_pending = true;
 }
 
 void MMU::PageTableUpdated()
 {
+  m_ppc_state.pagetable_update_pending = false;
 #ifndef _ARCH_32
@@ -1556,6 +1563,11 @@ void MMU::PageTableUpdated()
 #endif
 }
 
+void MMU::PageTableUpdatedFromJit(MMU* mmu)
+{
+  mmu->PageTableUpdated();
+}
+
 // Page Address Translation
 template <const XCheckTLBFlag flag>
 MMU::TranslateAddressResult MMU::TranslatePageAddress(const EffectiveAddress address, bool* wi)
diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h
index 0b7dd8a668..5293de56b0 100644
--- a/Source/Core/Core/PowerPC/MMU.h
+++ b/Source/Core/Core/PowerPC/MMU.h
@@ -247,6 +247,7 @@ public:
   void SRUpdated();
   void InvalidateTLBEntry(u32 address);
   void PageTableUpdated();
+  static void PageTableUpdatedFromJit(MMU* mmu);
   void DBATUpdated();
   void IBATUpdated();
 
diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp
index 255ad7d0be..ca83370577 100644
--- a/Source/Core/Core/PowerPC/PowerPC.cpp
+++ b/Source/Core/Core/PowerPC/PowerPC.cpp
@@ -97,6 +97,7 @@ void PowerPCManager::DoState(PointerWrap& p)
   p.DoArray(m_ppc_state.tlb);
   p.Do(m_ppc_state.pagetable_base);
   p.Do(m_ppc_state.pagetable_hashmask);
+  p.Do(m_ppc_state.pagetable_update_pending);
 
   p.Do(m_ppc_state.reserve);
   p.Do(m_ppc_state.reserve_address);
@@ -283,6 +284,7 @@ void PowerPCManager::Reset()
 {
   m_ppc_state.pagetable_base = 0;
   m_ppc_state.pagetable_hashmask = 0;
+  m_ppc_state.pagetable_update_pending = false;
   m_ppc_state.tlb = {};
 
   ResetRegisters();
@@ -576,7 +578,7 @@ void PowerPCManager::CheckExceptions()
   }
 
   m_system.GetJitInterface().UpdateMembase();
-  MSRUpdated(m_ppc_state);
+  MSRUpdated(m_system);
 }
 
 void PowerPCManager::CheckExternalExceptions()
@@ -629,7 +631,7 @@ void 
PowerPCManager::CheckExternalExceptions()
       ERROR_LOG_FMT(POWERPC, "Unknown EXTERNAL INTERRUPT exception: Exceptions == {:08x}",
                     exceptions);
     }
-    MSRUpdated(m_ppc_state);
+    MSRUpdated(m_system);
   }
 
   m_system.GetJitInterface().UpdateMembase();
@@ -689,15 +691,20 @@ void RoundingModeUpdated(PowerPCState& ppc_state)
   Common::FPU::SetSIMDMode(ppc_state.fpscr.RN, ppc_state.fpscr.NI);
 }
 
-void MSRUpdated(PowerPCState& ppc_state)
+void MSRUpdated(Core::System& system)
 {
   static_assert(UReg_MSR{}.DR.StartBit() == 4);
   static_assert(UReg_MSR{}.IR.StartBit() == 5);
   static_assert(FEATURE_FLAG_MSR_DR == 1 << 0);
   static_assert(FEATURE_FLAG_MSR_IR == 1 << 1);
 
+  PowerPCState& ppc_state = system.GetPPCState();
+
   ppc_state.feature_flags = static_cast<CPUEmuFeatureFlags>(
       (ppc_state.feature_flags & FEATURE_FLAG_PERFMON) | ((ppc_state.msr.Hex >> 4) & 0x3));
+
+  if (ppc_state.msr.DR && ppc_state.pagetable_update_pending)
+    system.GetMMU().PageTableUpdated();
 }
 
 void MMCRUpdated(PowerPCState& ppc_state)
diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h
index 834b1fb432..7f931c2b8f 100644
--- a/Source/Core/Core/PowerPC/PowerPC.h
+++ b/Source/Core/Core/PowerPC/PowerPC.h
@@ -122,6 +122,9 @@ struct PowerPCState
   u32 pc = 0;  // program counter
   u32 npc = 0;
 
+  // Storage for the stack pointer of the BLR optimization.
+  u8* stored_stack_pointer = nullptr;
+
   // gather pipe pointer for JIT access
   u8* gather_pipe_ptr = nullptr;
   u8* gather_pipe_base_ptr = nullptr;
@@ -157,6 +160,14 @@ struct PowerPCState
   // lscbx
   u16 xer_stringctrl = 0;
 
+  // Reservation monitor for lwarx and its friend stwcxd. These two don't really need to be
+  // this early in the struct, but due to how the padding works out, they fit nicely here.
+  u32 reserve_address;
+  bool reserve;
+
+  bool pagetable_update_pending = false;
+  bool m_enable_dcache = false;
+
 #ifdef _M_X86_64
   // This member exists only for the purpose of an assertion that its offset <= 0x100.
   std::tuple<> above_fits_in_first_0x100;
@@ -171,22 +182,15 @@ struct PowerPCState
   // JitArm64 needs 64-bit alignment for SPR_TL.
   alignas(8) u32 spr[1024]{};
 
-  // Storage for the stack pointer of the BLR optimization.
-  u8* stored_stack_pointer = nullptr;
   u8* mem_ptr = nullptr;
 
-  std::array<std::array<TLBEntry, TLB_SIZE>, NUM_TLBS> tlb;
-
   u32 pagetable_base = 0;
   u32 pagetable_hashmask = 0;
 
-  InstructionCache iCache;
-  bool m_enable_dcache = false;
-  Cache dCache;
+  std::array<std::array<TLBEntry, TLB_SIZE>, NUM_TLBS> tlb;
 
-  // Reservation monitor for lwarx and its friend stwcxd.
-  bool reserve;
-  u32 reserve_address;
+  InstructionCache iCache;
+  Cache dCache;
 
   void UpdateCR1()
   {
@@ -354,7 +358,7 @@
 #define TU(ppc_state) (ppc_state).spr[SPR_TU]
 
 void RoundingModeUpdated(PowerPCState& ppc_state);
-void MSRUpdated(PowerPCState& ppc_state);
+void MSRUpdated(Core::System& system);
 void MMCRUpdated(PowerPCState& ppc_state);
 void RecalculateAllFeatureFlags(PowerPCState& ppc_state);
 
diff --git a/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp b/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp
index 774c58abfe..8ebfd0241e 100644
--- a/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp
+++ b/Source/Core/DolphinQt/Debugger/RegisterWidget.cpp
@@ -453,7 +453,7 @@ void RegisterWidget::PopulateTable()
       23, 5, RegisterType::msr, "MSR", [this] { return m_system.GetPPCState().msr.Hex; },
       [this](u64 value) {
         m_system.GetPPCState().msr.Hex = value;
-        PowerPC::MSRUpdated(m_system.GetPPCState());
+        PowerPC::MSRUpdated(m_system);
       });
 
   // SRR 0-1

From d3cfa293e918d73bc3a872551dbf6dda422dfeb0 Mon Sep 17 00:00:00 2001
From: JosJuice
Date: Fri, 20 Jun 2025 16:51:30 +0200
Subject: [PATCH 5/6] Core: Update page table mappings incrementally

Removing and re-adding every page table mapping every time something
changes in the page table is very slow. Instead, let's generate a diff
and ask Memmap to update only the diff.
---
 Source/Core/Core/HW/Memmap.cpp                |  46 +-
 Source/Core/Core/HW/Memmap.h                  |   6 +-
 .../Core/PowerPC/JitArm64/JitArm64_RegCache.h |  12 +-
 Source/Core/Core/PowerPC/MMU.cpp              | 427 ++++++++++++++----
 Source/Core/Core/PowerPC/MMU.h                |  41 +-
 Source/Core/Core/PowerPC/PowerPC.cpp          |  11 +-
 Source/Core/Core/PowerPC/PowerPC.h            |   2 +-
 7 files changed, 439 insertions(+), 106 deletions(-)

diff --git a/Source/Core/Core/HW/Memmap.cpp b/Source/Core/Core/HW/Memmap.cpp
index f51780dfaa..2241ef75d6 100644
--- a/Source/Core/Core/HW/Memmap.cpp
+++ b/Source/Core/Core/HW/Memmap.cpp
@@ -13,6 +13,7 @@
 #include
 #include
 #include
+#include <set>
 #include
 #include
 
@@ -244,11 +245,7 @@ void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table)
   }
   m_dbat_mapped_entries.clear();
 
-  for (auto& entry : m_page_table_mapped_entries)
-  {
-    m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
-  }
-  m_page_table_mapped_entries.clear();
+  RemoveAllPageTableMappings();
 
   m_logical_page_mappings.fill(nullptr);
 
@@ -302,7 +299,7 @@
           intersection_start, mapped_size, logical_address);
         continue;
       }
-      m_dbat_mapped_entries.push_back({mapped_pointer, mapped_size});
+      m_dbat_mapped_entries.push_back({mapped_pointer, mapped_size, logical_address});
     }
 
     m_logical_page_mappings[i] =
@@ -313,18 +310,12 @@
   }
 }
 
-void MemoryManager::UpdatePageTableMappings(const std::map<u32, u32>& page_mappings)
+void MemoryManager::AddPageTableMappings(const std::map<u32, u32>& mappings)
 {
   if (m_page_size > PowerPC::HW_PAGE_SIZE)
     return;
 
-  for (auto& entry : m_page_table_mapped_entries)
-  {
-    m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
-  }
-  m_page_table_mapped_entries.clear();
-
-  for (const auto [logical_address, translated_address] : page_mappings)
+  for (const auto [logical_address, translated_address] : mappings)
   {
     if (logical_address % m_page_alignment != 0)
       continue;
@@ -357,13 +348,38 @@ void MemoryManager::AddPageTableMappings(const std::map<u32, u32>& mappings)
             intersection_start, mapped_size, logical_address);
           continue;
         }
-        m_page_table_mapped_entries.push_back({mapped_pointer, mapped_size});
+        m_page_table_mapped_entries.push_back({mapped_pointer, mapped_size, logical_address});
       }
     }
   }
 }
 
+void MemoryManager::RemovePageTableMappings(const std::set<u32>& mappings)
+{
+  if (m_page_size > PowerPC::HW_PAGE_SIZE)
+    return;
+
+  if (mappings.empty())
+    return;
+
+  std::erase_if(m_page_table_mapped_entries, [this, &mappings](const LogicalMemoryView& entry) {
+    const bool remove = mappings.contains(entry.logical_address);
+    if (remove)
+      m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
+    return remove;
+  });
+}
+
+void MemoryManager::RemoveAllPageTableMappings()
+{
+  for (auto& entry : m_page_table_mapped_entries)
+  {
+    m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
+  }
+  m_page_table_mapped_entries.clear();
+}
+
 void MemoryManager::DoState(PointerWrap& p)
 {
   const u32 current_ram_size = GetRamSize();
diff --git a/Source/Core/Core/HW/Memmap.h b/Source/Core/Core/HW/Memmap.h
index 8d224e1ca7..8941474f31 100644
--- a/Source/Core/Core/HW/Memmap.h
+++ b/Source/Core/Core/HW/Memmap.h
@@ -6,6 +6,7 @@
 #include
 #include
 #include
+#include <set>
 #include
 #include
 #include
@@ -55,6 +56,7 @@ struct LogicalMemoryView
 {
   void* mapped_pointer;
   u32 mapped_size;
+  u32 logical_address;
 };
 
 class MemoryManager
@@ -101,7 +103,9 @@ public:
   void DoState(PointerWrap& p);
 
   void UpdateDBATMappings(const PowerPC::BatTable& dbat_table);
-  void UpdatePageTableMappings(const std::map<u32, u32>& page_mappings);
+  void AddPageTableMappings(const std::map<u32, u32>& mappings);
+  void RemovePageTableMappings(const std::set<u32>& mappings);
+  void RemoveAllPageTableMappings();
 
   void Clear();
 
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
index 5164745cd8..11b70ceb03 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
@@ -33,19 +33,19 @@ constexpr Arm64Gen::ARM64Reg DISPATCHER_PC = Arm64Gen::ARM64Reg::W26;
                  PowerPC::PowerPCState, elem);                                                     \
     _Pragma("GCC diagnostic pop")                                                                  \
   }())
+#else
+#define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem))
+#endif
 
 #define PPCSTATE_OFF_ARRAY(elem, i)                                                                \
   (PPCSTATE_OFF(elem[0]) + sizeof(PowerPC::PowerPCState::elem[0]) * (i))
 
-#else
-#define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem))
-#define PPCSTATE_OFF_ARRAY(elem, i)                                                                \
-  (offsetof(PowerPC::PowerPCState, elem[0]) + sizeof(PowerPC::PowerPCState::elem[0]) * (i))
-#endif
+#define PPCSTATE_OFF_STD_ARRAY(elem, i)                                                            \
+  (PPCSTATE_OFF(elem) + sizeof(PowerPC::PowerPCState::elem[0]) * (i))
 
 #define PPCSTATE_OFF_GPR(i) PPCSTATE_OFF_ARRAY(gpr, i)
 #define PPCSTATE_OFF_CR(i) PPCSTATE_OFF_ARRAY(cr.fields, i)
-#define PPCSTATE_OFF_SR(i) PPCSTATE_OFF_ARRAY(sr, i)
+#define PPCSTATE_OFF_SR(i) PPCSTATE_OFF_STD_ARRAY(sr, i)
 #define PPCSTATE_OFF_SPR(i) PPCSTATE_OFF_ARRAY(spr, i)
 
 static_assert(std::is_same_v);
diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp
index fc89187b4e..72f2bc9038 100644
--- a/Source/Core/Core/PowerPC/MMU.cpp
+++ b/Source/Core/Core/PowerPC/MMU.cpp
@@ -25,10 +25,17 @@
 
 #include "Core/PowerPC/MMU.h"
 
+#include
 #include
 #include
 #include
+#include
 #include
+#include
+
+#ifdef _M_X86_64
+#include <emmintrin.h>
+#endif
 
 #include "Common/Align.h"
 #include "Common/Assert.h"
 #include "Common/BitUtils.h"
@@ -61,18 +68,37 @@ MMU::~MMU() = default;
 
 void MMU::Reset()
 {
+  m_page_table.clear();
   m_page_mappings.clear();
 #ifndef _ARCH_32
-  m_memory.UpdatePageTableMappings(m_page_mappings);
+  m_memory.RemoveAllPageTableMappings();
m_memory.RemoveAllPageTableMappings(); #endif } -void MMU::DoState(PointerWrap& p) +void MMU::DoState(PointerWrap& p, bool sr_changed) { - // Instead of storing m_page_mappings in savestates, we *could* recalculate it based on memory + // Instead of storing m_page_table in savestates, we *could* refetch it from memory // here in DoState, but this could lead to us getting a more up-to-date set of page mappings // than we had when the savestate was created, which could be a problem for TAS determinism. - p.Do(m_page_mappings); + if (p.IsReadMode()) + { + if (sr_changed) + { + // Non-incremental update of page table mappings. + p.Do(m_page_table); + SRUpdated(); + } + else + { + // Incremental update of page table mappings. + p.Do(m_temp_page_table); + PageTableUpdated(m_temp_page_table); + } + } + else + { + p.Do(m_page_table); + } } // Overloaded byteswap functions, for use within the templated functions below. @@ -1346,10 +1372,12 @@ void MMU::SDRUpdated() void MMU::SRUpdated() { - if (m_ppc_state.msr.DR) - PageTableUpdated(); - else - m_ppc_state.pagetable_update_pending = true; +#ifndef _ARCH_32 + // Our incremental handling of page table updates can't handle SR changing, so throw away all + // existing mappings and then reparse the whole page table. + m_memory.RemoveAllPageTableMappings(); + ReloadPageTable(); +#endif } enum class TLBLookupResult @@ -1449,9 +1477,13 @@ void MMU::InvalidateTLBEntry(u32 address) void MMU::PageTableUpdated() { m_ppc_state.pagetable_update_pending = false; -#ifndef _ARCH_32 - m_page_mappings.clear(); +#ifdef _ARCH_32 + // If a savestate is brought from a 64-bit system to a 32-bit system, clear m_page_table. + // Not doing this means a stale m_page_table would stick around, which could be a problem + // if the savestate is then brought to a 64-bit system again. + m_page_table.clear(); +#else if (m_ppc_state.m_enable_dcache) { // Because fastmem isn't in use when accurate dcache emulation is enabled, setting up mappings @@ -1460,9 +1492,8 @@ void MMU::PageTableUpdated() return; } - const u32 page_table_mask = m_ppc_state.pagetable_hashmask; const u32 page_table_base = m_ppc_state.pagetable_base; - const u32 page_table_end = (page_table_base | (page_table_mask << 6)) + (1 << 6); + const u32 page_table_end = (page_table_base | (m_ppc_state.pagetable_hashmask << 6)) + (1 << 6); const u32 page_table_size = page_table_end - page_table_base; u8* page_table_view = m_system.GetMemory().GetPointerForRange(page_table_base, page_table_size); @@ -1470,98 +1501,332 @@ void MMU::PageTableUpdated() { WARN_LOG_FMT(POWERPC, "Failed to read page table at {:#010x}-{:#010x}", page_table_base, page_table_end); - m_memory.UpdatePageTableMappings(m_page_mappings); + + // Remove host mappings, because we no longer know if they're up to date. + m_memory.RemoveAllPageTableMappings(); + + // Because we removed host mappings, incremental updates won't work correctly. + // Start over from scratch. 
+    m_page_table.clear();
+    m_page_mappings.clear();
+    return;
+  }
+
+  PageTableUpdated(std::span(page_table_view, page_table_size));
+#endif
+}
+
+#ifndef _ARCH_32
+void MMU::ReloadPageTable()
+{
+  m_page_mappings.clear();
+
+  m_temp_page_table.clear();
+  std::swap(m_page_table, m_temp_page_table);
+  PageTableUpdated(m_temp_page_table);
+}
+
+void MMU::PageTableUpdated(std::span<u8> page_table)
+{
+  // PowerPC's priority order for PTEs that have the same logical address is as follows:
+  //
+  // * Primary PTEs (H=0) take priority over secondary PTEs (H=1).
+  // * If two PTEs have equal H values, they must be in the same PTEG due to how the hash
+  //   incorporates the logical address and H. The PTE located first in the PTEG takes priority.
+
+  m_removed_mappings.clear();
+  m_added_mappings.clear();
+
+  if (m_page_table.size() != page_table.size())
+  {
+    m_page_table.clear();
+    m_page_table.resize(page_table.size());
+  }
+
+  u8* old_page_table = m_page_table.data();
+  u8* new_page_table = page_table.data();
+
+  constexpr auto compare_64_bytes = [](const u8* a, const u8* b) -> bool {
+#ifdef _M_X86_64
+    // MSVC (x64) doesn't want to optimize the memcmp call. This 64-byte compare is performance
+    // critical in certain games like Spider-Man 2, so let's use our own vectorized version
+    // instead.
+    const __m128i a1 = _mm_load_si128(reinterpret_cast<const __m128i*>(a));
+    const __m128i b1 = _mm_load_si128(reinterpret_cast<const __m128i*>(b));
+    const __m128i cmp1 = _mm_cmpeq_epi8(a1, b1);
+    const __m128i a2 = _mm_load_si128(reinterpret_cast<const __m128i*>(a + 0x10));
+    const __m128i b2 = _mm_load_si128(reinterpret_cast<const __m128i*>(b + 0x10));
+    const __m128i cmp2 = _mm_cmpeq_epi8(a2, b2);
+    const __m128i cmp12 = _mm_and_si128(cmp1, cmp2);
+    const __m128i a3 = _mm_load_si128(reinterpret_cast<const __m128i*>(a + 0x20));
+    const __m128i b3 = _mm_load_si128(reinterpret_cast<const __m128i*>(b + 0x20));
+    const __m128i cmp3 = _mm_cmpeq_epi8(a3, b3);
+    const __m128i a4 = _mm_load_si128(reinterpret_cast<const __m128i*>(a + 0x30));
+    const __m128i b4 = _mm_load_si128(reinterpret_cast<const __m128i*>(b + 0x30));
+    const __m128i cmp4 = _mm_cmpeq_epi8(a4, b4);
+    const __m128i cmp34 = _mm_and_si128(cmp3, cmp4);
+    const __m128i cmp1234 = _mm_and_si128(cmp12, cmp34);
+    return _mm_movemask_epi8(cmp1234) == 0xFFFF;
+#else
+    return std::memcmp(std::assume_aligned<16>(a), std::assume_aligned<16>(b), 64) == 0;
+#endif
+  };
+
+  constexpr auto get_page_index = [](UPTE_Lo pte1, u32 hash) {
+    u32 page_index_from_hash = hash ^ pte1.VSID;
+    if (pte1.H)
+      page_index_from_hash = ~page_index_from_hash;
+
+    // Due to hash masking, the upper bits of page_index_from_hash might not match the actual
+    // page index. But these bits fully overlap with the API (abbreviated page index), so we can
+    // overwrite these bits with the API from pte1 and thereby get the correct page index.
+    //
+    // In other words: logical_address.API must be written to after logical_address.page_index!
+ + EffectiveAddress logical_address; + logical_address.offset = 0; + logical_address.page_index = page_index_from_hash; + logical_address.API = pte1.API; + return logical_address; + }; + + const auto fixup_shadowed_mappings = [this, &get_page_index, old_page_table, new_page_table]( + UPTE_Lo pte1, u32 page_table_offset, bool* run_pass_2) { + DEBUG_ASSERT(pte1.V == 1); + + bool switched_to_secondary = false; + + while (true) { - for (u32 j = 0; j < 8; ++j) + const u32 big_endian_pte1 = Common::swap32(pte1.Hex); + const u32 pteg_end = Common::AlignUp(page_table_offset, 64); + for (u32 i = page_table_offset; i < pteg_end; i += 8) { - const u32 pte_addr = (page_table_base | ((i & page_table_mask) << 6)) + j * 8; + if (std::memcmp(new_page_table + i, &big_endian_pte1, sizeof(big_endian_pte1)) == 0) + { + // We've found a PTE that has V set and has the same logical address as the passed-in PTE. + // The found PTE was previously skipped over because the passed-in PTE had priority, but + // the passed-in PTE is being changed, so now we need to re-check the found PTE. This will + // happen naturally later in the loop that's calling this function, but only if the 8-byte + // memcmp reports that the PTE has changed. Therefore, if the PTE currently compares + // equal, change an unused bit in the PTE. + if (std::memcmp(old_page_table + i, new_page_table + i, 8) == 0) + { + UPTE_Hi pte2(Common::swap32(old_page_table + i + 4)); + pte2.reserved_1 = pte2.reserved_1 ^ 1; + const u32 value = Common::swap32(pte2.Hex); + std::memcpy(old_page_table + i + 4, &value, sizeof(value)); - UPTE_Lo pte1(Common::swap32(page_table_view + pte_addr - page_table_base)); - UPTE_Hi pte2(Common::swap32(page_table_view + pte_addr - page_table_base + 4)); + if (switched_to_secondary) + *run_pass_2 = true; + } + return; + } + } - if (!pte1.V) - continue; + if (pte1.H == 1) + { + // We've scanned the secondary PTEG. Nothing left to do. + return; + } + else + { + // We've scanned the primary PTEG. Now let's scan the secondary PTEG. + const EffectiveAddress ea = get_page_index(pte1, page_table_offset / 64); + const u32 hash = ~(pte1.VSID ^ ea.page_index); + pte1.H = 1; + page_table_offset = + (((hash & m_ppc_state.pagetable_hashmask) << 6) | m_ppc_state.pagetable_base) - + m_ppc_state.pagetable_base; + switched_to_secondary = true; + } + } + }; - if (pte1.H != H) - continue; + const auto try_add_mapping = [this, &get_page_index, page_table](UPTE_Lo pte1, UPTE_Hi pte2, + u32 page_table_offset) { + EffectiveAddress logical_address = get_page_index(pte1, page_table_offset / 64); + for (u32 i = 0; i < std::size(m_ppc_state.sr); ++i) + { + const auto sr = UReg_SR{m_ppc_state.sr[i]}; + if (sr.VSID != pte1.VSID || sr.T != 0) + continue; + + logical_address.SR = i; + + bool host_mapping = true; + + const bool wi = (pte2.WIMG & 0b1100) != 0; + if (wi) + { // There are quirks related to uncached memory that can't be correctly emulated by fast // accesses, so we don't map uncached memory. (However, no software at all is known to // trigger these quirks through page address translation, only through block address // translation.) - const bool wi = (pte2.WIMG & 0b1100) != 0; - if (wi) - continue; + host_mapping = false; + } + else if (m_dbat_table[logical_address.Hex >> PowerPC::BAT_INDEX_SHIFT] & + PowerPC::BAT_MAPPED_BIT) + { + // Block address translation takes priority over page address translation. 
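+ // For example, under the typical GC/Wii memory layout a DBAT already maps the
+ // 0x80000000 region, so if a PTE also covers a page there, the hardware translates
+ // the access through the BAT and we must not install a page-table host mapping for it.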
+ host_mapping = false; + } + else if (m_power_pc.GetMemChecks().OverlapsMemcheck(logical_address.Hex, + PowerPC::HW_PAGE_SIZE)) + { + // Fast accesses don't support memchecks, so force slow accesses by removing fastmem + // mappings for all overlapping virtual pages. + host_mapping = false; + } - // Due to hash masking, the upper bits of page_index_from_hash might not match the actual - // page index. But these bits fully overlap with the API (abbreviated page index), so we can - // overwrite these bits with the API from pte1 and thereby get the correct page index. - // - // In other words: logical_address.API must be written to after logical_address.page_index! - u32 page_index_from_hash = i ^ pte1.VSID; - if (pte1.H) - page_index_from_hash = ~page_index_from_hash; - EffectiveAddress logical_address; - logical_address.offset = 0; - logical_address.page_index = page_index_from_hash; - logical_address.API = pte1.API; + const u32 priority = (page_table_offset % 64 / 8) | (pte1.H << 3); + const PageMapping page_mapping(pte2.RPN, host_mapping, priority); - for (u32 k = 0; k < std::size(m_ppc_state.sr); ++k) + const auto it = m_page_mappings.find(logical_address.Hex); + if (it != m_page_mappings.end()) [[unlikely]] + { + if (priority > it->second.priority) { - const auto sr = UReg_SR{m_ppc_state.sr[k]}; - if (sr.VSID != pte1.VSID || sr.T != 0) - continue; + // An existing mapping has priority. + continue; + } + else + { + // The new mapping has priority over an existing mapping. Replace the existing mapping. + if (it->second.host_mapping) + m_removed_mappings.emplace(it->first); + it->second.Hex = page_mapping.Hex; + } + } + else + { + // There's no existing mapping for this logical address. Add a new mapping. + m_page_mappings.emplace(logical_address.Hex, page_mapping); + } - logical_address.SR = k; + if (host_mapping) + { + const u32 physical_address = pte2.RPN << 12; + m_added_mappings.emplace(logical_address.Hex, physical_address); - // Block address translation takes priority over page address translation. - if (m_dbat_table[logical_address.Hex >> PowerPC::BAT_INDEX_SHIFT] & - PowerPC::BAT_MAPPED_BIT) - { - continue; - } + // HACK: We set R and C, which indicate whether a page has been read from and written to + // respectively, when a page is mapped rather than when it's actually accessed. The latter + // is probably possible using some fault handling logic, but for now it seems like more + // work than it's worth. - // Fast accesses don't support memchecks, so force slow accesses by removing fastmem - // mappings for all overlapping virtual pages. - constexpr u32 logical_size = PowerPC::HW_PAGE_SIZE; - if (m_power_pc.GetMemChecks().OverlapsMemcheck(logical_address.Hex, logical_size)) - continue; - - const u32 physical_address = pte2.RPN << 12; - - // Important: This doesn't overwrite anything already present in m_page_mappings. - m_page_mappings.emplace(logical_address.Hex, physical_address); - - // HACK: We set R and C, which indicate whether a page have been read from and written to - // respectively, when a page is mapped rather than when it's actually accessed. The latter - // is probably possible using some fault handling logic, but for now it seems like more - // work than it's worth.
- if (!pte2.R || !pte2.C) - { - pte2.R = 1; - pte2.C = 1; - - const u32 pte2_swapped = Common::swap32(pte2.Hex); - std::memcpy(page_table_view + pte_addr - page_table_base + 4, &pte2_swapped, - sizeof(pte2_swapped)); - } + const u32 pte2_swapped = Common::swap32(pte2.Hex); + std::memcpy(page_table.data() + page_table_offset + 4, &pte2_swapped, + sizeof(pte2_swapped)); } } } }; - // We need to read all H=0 PTEs first, because H=0 takes priority over H=1. - read_page_table(0); - read_page_table(1); + bool run_pass_2 = false; - m_memory.UpdatePageTableMappings(m_page_mappings); -#endif + // Pass 1: Remove old mappings and add new primary (H=0) mappings. + for (u32 i = 0; i < page_table.size(); i += 64) + { + if (compare_64_bytes(old_page_table + i, new_page_table + i)) [[likely]] + continue; + + for (u32 j = 0; j < 64; j += 8) + { + if (std::memcmp(old_page_table + i + j, new_page_table + i + j, 8) == 0) [[likely]] + continue; + + // Remove old mappings. + UPTE_Lo old_pte1(Common::swap32(old_page_table + i + j)); + if (old_pte1.V) + { + const u32 priority = (j / 8) | (old_pte1.H << 3); + EffectiveAddress logical_address = get_page_index(old_pte1, i / 64); + + for (u32 k = 0; k < std::size(m_ppc_state.sr); ++k) + { + const auto sr = UReg_SR{m_ppc_state.sr[k]}; + if (sr.VSID != old_pte1.VSID || sr.T != 0) + continue; + + logical_address.SR = k; + + const auto it = m_page_mappings.find(logical_address.Hex); + if (it != m_page_mappings.end() && priority == it->second.priority) + { + if (it->second.host_mapping) + m_removed_mappings.emplace(logical_address.Hex); + m_page_mappings.erase(it); + + // It's unlikely but theoretically possible that this was shadowing another PTE that's + // using the same logical address but has a lower priority. If this happens, we must + // make sure that we don't skip over that other PTE because of the 8-byte memcmp. + fixup_shadowed_mappings(old_pte1, i + j, &run_pass_2); + } + } + } + + // Add new primary (H=0) mappings. + UPTE_Lo new_pte1(Common::swap32(new_page_table + i + j)); + UPTE_Hi new_pte2(Common::swap32(new_page_table + i + j + 4)); + if (new_pte1.V) + { + if (new_pte1.H) + { + run_pass_2 = true; + continue; + } + + try_add_mapping(new_pte1, new_pte2, i + j); + } + + // Update our copy of the page table. + std::memcpy(old_page_table + i + j, new_page_table + i + j, 8); + } + } + + // Pass 2: Add new secondary (H=1) mappings. This is a separate pass because before we can process + // whether a mapping should be added, we first need to check all PTEs that have equal or higher + // priority to see if their mappings should be removed. For adding primary mappings, this ordering + // comes naturally from doing a linear scan of the page table from start to finish. But for adding + // secondary mappings, the primary PTEG that has priority over a given secondary PTEG is in the + // other half of the page table, so we need more than one pass through the page table. But most of + // the time, there are no secondary mappings, letting us skip the second pass. + if (run_pass_2) [[unlikely]] + { + for (u32 i = 0; i < page_table.size(); i += 64) + { + if (compare_64_bytes(old_page_table + i, new_page_table + i)) [[likely]] + continue; + + for (u32 j = 0; j < 64; j += 8) + { + if (std::memcmp(old_page_table + i + j, new_page_table + i + j, 8) == 0) [[likely]] + continue; + + UPTE_Lo new_pte1(Common::swap32(new_page_table + i + j)); + UPTE_Hi new_pte2(Common::swap32(new_page_table + i + j + 4)); + + // We don't need to check new_pte1.V and new_pte1.H. 
If the memcmp above returned nonzero, + pass 1 must have skipped running memcpy, which only happens if V and H are both set. + try_add_mapping(new_pte1, new_pte2, i + j); + + std::memcpy(old_page_table + i + j, new_page_table + i + j, 8); + } + } + } + + if (!m_removed_mappings.empty()) + m_memory.RemovePageTableMappings(m_removed_mappings); + + if (!m_added_mappings.empty()) + m_memory.AddPageTableMappings(m_added_mappings); } +#endif void MMU::PageTableUpdatedFromJit(MMU* mmu) { @@ -1794,7 +2059,11 @@ void MMU::DBATUpdated() #ifndef _ARCH_32 m_memory.UpdateDBATMappings(m_dbat_table); - m_memory.UpdatePageTableMappings(m_page_mappings); + + // Calling UpdateDBATMappings removes all fastmem page table mappings, + // so we have to recreate them. + if (!m_page_table.empty()) + ReloadPageTable(); #endif // IsOptimizable*Address and dcbz depends on the BAT mapping, so we need a flush here. diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index 5293de56b0..0ddf6bd631 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -7,7 +7,10 @@ #include #include #include +#include <set> +#include <span> #include +#include <vector> #include "Common/BitField.h" #include "Common/CommonTypes.h" @@ -120,7 +123,7 @@ public: ~MMU(); void Reset(); - void DoState(PointerWrap& p); + void DoState(PointerWrap& p, bool sr_changed); // Routines for debugger UI, cheats, etc. to access emulated memory from the // perspective of the CPU. Not for use by core emulation routines. @@ -300,6 +303,26 @@ private: explicit EffectiveAddress(u32 address) : Hex{address} {} }; + union PageMapping + { + // A small priority number wins over a larger priority number. + BitField<0, 11, u32> priority; + // Whether we're allowed to create a host mapping for this mapping. + BitField<11, 1, u32> host_mapping; + // The physical address of the page. + BitField<12, 20, u32> RPN; + + u32 Hex = 0; + + PageMapping() = default; + PageMapping(u32 RPN_, bool host_mapping_, u32 priority_) + { + RPN = RPN_; + host_mapping = host_mapping_; + priority = priority_; + } + }; + template <const XCheckTLBFlag flag> TranslateAddressResult TranslateAddress(u32 address); @@ -311,6 +334,11 @@ private: void Memcheck(u32 address, u64 var, bool write, size_t size); +#ifndef _ARCH_32 + void ReloadPageTable(); + void PageTableUpdated(std::span<u8> page_table); +#endif + void UpdateBATs(BatTable& bat_table, u32 base_spr); void UpdateFakeMMUBat(BatTable& bat_table, u32 start_addr); @@ -335,9 +363,18 @@ private: PowerPC::PowerPCState& m_ppc_state; // STATE_TO_SAVE - std::map<u32, u32> m_page_mappings; + std::vector<u8> m_page_table; // END STATE_TO_SAVE + // This keeps track of all valid page table mappings in m_page_table. + // The key is the logical address. + std::map<u32, PageMapping> m_page_mappings; + + // These are kept around just for their memory allocations. They are always cleared before use.
+ std::vector<u8> m_temp_page_table; + std::set<u32> m_removed_mappings; + std::map<u32, u32> m_added_mappings; + BatTable m_ibat_table; BatTable m_dbat_table; }; diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp index ca83370577..a50d61193e 100644 --- a/Source/Core/Core/PowerPC/PowerPC.cpp +++ b/Source/Core/Core/PowerPC/PowerPC.cpp @@ -4,6 +4,7 @@ #include "Core/PowerPC/PowerPC.h" #include +#include <array> #include #include #include @@ -80,6 +81,8 @@ void PowerPCManager::DoState(PointerWrap& p) // *((u64 *)&TL(m_ppc_state)) = SystemTimers::GetFakeTimeBase(); //works since we are little // endian and TL comes first :) + const std::array<u32, 16> old_sr = m_ppc_state.sr; + p.DoArray(m_ppc_state.gpr); p.Do(m_ppc_state.pc); p.Do(m_ppc_state.npc); @@ -107,10 +110,10 @@ void PowerPCManager::DoState(PointerWrap& p) m_ppc_state.dCache.DoState(memory, p); auto& mmu = m_system.GetMMU(); - mmu.DoState(p); - if (p.IsReadMode()) { + mmu.DoState(p, old_sr != m_ppc_state.sr); + if (!m_ppc_state.m_enable_dcache) { INFO_LOG_FMT(POWERPC, "Flushing data cache"); @@ -123,6 +126,10 @@ void PowerPCManager::DoState(PointerWrap& p) mmu.IBATUpdated(); mmu.DBATUpdated(); } + else + { + mmu.DoState(p, false); + } // SystemTimers::DecrementerSet(); // SystemTimers::TimeBaseSet(); diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index 7f931c2b8f..bd185dc594 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -175,7 +175,7 @@ struct PowerPCState alignas(16) PairedSingle ps[32]; #endif - u32 sr[16]{}; // Segment registers. + std::array<u32, 16> sr{}; // Segment registers. // special purpose registers - controls quantizers, DMA, and lots of other misc extensions. // also for power management, but we don't care about that. From 6aaa4a37ce973d7a76b5a364b4e6ba48de1b17ce Mon Sep 17 00:00:00 2001 From: JosJuice Date: Sat, 2 Aug 2025 20:52:48 +0200 Subject: [PATCH 6/6] Core: Don't create page table mappings before R/C bits are set This gets rid of the hack of setting the R and C bits pessimistically, more or less reversing the performance regression in Rogue Squadron 3. --- Source/Core/Common/MemArena.h | 14 +++- Source/Core/Common/MemArenaAndroid.cpp | 20 +++++- Source/Core/Common/MemArenaDarwin.cpp | 22 ++++++- Source/Core/Common/MemArenaUnix.cpp | 20 +++++- Source/Core/Common/MemArenaWin.cpp | 38 +++++++++-- Source/Core/Core/HW/Memmap.cpp | 86 ++++++++++++++---------- Source/Core/Core/HW/Memmap.h | 9 ++- Source/Core/Core/PowerPC/MMU.cpp | 90 +++++++++++++++----------- Source/Core/Core/PowerPC/MMU.h | 5 +- 9 files changed, 214 insertions(+), 90 deletions(-) diff --git a/Source/Core/Common/MemArena.h b/Source/Core/Common/MemArena.h index 32b8999e1f..ffa1d59ffe 100644 --- a/Source/Core/Common/MemArena.h +++ b/Source/Core/Common/MemArena.h @@ -102,10 +102,22 @@ public: /// from. /// @param size Size of the region to map. /// @param base Address within the memory region from ReserveMemoryRegion() where to map it. + /// @param writeable Whether the region should be both readable and writeable, or just readable. /// /// @return The address we actually ended up mapping, which should be the given 'base'. /// - void* MapInMemoryRegion(s64 offset, size_t size, void* base); + void* MapInMemoryRegion(s64 offset, size_t size, void* base, bool writeable); + + /// + /// Changes whether a section mapped by MapInMemoryRegion is writeable. + /// + /// @param view The address returned by MapInMemoryRegion. + /// @param size The size passed to MapInMemoryRegion.
+ /// @param writeable Whether the region should be both readable and writeable, or just readable. + /// + /// @return Whether the operation succeeded. + /// + bool ChangeMappingProtection(void* view, size_t size, bool writeable); /// /// Unmap a memory region previously mapped with MapInMemoryRegion(). diff --git a/Source/Core/Common/MemArenaAndroid.cpp b/Source/Core/Common/MemArenaAndroid.cpp index 61aa6cd824..06094a84df 100644 --- a/Source/Core/Common/MemArenaAndroid.cpp +++ b/Source/Core/Common/MemArenaAndroid.cpp @@ -123,9 +123,13 @@ void MemArena::ReleaseMemoryRegion() } } -void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) +void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base, bool writeable) { - void* retval = mmap(base, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, m_shm_fd, offset); + int prot = PROT_READ; + if (writeable) + prot |= PROT_WRITE; + + void* retval = mmap(base, size, prot, MAP_SHARED | MAP_FIXED, m_shm_fd, offset); if (retval == MAP_FAILED) { NOTICE_LOG_FMT(MEMMAP, "mmap failed"); @@ -137,6 +141,18 @@ void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) } } +bool MemArena::ChangeMappingProtection(void* view, size_t size, bool writeable) +{ + int prot = PROT_READ; + if (writeable) + prot |= PROT_WRITE; + + int retval = mprotect(view, size, prot); + if (retval != 0) + NOTICE_LOG_FMT(MEMMAP, "mprotect failed"); + return retval == 0; +} + void MemArena::UnmapFromMemoryRegion(void* view, size_t size) { void* retval = mmap(view, size, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); diff --git a/Source/Core/Common/MemArenaDarwin.cpp b/Source/Core/Common/MemArenaDarwin.cpp index b077a71f2e..c368d10fce 100644 --- a/Source/Core/Common/MemArenaDarwin.cpp +++ b/Source/Core/Common/MemArenaDarwin.cpp @@ -123,7 +123,7 @@ void MemArena::ReleaseMemoryRegion() m_region_size = 0; } -void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) +void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base, bool writeable) { if (m_shm_address == 0) { @@ -132,11 +132,13 @@ void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) } vm_address_t address = reinterpret_cast<vm_address_t>(base); - constexpr vm_prot_t prot = VM_PROT_READ | VM_PROT_WRITE; + vm_prot_t prot = VM_PROT_READ; + if (writeable) + prot |= VM_PROT_WRITE; kern_return_t retval = vm_map(mach_task_self(), &address, size, 0, VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, m_shm_entry, - offset, false, prot, prot, VM_INHERIT_DEFAULT); + offset, false, prot, VM_PROT_READ | VM_PROT_WRITE, VM_INHERIT_DEFAULT); if (retval != KERN_SUCCESS) { ERROR_LOG_FMT(MEMMAP, "MapInMemoryRegion failed: vm_map returned {0:#x}", retval); @@ -146,6 +148,20 @@ void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) return reinterpret_cast<void*>(address); } +bool MemArena::ChangeMappingProtection(void* view, size_t size, bool writeable) +{ + vm_address_t address = reinterpret_cast<vm_address_t>(view); + vm_prot_t prot = VM_PROT_READ; + if (writeable) + prot |= VM_PROT_WRITE; + + kern_return_t retval = vm_protect(mach_task_self(), address, size, false, prot); + if (retval != KERN_SUCCESS) + ERROR_LOG_FMT(MEMMAP, "ChangeMappingProtection failed: vm_protect returned {0:#x}", retval); + + return retval == KERN_SUCCESS; +} + void MemArena::UnmapFromMemoryRegion(void* view, size_t size) { vm_address_t address = reinterpret_cast<vm_address_t>(view); diff --git a/Source/Core/Common/MemArenaUnix.cpp index 04a81c7db8..d9dea91999 100644 ---
a/Source/Core/Common/MemArenaUnix.cpp +++ b/Source/Core/Common/MemArenaUnix.cpp @@ -89,9 +89,13 @@ void MemArena::ReleaseMemoryRegion() } } -void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) +void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base, bool writeable) { - void* retval = mmap(base, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, m_shm_fd, offset); + int prot = PROT_READ; + if (writeable) + prot |= PROT_WRITE; + + void* retval = mmap(base, size, prot, MAP_SHARED | MAP_FIXED, m_shm_fd, offset); if (retval == MAP_FAILED) { NOTICE_LOG_FMT(MEMMAP, "mmap failed"); @@ -103,6 +107,18 @@ void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) } } +bool MemArena::ChangeMappingProtection(void* view, size_t size, bool writeable) +{ + int prot = PROT_READ; + if (writeable) + prot |= PROT_WRITE; + + int retval = mprotect(view, size, prot); + if (retval != 0) + NOTICE_LOG_FMT(MEMMAP, "mprotect failed"); + return retval == 0; +} + void MemArena::UnmapFromMemoryRegion(void* view, size_t size) { void* retval = mmap(view, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0); diff --git a/Source/Core/Common/MemArenaWin.cpp b/Source/Core/Common/MemArenaWin.cpp index 06afdb055a..c2756a0848 100644 --- a/Source/Core/Common/MemArenaWin.cpp +++ b/Source/Core/Common/MemArenaWin.cpp @@ -318,8 +318,10 @@ WindowsMemoryRegion* MemArena::EnsureSplitRegionForMapping(void* start_address, } } -void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) +void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base, bool writeable) { + void* result; + if (m_memory_functions.m_api_ms_win_core_memory_l1_1_6_handle.IsOpen()) { WindowsMemoryRegion* const region = EnsureSplitRegionForMapping(base, size); @@ -329,10 +331,10 @@ void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) return nullptr; } - void* rv = static_cast<PMapViewOfFile3>(m_memory_functions.m_address_MapViewOfFile3)( + result = static_cast<PMapViewOfFile3>(m_memory_functions.m_address_MapViewOfFile3)( m_memory_handle, nullptr, base, offset, size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0); - if (rv) + if (result) { region->m_is_mapped = true; } @@ -342,11 +344,37 @@ void* MemArena::MapInMemoryRegion(s64 offset, size_t size, void* base) // revert the split, if any JoinRegionsAfterUnmap(base, size); + + return nullptr; } - return rv; + } + else + { + result = + MapViewOfFileEx(m_memory_handle, FILE_MAP_ALL_ACCESS, 0, (DWORD)((u64)offset), size, base); + + if (!result) + return nullptr; } - return MapViewOfFileEx(m_memory_handle, FILE_MAP_ALL_ACCESS, 0, (DWORD)((u64)offset), size, base); + if (!writeable) + { + // If we want to use PAGE_READONLY for now while still being able to switch to PAGE_READWRITE + // later, we have to call MapViewOfFile with PAGE_READWRITE and then switch to PAGE_READONLY. + ChangeMappingProtection(base, size, writeable); + } + + return result; +} + +bool MemArena::ChangeMappingProtection(void* view, size_t size, bool writeable) +{ + DWORD old_protect; + const int retval = + VirtualProtect(view, size, writeable ?
PAGE_READWRITE : PAGE_READONLY, &old_protect); + if (retval == 0) + PanicAlertFmt("VirtualProtect failed: {}", GetLastErrorString()); + return retval != 0; } bool MemArena::JoinRegionsAfterUnmap(void* start_address, size_t size) diff --git a/Source/Core/Core/HW/Memmap.cpp b/Source/Core/Core/HW/Memmap.cpp index 2241ef75d6..9ec47bdd86 100644 --- a/Source/Core/Core/HW/Memmap.cpp +++ b/Source/Core/Core/HW/Memmap.cpp @@ -221,7 +221,7 @@ bool MemoryManager::InitFastmemArena() continue; u8* base = m_physical_base + region.physical_address; - u8* view = (u8*)m_arena.MapInMemoryRegion(region.shm_position, region.size, base); + u8* view = (u8*)m_arena.MapInMemoryRegion(region.shm_position, region.size, base, true); if (base != view) { @@ -239,7 +239,7 @@ bool MemoryManager::InitFastmemArena() void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table) { - for (auto& entry : m_dbat_mapped_entries) + for (const auto& [logical_address, entry] : m_dbat_mapped_entries) { m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); } @@ -291,7 +291,7 @@ void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table) u8* base = m_logical_base + logical_address + intersection_start - translated_address; u32 mapped_size = intersection_end - intersection_start; - void* mapped_pointer = m_arena.MapInMemoryRegion(position, mapped_size, base); + void* mapped_pointer = m_arena.MapInMemoryRegion(position, mapped_size, base, true); if (!mapped_pointer) { PanicAlertFmt("Memory::UpdateDBATMappings(): Failed to map memory region at 0x{:08X} " @@ -299,7 +299,8 @@ void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table) intersection_start, mapped_size, logical_address); continue; } - m_dbat_mapped_entries.push_back({mapped_pointer, mapped_size, logical_address}); + m_dbat_mapped_entries.emplace(logical_address, + LogicalMemoryView{mapped_pointer, mapped_size}); } m_logical_page_mappings[i] = @@ -310,45 +311,61 @@ void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table) } } -void MemoryManager::AddPageTableMappings(const std::map<u32, u32>& mappings) +void MemoryManager::AddPageTableMapping(u32 logical_address, u32 translated_address, bool writeable) { if (m_page_size > PowerPC::HW_PAGE_SIZE) return; - for (const auto [logical_address, translated_address] : mappings) + if (logical_address % m_page_alignment != 0) + return; + + constexpr u32 logical_size = PowerPC::HW_PAGE_SIZE; + for (const auto& physical_region : m_physical_regions) { - if (logical_address % m_page_alignment != 0) + if (!physical_region.active) continue; - constexpr u32 logical_size = PowerPC::HW_PAGE_SIZE; - for (const auto& physical_region : m_physical_regions) + u32 mapping_address = physical_region.physical_address; + u32 mapping_end = mapping_address + physical_region.size; + u32 intersection_start = std::max(mapping_address, translated_address); + u32 intersection_end = std::min(mapping_end, translated_address + logical_size); + if (intersection_start < intersection_end) { - if (!physical_region.active) - continue; - - u32 mapping_address = physical_region.physical_address; - u32 mapping_end = mapping_address + physical_region.size; - u32 intersection_start = std::max(mapping_address, translated_address); - u32 intersection_end = std::min(mapping_end, translated_address + logical_size); - if (intersection_start < intersection_end) + // Found an overlapping region; map it. + if (m_is_fastmem_arena_initialized) { - // Found an overlapping region; map it.
- if (m_is_fastmem_arena_initialized) - { - u32 position = physical_region.shm_position + intersection_start - mapping_address; - u8* base = m_logical_base + logical_address + intersection_start - translated_address; - u32 mapped_size = intersection_end - intersection_start; + u32 position = physical_region.shm_position + intersection_start - mapping_address; + u8* base = m_logical_base + logical_address + intersection_start - translated_address; + u32 mapped_size = intersection_end - intersection_start; - void* mapped_pointer = m_arena.MapInMemoryRegion(position, mapped_size, base); + const auto it = m_page_table_mapped_entries.find(logical_address); + if (it != m_page_table_mapped_entries.end()) + { + // Update the protection of an existing mapping. + if (it->second.mapped_pointer == base && it->second.mapped_size == mapped_size) + { + if (!m_arena.ChangeMappingProtection(base, mapped_size, writeable)) + { + PanicAlertFmt( + "Memory::AddPageTableMapping(): Failed to change protection for memory " + "region at 0x{:08X} (size 0x{:08X}, logical fastmem region at 0x{:08X}).", + intersection_start, mapped_size, logical_address); + } + } + } + else + { + // Create a new mapping. + void* mapped_pointer = m_arena.MapInMemoryRegion(position, mapped_size, base, writeable); if (!mapped_pointer) { - PanicAlertFmt( - "Memory::UpdatePageTableMappings(): Failed to map memory region at 0x{:08X} " - "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.", - intersection_start, mapped_size, logical_address); + PanicAlertFmt("Memory::AddPageTableMapping(): Failed to map memory region at 0x{:08X} " + "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.", + intersection_start, mapped_size, logical_address); continue; } - m_page_table_mapped_entries.push_back({mapped_pointer, mapped_size, logical_address}); + m_page_table_mapped_entries.emplace(logical_address, + LogicalMemoryView{mapped_pointer, mapped_size}); } } } @@ -363,8 +380,9 @@ void MemoryManager::RemovePageTableMappings(const std::set<u32>& mappings) if (mappings.empty()) return; - std::erase_if(m_page_table_mapped_entries, [this, &mappings](const LogicalMemoryView& entry) { - const bool remove = mappings.contains(entry.logical_address); + std::erase_if(m_page_table_mapped_entries, [this, &mappings](const auto& pair) { + const auto& [logical_address, entry] = pair; + const bool remove = mappings.contains(logical_address); if (remove) m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); return remove; @@ -373,7 +391,7 @@ void MemoryManager::RemovePageTableMappings(const std::set<u32>& mappings) void MemoryManager::RemoveAllPageTableMappings() { - for (auto& entry : m_page_table_mapped_entries) + for (const auto& [logical_address, entry] : m_page_table_mapped_entries) { m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); } @@ -461,13 +479,13 @@ void MemoryManager::ShutdownFastmemArena() m_arena.UnmapFromMemoryRegion(base, region.size); } - for (auto& entry : m_dbat_mapped_entries) + for (const auto& [logical_address, entry] : m_dbat_mapped_entries) { m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); } m_dbat_mapped_entries.clear(); - for (auto& entry : m_page_table_mapped_entries) + for (const auto& [logical_address, entry] : m_page_table_mapped_entries) { m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size); } diff --git a/Source/Core/Core/HW/Memmap.h b/Source/Core/Core/HW/Memmap.h index 8941474f31..dfefd7f9e9 100644 --- a/Source/Core/Core/HW/Memmap.h +++
b/Source/Core/Core/HW/Memmap.h @@ -9,7 +9,6 @@ #include #include #include -#include <vector> #include "Common/CommonTypes.h" #include "Common/MathUtil.h" @@ -56,7 +55,6 @@ struct LogicalMemoryView { void* mapped_pointer; u32 mapped_size; - u32 logical_address; }; class MemoryManager @@ -103,7 +101,7 @@ public: void DoState(PointerWrap& p); void UpdateDBATMappings(const PowerPC::BatTable& dbat_table); - void AddPageTableMappings(const std::map<u32, u32>& mappings); + void AddPageTableMapping(u32 logical_address, u32 translated_address, bool writeable); void RemovePageTableMappings(const std::set<u32>& mappings); void RemoveAllPageTableMappings(); @@ -256,8 +254,9 @@ private: // TODO: Do we want to handle the mirrors of the GC RAM? std::array m_physical_regions{}; - std::vector<LogicalMemoryView> m_dbat_mapped_entries; - std::vector<LogicalMemoryView> m_page_table_mapped_entries; + // The key is the logical address. + std::map<u32, LogicalMemoryView> m_dbat_mapped_entries; + std::map<u32, LogicalMemoryView> m_page_table_mapped_entries; std::array m_physical_page_mappings{}; std::array m_logical_page_mappings{}; diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 72f2bc9038..40d506a6a8 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -1486,9 +1486,11 @@ void MMU::PageTableUpdated() #else if (m_ppc_state.m_enable_dcache) { - // Because fastmem isn't in use when accurate dcache emulation is enabled, setting up mappings - // would be a waste of time. Skipping setting up mappings also comes with the bonus of skipping - // the inaccurate behavior of setting the R and C bits of PTE2 as soon as a page is mapped. + // Because fastmem isn't in use when accurate dcache emulation is enabled, + // keeping track of page table updates would be a waste of time. + m_memory.RemoveAllPageTableMappings(); + m_page_table.clear(); + m_page_mappings.clear(); return; } @@ -1526,7 +1528,7 @@ void MMU::ReloadPageTable() PageTableUpdated(m_temp_page_table); } -void MMU::PageTableUpdated(std::span<u8> page_table) +void MMU::PageTableUpdated(std::span<const u8> page_table) { // PowerPC's priority order for PTEs that have the same logical address is as follows: @@ -1535,7 +1537,8 @@ void MMU::PageTableUpdated(std::span<u8> page_table) // incorporates the logical address and H. The PTE located first in the PTEG takes priority. m_removed_mappings.clear(); - m_added_mappings.clear(); + m_added_readonly_mappings.clear(); + m_added_readwrite_mappings.clear(); if (m_page_table.size() != page_table.size()) { @@ -1544,7 +1547,7 @@ void MMU::PageTableUpdated(std::span<u8> page_table) } u8* old_page_table = m_page_table.data(); - u8* new_page_table = page_table.data(); + const u8* new_page_table = page_table.data(); constexpr auto compare_64_bytes = [](const u8* a, const u8* b) -> bool { #ifdef _M_X86_64 @@ -1643,8 +1646,8 @@ void MMU::PageTableUpdated(std::span<u8> page_table) } }; - const auto try_add_mapping = [this, &get_page_index, page_table](UPTE_Lo pte1, UPTE_Hi pte2, - u32 page_table_offset) { + const auto try_add_mapping = [this, &get_page_index](UPTE_Lo pte1, UPTE_Hi pte2, + u32 page_table_offset) { EffectiveAddress logical_address = get_page_index(pte1, page_table_offset / 64); @@ -1686,14 +1689,15 @@ void MMU::PageTableUpdated(std::span<u8> page_table) const auto it = m_page_mappings.find(logical_address.Hex); if (it != m_page_mappings.end()) [[unlikely]] { - if (priority > it->second.priority) + if (it->second.priority < priority) { // An existing mapping has priority.
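 // (Smaller wins: priority packs the PTE's slot within its PTEG into bits 0-2 and the
 // H bit into bit 3, so any primary PTE outranks any secondary PTE.)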
continue; } else { - // The new mapping has priority over an existing mapping. Replace the existing mapping. + // The new mapping has priority over an existing mapping. Replace the existing + // mapping. if (it->second.host_mapping) m_removed_mappings.emplace(it->first); it->second.Hex = page_mapping.Hex; @@ -1705,24 +1709,13 @@ void MMU::PageTableUpdated(std::span<u8> page_table) m_page_mappings.emplace(logical_address.Hex, page_mapping); } - if (host_mapping) + // If the R bit isn't set yet, the actual host mapping will be created once + // TranslatePageAddress sets the R bit. + if (host_mapping && pte2.R) { const u32 physical_address = pte2.RPN << 12; - m_added_mappings.emplace(logical_address.Hex, physical_address); - - // HACK: We set R and C, which indicate whether a page has been read from and written to - // respectively, when a page is mapped rather than when it's actually accessed. The latter - // is probably possible using some fault handling logic, but for now it seems like more - // work than it's worth. - if (!pte2.R || !pte2.C) - { - pte2.R = 1; - pte2.C = 1; - - const u32 pte2_swapped = Common::swap32(pte2.Hex); - std::memcpy(page_table.data() + page_table_offset + 4, &pte2_swapped, - sizeof(pte2_swapped)); - } + (pte2.C ? m_added_readwrite_mappings : m_added_readonly_mappings) + .emplace(logical_address.Hex, physical_address); } }; @@ -1789,13 +1782,14 @@ void MMU::PageTableUpdated(std::span<u8> page_table) } } - // Pass 2: Add new secondary (H=1) mappings. This is a separate pass because before we can process - // whether a mapping should be added, we first need to check all PTEs that have equal or higher - // priority to see if their mappings should be removed. For adding primary mappings, this ordering - // comes naturally from doing a linear scan of the page table from start to finish. But for adding - // secondary mappings, the primary PTEG that has priority over a given secondary PTEG is in the - // other half of the page table, so we need more than one pass through the page table. But most of - // the time, there are no secondary mappings, letting us skip the second pass. + // Pass 2: Add new secondary (H=1) mappings. This is a separate pass because before we can + // process whether a mapping should be added, we first need to check all PTEs that have + // equal or higher priority to see if their mappings should be removed. For adding primary + // mappings, this ordering comes naturally from doing a linear scan of the page table from + // start to finish. But for adding secondary mappings, the primary PTEG that has priority + // over a given secondary PTEG is in the other half of the page table, so we need more than + // one pass through the page table. But most of the time, there are no secondary mappings, + // letting us skip the second pass.
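+ // (Concretely: removing a primary PTE in pass 1 can unshadow a secondary PTE whose
+ // PTEG may sit earlier in the table and thus may already have been scanned; only a
+ // second pass can pick up its mapping.)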
if (run_pass_2) [[unlikely]] { for (u32 i = 0; i < page_table.size(); i += 64) @@ -1823,8 +1817,11 @@ void MMU::PageTableUpdated(std::span<u8> page_table) if (!m_removed_mappings.empty()) m_memory.RemovePageTableMappings(m_removed_mappings); - if (!m_added_mappings.empty()) - m_memory.AddPageTableMappings(m_added_mappings); + for (const auto& [logical_address, physical_address] : m_added_readonly_mappings) + m_memory.AddPageTableMapping(logical_address, physical_address, false); + + for (const auto& [logical_address, physical_address] : m_added_readwrite_mappings) + m_memory.AddPageTableMapping(logical_address, physical_address, true); } #endif @@ -1895,6 +1892,7 @@ MMU::TranslateAddressResult MMU::TranslatePageAddress(const EffectiveAddress add if (pte1.Hex == pteg) { UPTE_Hi pte2(ReadFromHardware(pteg_addr + 4)); + const UPTE_Hi old_pte2 = pte2; // set the access bits switch (flag) @@ -1914,9 +1912,29 @@ MMU::TranslateAddressResult MMU::TranslatePageAddress(const EffectiveAddress add break; } - if (!IsNoExceptionFlag(flag)) + if (!IsNoExceptionFlag(flag) && pte2.Hex != old_pte2.Hex) { m_memory.Write_U32(pte2.Hex, pteg_addr + 4); + + const u32 page_logical_address = address.Hex & ~HW_PAGE_MASK; + const auto it = m_page_mappings.find(page_logical_address); + if (it != m_page_mappings.end()) + { + const u32 priority = (pteg_addr % 64 / 8) | (pte1.H << 3); + if (it->second.Hex == PageMapping(pte2.RPN, true, priority).Hex) + { + const u32 swapped_pte1 = Common::swap32(reinterpret_cast<const u8*>(&pte1)); + std::memcpy(m_page_table.data() + pteg_addr - m_ppc_state.pagetable_base, + &swapped_pte1, sizeof(swapped_pte1)); + + const u32 swapped_pte2 = Common::swap32(reinterpret_cast<const u8*>(&pte2)); + std::memcpy(m_page_table.data() + pteg_addr + 4 - m_ppc_state.pagetable_base, + &swapped_pte2, sizeof(swapped_pte2)); + + const u32 page_translated_address = pte2.RPN << 12; + m_memory.AddPageTableMapping(page_logical_address, page_translated_address, pte2.C); + } + } } // We already updated the TLB entry if this was caused by a C bit. diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h index 0ddf6bd631..29f0a0a943 100644 --- a/Source/Core/Core/PowerPC/MMU.h +++ b/Source/Core/Core/PowerPC/MMU.h @@ -336,7 +336,7 @@ private: #ifndef _ARCH_32 void ReloadPageTable(); - void PageTableUpdated(std::span<u8> page_table); + void PageTableUpdated(std::span<const u8> page_table); #endif @@ -373,7 +373,8 @@ private: // These are kept around just for their memory allocations. They are always cleared before use. std::vector<u8> m_temp_page_table; std::set<u32> m_removed_mappings; - std::map<u32, u32> m_added_mappings; + std::map<u32, u32> m_added_readonly_mappings; + std::map<u32, u32> m_added_readwrite_mappings; BatTable m_ibat_table; BatTable m_dbat_table;
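A minimal sketch of the policy the final patch establishes (illustrative only; HostProtectionFor is a hypothetical helper, not a function from the patch): a page gets no host mapping until its R bit is set, a read-only mapping until its C bit is set, and a read-write mapping afterwards, with ChangeMappingProtection upgrading the mapping in place once TranslatePageAddress sets C.

enum class HostProtection { None, ReadOnly, ReadWrite };

// Mirrors the R/C rule above: defer the host mapping until the page has been
// referenced (R bit), and keep it read-only until the page has been dirtied (C bit).
constexpr HostProtection HostProtectionFor(bool r_bit, bool c_bit)
{
  if (!r_bit)
    return HostProtection::None;
  return c_bit ? HostProtection::ReadWrite : HostProtection::ReadOnly;
}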