diff --git a/Source/Core/Core/HW/Memmap.cpp b/Source/Core/Core/HW/Memmap.cpp
index 675e942009..f51780dfaa 100644
--- a/Source/Core/Core/HW/Memmap.cpp
+++ b/Source/Core/Core/HW/Memmap.cpp
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include <map>
 #include
 #include
 #include
@@ -41,7 +42,9 @@
 namespace Memory
 {
-MemoryManager::MemoryManager(Core::System& system) : m_system(system)
+MemoryManager::MemoryManager(Core::System& system)
+    : m_page_size(m_arena.GetPageSize()), m_page_alignment(m_arena.GetPageAlignment()),
+      m_system(system)
 {
 }
@@ -233,13 +236,19 @@ bool MemoryManager::InitFastmemArena()
   return true;
 }
 
-void MemoryManager::UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
+void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table)
 {
-  for (auto& entry : m_logical_mapped_entries)
+  for (auto& entry : m_dbat_mapped_entries)
   {
     m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
   }
-  m_logical_mapped_entries.clear();
+  m_dbat_mapped_entries.clear();
+
+  for (auto& entry : m_page_table_mapped_entries)
+  {
+    m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
+  }
+  m_page_table_mapped_entries.clear();
 
   m_logical_page_mappings.fill(nullptr);
@@ -288,13 +297,12 @@ void MemoryManager::UpdateLogicalMemory(const PowerPC::BatTable& dbat_table)
         void* mapped_pointer = m_arena.MapInMemoryRegion(position, mapped_size, base);
         if (!mapped_pointer)
         {
-          PanicAlertFmt(
-              "Memory::UpdateLogicalMemory(): Failed to map memory region at 0x{:08X} "
-              "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
-              intersection_start, mapped_size, logical_address);
-          exit(0);
+          PanicAlertFmt("Memory::UpdateDBATMappings(): Failed to map memory region at 0x{:08X} "
+                        "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
+                        intersection_start, mapped_size, logical_address);
+          continue;
         }
-        m_logical_mapped_entries.push_back({mapped_pointer, mapped_size});
+        m_dbat_mapped_entries.push_back({mapped_pointer, mapped_size});
       }
 
       m_logical_page_mappings[i] =
@@ -305,6 +313,57 @@
   }
 }
 
+void MemoryManager::UpdatePageTableMappings(const std::map<u32, u32>& page_mappings)
+{
+  if (m_page_size > PowerPC::HW_PAGE_SIZE)
+    return;
+
+  for (auto& entry : m_page_table_mapped_entries)
+  {
+    m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
+  }
+  m_page_table_mapped_entries.clear();
+
+  for (const auto [logical_address, translated_address] : page_mappings)
+  {
+    if (logical_address % m_page_alignment != 0)
+      continue;
+
+    constexpr u32 logical_size = PowerPC::HW_PAGE_SIZE;
+    for (const auto& physical_region : m_physical_regions)
+    {
+      if (!physical_region.active)
+        continue;
+
+      u32 mapping_address = physical_region.physical_address;
+      u32 mapping_end = mapping_address + physical_region.size;
+      u32 intersection_start = std::max(mapping_address, translated_address);
+      u32 intersection_end = std::min(mapping_end, translated_address + logical_size);
+      if (intersection_start < intersection_end)
+      {
+        // Found an overlapping region; map it.
+        if (m_is_fastmem_arena_initialized)
+        {
+          u32 position = physical_region.shm_position + intersection_start - mapping_address;
+          u8* base = m_logical_base + logical_address + intersection_start - translated_address;
+          u32 mapped_size = intersection_end - intersection_start;
+
+          void* mapped_pointer = m_arena.MapInMemoryRegion(position, mapped_size, base);
+          if (!mapped_pointer)
+          {
+            PanicAlertFmt(
+                "Memory::UpdatePageTableMappings(): Failed to map memory region at 0x{:08X} "
+                "(size 0x{:08X}) into logical fastmem region at 0x{:08X}.",
+                intersection_start, mapped_size, logical_address);
+            continue;
+          }
+          m_page_table_mapped_entries.push_back({mapped_pointer, mapped_size});
+        }
+      }
+    }
+  }
+}
+
 void MemoryManager::DoState(PointerWrap& p)
 {
   const u32 current_ram_size = GetRamSize();
@@ -386,11 +445,17 @@ void MemoryManager::ShutdownFastmemArena()
     m_arena.UnmapFromMemoryRegion(base, region.size);
   }
 
-  for (auto& entry : m_logical_mapped_entries)
+  for (auto& entry : m_dbat_mapped_entries)
   {
     m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
   }
-  m_logical_mapped_entries.clear();
+  m_dbat_mapped_entries.clear();
+
+  for (auto& entry : m_page_table_mapped_entries)
+  {
+    m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
+  }
+  m_page_table_mapped_entries.clear();
 
   m_arena.ReleaseMemoryRegion();
diff --git a/Source/Core/Core/HW/Memmap.h b/Source/Core/Core/HW/Memmap.h
index e0708605db..8d224e1ca7 100644
--- a/Source/Core/Core/HW/Memmap.h
+++ b/Source/Core/Core/HW/Memmap.h
@@ -4,6 +4,7 @@
 #pragma once
 
 #include
+#include <map>
 #include
 #include
 #include
@@ -99,7 +100,8 @@ public:
   void ShutdownFastmemArena();
   void DoState(PointerWrap& p);
 
-  void UpdateLogicalMemory(const PowerPC::BatTable& dbat_table);
+  void UpdateDBATMappings(const PowerPC::BatTable& dbat_table);
+  void UpdatePageTableMappings(const std::map<u32, u32>& page_mappings);
 
   void Clear();
@@ -207,6 +209,9 @@ private:
   // The MemArena class
   Common::MemArena m_arena;
 
+  const size_t m_page_size;
+  const size_t m_page_alignment;
+
   // Dolphin allocates memory to represent four regions:
   // - 32MB RAM (actually 24MB on hardware), available on GameCube and Wii
   // - 64MB "EXRAM", RAM only available on Wii
@@ -247,7 +252,8 @@
   // TODO: Do we want to handle the mirrors of the GC RAM?
   std::array m_physical_regions{};
 
-  std::vector m_logical_mapped_entries;
+  std::vector m_dbat_mapped_entries;
+  std::vector m_page_table_mapped_entries;
 
   std::array m_physical_page_mappings{};
   std::array m_logical_page_mappings{};
diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp
index e84765acb8..63cfa0afa6 100644
--- a/Source/Core/Core/PowerPC/MMU.cpp
+++ b/Source/Core/Core/PowerPC/MMU.cpp
@@ -33,6 +33,7 @@
 #include "Common/Align.h"
 #include "Common/Assert.h"
 #include "Common/BitUtils.h"
+#include "Common/ChunkFile.h"
 #include "Common/CommonTypes.h"
 #include "Common/Logging/Log.h"
@@ -58,6 +59,22 @@ MMU::MMU(Core::System& system, Memory::MemoryManager& memory, PowerPC::PowerPCMa
 MMU::~MMU() = default;
 
+void MMU::Reset()
+{
+  m_page_mappings.clear();
+#ifndef _ARCH_32
+  m_memory.UpdatePageTableMappings(m_page_mappings);
+#endif
+}
+
+void MMU::DoState(PointerWrap& p)
+{
+  // Instead of storing m_page_mappings in savestates, we *could* recalculate it based on memory
+  // here in DoState, but this could lead to us getting a more up-to-date set of page mappings
+  // than we had when the savestate was created, which could be a problem for TAS determinism.
+  p.Do(m_page_mappings);
+}
+
 // Overloaded byteswap functions, for use within the templated functions below.
 [[maybe_unused]] static u8 bswap(u8 val)
 {
@@ -1323,10 +1340,13 @@ void MMU::SDRUpdated()
   m_ppc_state.pagetable_base = htaborg << 16;
   m_ppc_state.pagetable_hashmask = ((htabmask << 10) | 0x3ff);
+
+  PageTableUpdated();
 }
 
 void MMU::SRUpdated()
 {
+  PageTableUpdated();
 }
 
 enum class TLBLookupResult
@@ -1416,6 +1436,124 @@ void MMU::InvalidateTLBEntry(u32 address)
   m_ppc_state.tlb[PowerPC::DATA_TLB_INDEX][entry_index].Invalidate();
   m_ppc_state.tlb[PowerPC::INST_TLB_INDEX][entry_index].Invalidate();
+
+  PageTableUpdated();
+}
+
+void MMU::PageTableUpdated()
+{
+#ifndef _ARCH_32
+  m_page_mappings.clear();
+
+  if (m_ppc_state.m_enable_dcache)
+  {
+    // Because fastmem isn't in use when accurate dcache emulation is enabled, setting up mappings
+    // would be a waste of time. Skipping setting up mappings also comes with the bonus of skipping
+    // the inaccurate behavior of setting the R and C bits of PTE2 as soon as a page is mapped.
+    return;
+  }
+
+  const u32 page_table_mask = m_ppc_state.pagetable_hashmask;
+  const u32 page_table_base = m_ppc_state.pagetable_base;
+  const u32 page_table_end = (page_table_base | (page_table_mask << 6)) + (1 << 6);
+  const u32 page_table_size = page_table_end - page_table_base;
+
+  u8* page_table_view = m_system.GetMemory().GetPointerForRange(page_table_base, page_table_size);
+  if (!page_table_view)
+  {
+    WARN_LOG_FMT(POWERPC, "Failed to read page table at {:#010x}-{:#010x}", page_table_base,
+                 page_table_end);
+    m_memory.UpdatePageTableMappings(m_page_mappings);
+    return;
+  }
+
+  const auto read_page_table = [&](u32 H) {
+    for (u32 i = 0; i <= page_table_mask; ++i)
+    {
+      for (u32 j = 0; j < 8; ++j)
+      {
+        const u32 pte_addr = (page_table_base | ((i & page_table_mask) << 6)) + j * 8;
+
+        UPTE_Lo pte1(Common::swap32(page_table_view + pte_addr - page_table_base));
+        UPTE_Hi pte2(Common::swap32(page_table_view + pte_addr - page_table_base + 4));
+
+        if (!pte1.V)
+          continue;
+
+        if (pte1.H != H)
+          continue;
+
+        // There are quirks related to uncached memory that can't be correctly emulated by fast
+        // accesses, so we don't map uncached memory. (However, no software at all is known to
+        // trigger these quirks through page address translation, only through block address
+        // translation.)
+        const bool wi = (pte2.WIMG & 0b1100) != 0;
+        if (wi)
+          continue;
+
+        // Due to hash masking, the upper bits of page_index_from_hash might not match the actual
+        // page index. But these bits fully overlap with the API (abbreviated page index), so we can
+        // overwrite these bits with the API from pte1 and thereby get the correct page index.
+        //
+        // In other words: logical_address.API must be written to after logical_address.page_index!
+        u32 page_index_from_hash = i ^ pte1.VSID;
+        if (pte1.H)
+          page_index_from_hash = ~page_index_from_hash;
+        EffectiveAddress logical_address;
+        logical_address.offset = 0;
+        logical_address.page_index = page_index_from_hash;
+        logical_address.API = pte1.API;
+
+        for (u32 k = 0; k < std::size(m_ppc_state.sr); ++k)
+        {
+          const auto sr = UReg_SR{m_ppc_state.sr[k]};
+          if (sr.VSID != pte1.VSID || sr.T != 0)
+            continue;
+
+          logical_address.SR = k;
+
+          // Block address translation takes priority over page address translation.
+          if (m_dbat_table[logical_address.Hex >> PowerPC::BAT_INDEX_SHIFT] &
+              PowerPC::BAT_MAPPED_BIT)
+          {
+            continue;
+          }
+
+          // Fast accesses don't support memchecks, so force slow accesses by removing fastmem
+          // mappings for all overlapping virtual pages.
+          constexpr u32 logical_size = PowerPC::HW_PAGE_SIZE;
+          if (m_power_pc.GetMemChecks().OverlapsMemcheck(logical_address.Hex, logical_size))
+            continue;
+
+          const u32 physical_address = pte2.RPN << 12;
+
+          // Important: This doesn't overwrite anything already present in m_page_mappings.
+          m_page_mappings.emplace(logical_address.Hex, physical_address);
+
+          // HACK: We set R and C, which indicate whether a page has been read from and written to
+          // respectively, when a page is mapped rather than when it's actually accessed. The
+          // latter is probably possible using some fault handling logic, but for now it seems
+          // like more work than it's worth.
+          if (!pte2.R || !pte2.C)
+          {
+            pte2.R = 1;
+            pte2.C = 1;
+
+            const u32 pte2_swapped = Common::swap32(pte2.Hex);
+            std::memcpy(page_table_view + pte_addr - page_table_base + 4, &pte2_swapped,
+                        sizeof(pte2_swapped));
+          }
+        }
+      }
+    }
+  };
+
+  // We need to read all H=0 PTEs first, because H=0 takes priority over H=1.
+  read_page_table(0);
+  read_page_table(1);
+
+  m_memory.UpdatePageTableMappings(m_page_mappings);
+#endif
 }
 
 // Page Address Translation
@@ -1643,7 +1781,8 @@ void MMU::DBATUpdated()
   }
 
 #ifndef _ARCH_32
-  m_memory.UpdateLogicalMemory(m_dbat_table);
+  m_memory.UpdateDBATMappings(m_dbat_table);
+  m_memory.UpdatePageTableMappings(m_page_mappings);
 #endif
 
   // IsOptimizable*Address and dcbz depends on the BAT mapping, so we need a flush here.
diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h
index 4db2c68247..0b7dd8a668 100644
--- a/Source/Core/Core/PowerPC/MMU.h
+++ b/Source/Core/Core/PowerPC/MMU.h
@@ -5,12 +5,15 @@
 #include
 #include
+#include <map>
 #include
 #include
 
 #include "Common/BitField.h"
 #include "Common/CommonTypes.h"
 
+class PointerWrap;
+
 namespace Core
 {
 class CPUThreadGuard;
@@ -116,6 +119,9 @@ public:
   MMU& operator=(MMU&& other) = delete;
   ~MMU();
 
+  void Reset();
+  void DoState(PointerWrap& p);
+
   // Routines for debugger UI, cheats, etc. to access emulated memory from the
   // perspective of the CPU. Not for use by core emulation routines.
   // Use "Host" prefix.
@@ -240,6 +246,7 @@ public:
   void SDRUpdated();
   void SRUpdated();
   void InvalidateTLBEntry(u32 address);
+  void PageTableUpdated();
   void DBATUpdated();
   void IBATUpdated();
@@ -326,6 +333,10 @@ private:
   PowerPC::PowerPCManager& m_power_pc;
   PowerPC::PowerPCState& m_ppc_state;
 
+  // STATE_TO_SAVE
+  std::map<u32, u32> m_page_mappings;
+  // END STATE_TO_SAVE
+
   BatTable m_ibat_table;
   BatTable m_dbat_table;
 };
diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp
index 1e75d7b5af..255ad7d0be 100644
--- a/Source/Core/Core/PowerPC/PowerPC.cpp
+++ b/Source/Core/Core/PowerPC/PowerPC.cpp
@@ -105,6 +105,9 @@ void PowerPCManager::DoState(PointerWrap& p)
   m_ppc_state.iCache.DoState(memory, p);
   m_ppc_state.dCache.DoState(memory, p);
 
+  auto& mmu = m_system.GetMMU();
+  mmu.DoState(p);
+
   if (p.IsReadMode())
   {
     if (!m_ppc_state.m_enable_dcache)
@@ -116,7 +119,6 @@ void PowerPCManager::DoState(PointerWrap& p)
     RoundingModeUpdated(m_ppc_state);
     RecalculateAllFeatureFlags(m_ppc_state);
 
-    auto& mmu = m_system.GetMMU();
     mmu.IBATUpdated();
     mmu.DBATUpdated();
   }
@@ -253,6 +255,10 @@ void PowerPCManager::RefreshConfig()
   {
     INFO_LOG_FMT(POWERPC, "Flushing data cache");
     m_ppc_state.dCache.FlushAll(m_system.GetMemory());
+
+    // No page table mappings are created when accurate dcache emulation is enabled.
+    // If there are any that can be created, let's create them now.
+    m_system.GetMMU().PageTableUpdated();
   }
 }
@@ -282,6 +288,7 @@ void PowerPCManager::Reset()
   ResetRegisters();
   m_ppc_state.iCache.Reset(m_system.GetJitInterface());
   m_ppc_state.dCache.Reset();
+  m_system.GetMMU().Reset();
 }
 
 void PowerPCManager::ScheduleInvalidateCacheThreadSafe(u32 address)
diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp
index a947ad19a3..76bab5c634 100644
--- a/Source/Core/Core/State.cpp
+++ b/Source/Core/Core/State.cpp
@@ -95,7 +95,7 @@ static size_t s_state_writes_in_queue;
 static std::condition_variable s_state_write_queue_is_empty;
 
 // Don't forget to increase this after doing changes on the savestate system
-constexpr u32 STATE_VERSION = 175;  // Last changed in PR 13751
+constexpr u32 STATE_VERSION = 176;  // Last changed in PR 13768
 
 // Increase this if the StateExtendedHeader definition changes
 constexpr u32 EXTENDED_HEADER_VERSION = 1;  // Last changed in PR 12217
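
The trickiest part of the patch is the index arithmetic in MMU::PageTableUpdated, where the page index is reconstructed from the PTEG index, the VSID and the API. The following is a minimal standalone sketch of that arithmetic, not Dolphin code: it assumes the usual Gekko/Broadway layout (a 16-bit page index whose top 6 bits are the API, a primary hash of VSID ^ page_index, and a hash mask that always covers at least the low 10 bits), and all names and constants are illustrative only.

// Standalone sketch of the hash/index arithmetic described above. Names and constants are
// illustrative assumptions, not taken from the Dolphin codebase.
#include <cstdint>

constexpr uint32_t MakePtegIndex(uint32_t vsid, uint32_t page_index, uint32_t hash_mask)
{
  // The PTEG index is the (primary) hash truncated by the hash mask.
  return (vsid ^ page_index) & hash_mask;
}

constexpr uint32_t RecoverPageIndex(uint32_t pteg_index, uint32_t vsid, uint32_t api)
{
  // XORing the VSID back in restores the page index bits that survived the mask; the low
  // 10 bits are always valid because the mask is at least 0x3ff. The upper 6 bits may be
  // wrong, so they are overwritten with the API stored in PTE1. (For a secondary-hash PTE,
  // the PTEG index would first be complemented.)
  const uint32_t from_hash = (pteg_index ^ vsid) & 0xFFFF;
  return (from_hash & 0x03FF) | (api << 10);
}

// Round trip: hashing a page index and recovering it again yields the original value.
static_assert(RecoverPageIndex(MakePtegIndex(0x00123, 0xABCD, 0x3FF), 0x00123, 0xABCD >> 10) ==
              0xABCD);

This is also why the patch assigns logical_address.page_index before logical_address.API: the API assignment deliberately overwrites the upper bits that the masked hash cannot recover.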
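
Likewise, a self-contained sketch of the per-page overlap computation in MemoryManager::UpdatePageTableMappings may help: each 4 KiB logical page is clamped against a physical region to determine how much of it can be backed by the fastmem arena. The struct and function names below are hypothetical and chosen only for illustration; they are not part of the Dolphin API.

// Standalone sketch (not Dolphin code) of clamping a translated 4 KiB page against a
// physical region and deriving the arena offset, mapping size and logical offset.
#include <algorithm>
#include <cstdint>
#include <optional>

struct Region
{
  uint32_t physical_address;  // start of the region in emulated physical space
  uint32_t size;              // region size in bytes
  uint32_t shm_position;      // offset of the region inside the backing shared memory
};

struct PageMapping
{
  uint32_t shm_offset;      // where in the arena the mapping starts
  uint32_t mapped_size;     // how many bytes to map (at most one 4 KiB page)
  uint32_t logical_offset;  // offset from the logical fastmem base to map at
};

// Returns the mapping for the part of the 4 KiB page at `translated` that falls inside
// `region`, or nothing if the page and the region do not overlap.
std::optional<PageMapping> IntersectPage(const Region& region, uint32_t logical, uint32_t translated)
{
  constexpr uint32_t kPageSize = 0x1000;
  const uint32_t start = std::max(region.physical_address, translated);
  const uint32_t end = std::min(region.physical_address + region.size, translated + kPageSize);
  if (start >= end)
    return std::nullopt;

  return PageMapping{region.shm_position + (start - region.physical_address), end - start,
                     logical + (start - translated)};
}

The three returned values correspond to the position, mapped_size and logical-base offset that the patch passes to MemArena::MapInMemoryRegion.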