diff --git a/Source/Core/Core/HW/Memmap.cpp b/Source/Core/Core/HW/Memmap.cpp
index f51780dfaa..2241ef75d6 100644
--- a/Source/Core/Core/HW/Memmap.cpp
+++ b/Source/Core/Core/HW/Memmap.cpp
@@ -13,6 +13,7 @@
 #include <cstring>
 #include <map>
 #include <memory>
+#include <set>
 #include <string>
 #include <vector>
 
@@ -244,11 +245,7 @@ void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table)
   }
   m_dbat_mapped_entries.clear();
 
-  for (auto& entry : m_page_table_mapped_entries)
-  {
-    m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
-  }
-  m_page_table_mapped_entries.clear();
+  RemoveAllPageTableMappings();
 
   m_logical_page_mappings.fill(nullptr);
 
@@ -302,7 +299,7 @@ void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table)
                    intersection_start, mapped_size, logical_address);
       continue;
     }
-    m_dbat_mapped_entries.push_back({mapped_pointer, mapped_size});
+    m_dbat_mapped_entries.push_back({mapped_pointer, mapped_size, logical_address});
   }
 
   m_logical_page_mappings[i] =
@@ -313,18 +310,12 @@ void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table)
   }
 }
 
-void MemoryManager::UpdatePageTableMappings(const std::map<u32, u32>& page_mappings)
+void MemoryManager::AddPageTableMappings(const std::map<u32, u32>& mappings)
 {
   if (m_page_size > PowerPC::HW_PAGE_SIZE)
     return;
 
-  for (auto& entry : m_page_table_mapped_entries)
-  {
-    m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
-  }
-  m_page_table_mapped_entries.clear();
-
-  for (const auto [logical_address, translated_address] : page_mappings)
+  for (const auto [logical_address, translated_address] : mappings)
   {
     if (logical_address % m_page_alignment != 0)
       continue;
@@ -357,13 +348,38 @@
                    intersection_start, mapped_size, logical_address);
           continue;
         }
-        m_page_table_mapped_entries.push_back({mapped_pointer, mapped_size});
+        m_page_table_mapped_entries.push_back({mapped_pointer, mapped_size, logical_address});
       }
     }
   }
 }
 
+void MemoryManager::RemovePageTableMappings(const std::set<u32>& mappings)
+{
+  if (m_page_size > PowerPC::HW_PAGE_SIZE)
+    return;
+
+  if (mappings.empty())
+    return;
+
+  std::erase_if(m_page_table_mapped_entries, [this, &mappings](const LogicalMemoryView& entry) {
+    const bool remove = mappings.contains(entry.logical_address);
+    if (remove)
+      m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
+    return remove;
+  });
+}
+
+void MemoryManager::RemoveAllPageTableMappings()
+{
+  for (auto& entry : m_page_table_mapped_entries)
+  {
+    m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
+  }
+  m_page_table_mapped_entries.clear();
+}
+
 void MemoryManager::DoState(PointerWrap& p)
 {
   const u32 current_ram_size = GetRamSize();
diff --git a/Source/Core/Core/HW/Memmap.h b/Source/Core/Core/HW/Memmap.h
index 8d224e1ca7..8941474f31 100644
--- a/Source/Core/Core/HW/Memmap.h
+++ b/Source/Core/Core/HW/Memmap.h
@@ -6,6 +6,7 @@
 #include <array>
 #include <map>
 #include <memory>
+#include <set>
 #include <span>
 #include <string>
 #include <vector>
@@ -55,6 +56,7 @@ struct LogicalMemoryView
 {
   void* mapped_pointer;
   u32 mapped_size;
+  u32 logical_address;
 };
 
 class MemoryManager
@@ -101,7 +103,9 @@ public:
   void DoState(PointerWrap& p);
 
   void UpdateDBATMappings(const PowerPC::BatTable& dbat_table);
-  void UpdatePageTableMappings(const std::map<u32, u32>& page_mappings);
+  void AddPageTableMappings(const std::map<u32, u32>& mappings);
+  void RemovePageTableMappings(const std::set<u32>& mappings);
+  void RemoveAllPageTableMappings();
 
   void Clear();
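Review note: the new `RemovePageTableMappings` performs the actual unmap as a side effect inside the `std::erase_if` predicate. A minimal standalone sketch of that pattern, with stand-in types instead of Dolphin's `MemoryManager`/`MemArena` (`Unmap()` is a placeholder for `m_arena.UnmapFromMemoryRegion`):

```cpp
#include <cstdint>
#include <set>
#include <vector>

struct View
{
  void* mapped_pointer;
  std::uint32_t mapped_size;
  std::uint32_t logical_address;  // the field this diff adds to LogicalMemoryView
};

void Unmap(void*, std::uint32_t) {}  // placeholder for the arena unmap call

void RemoveViews(std::vector<View>& views, const std::set<std::uint32_t>& to_remove)
{
  std::erase_if(views, [&](const View& v) {
    const bool remove = to_remove.contains(v.logical_address);
    if (remove)
      Unmap(v.mapped_pointer, v.mapped_size);
    return remove;
  });
}
```

The side effect in the predicate is safe here: `std::erase_if` on a vector is specified in terms of `std::remove_if`, which applies the predicate exactly once per element.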
diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
index 5164745cd8..11b70ceb03 100644
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
@@ -33,19 +33,19 @@ constexpr Arm64Gen::ARM64Reg DISPATCHER_PC = Arm64Gen::ARM64Reg::W26;
                     PowerPC::PowerPCState, elem); \
     _Pragma("GCC diagnostic pop") \
   }())
+#else
+#define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem))
+#endif
 
 #define PPCSTATE_OFF_ARRAY(elem, i) \
   (PPCSTATE_OFF(elem[0]) + sizeof(PowerPC::PowerPCState::elem[0]) * (i))
 
-#else
-#define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem))
-#define PPCSTATE_OFF_ARRAY(elem, i) \
-  (offsetof(PowerPC::PowerPCState, elem[0]) + sizeof(PowerPC::PowerPCState::elem[0]) * (i))
-#endif
+#define PPCSTATE_OFF_STD_ARRAY(elem, i) \
+  (PPCSTATE_OFF(elem) + sizeof(PowerPC::PowerPCState::elem[0]) * (i))
 
 #define PPCSTATE_OFF_GPR(i) PPCSTATE_OFF_ARRAY(gpr, i)
 #define PPCSTATE_OFF_CR(i) PPCSTATE_OFF_ARRAY(cr.fields, i)
-#define PPCSTATE_OFF_SR(i) PPCSTATE_OFF_ARRAY(sr, i)
+#define PPCSTATE_OFF_SR(i) PPCSTATE_OFF_STD_ARRAY(sr, i)
 #define PPCSTATE_OFF_SPR(i) PPCSTATE_OFF_ARRAY(spr, i)
 
 static_assert(std::is_same_v);
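Review note on the macro reshuffle: `offsetof` can name a C-array element directly (`elem[0]` is a valid member designator), but for a `std::array` member that expression goes through `operator[]`, which `offsetof` does not accept. Hence the new `PPCSTATE_OFF_STD_ARRAY`, which takes the offset of the whole object and adds the element stride by hand. A toy illustration (not Dolphin code; the array-element form of `offsetof` is a widely supported extension):

```cpp
#include <array>
#include <cstddef>
#include <cstdint>

struct State
{
  std::uint32_t gpr[32];             // C array
  std::array<std::uint32_t, 16> sr;  // std::array
};

// C array: the element can appear in the member designator (GCC/Clang/MSVC accept this).
constexpr std::size_t gpr5 = offsetof(State, gpr[5]);

// std::array: take the offset of the member itself and add the stride manually,
// which is exactly what PPCSTATE_OFF_STD_ARRAY does.
constexpr std::size_t sr3 = offsetof(State, sr) + sizeof(std::uint32_t) * 3;

static_assert(gpr5 == 5 * sizeof(std::uint32_t));
static_assert(sr3 == 32 * sizeof(std::uint32_t) + 12);
```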
diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp
index fc89187b4e..72f2bc9038 100644
--- a/Source/Core/Core/PowerPC/MMU.cpp
+++ b/Source/Core/Core/PowerPC/MMU.cpp
@@ -25,10 +25,17 @@
 #include "Core/PowerPC/MMU.h"
 
+#include <array>
 #include <cstddef>
 #include <cstring>
 #include <map>
+#include <span>
 #include <string>
+#include <utility>
+
+#ifdef _M_X86_64
+#include <emmintrin.h>
+#endif
 
 #include "Common/Align.h"
 #include "Common/Assert.h"
@@ -61,18 +68,37 @@ MMU::~MMU() = default;
 
 void MMU::Reset()
 {
+  m_page_table.clear();
   m_page_mappings.clear();
 #ifndef _ARCH_32
-  m_memory.UpdatePageTableMappings(m_page_mappings);
+  m_memory.RemoveAllPageTableMappings();
#endif
 }
 
-void MMU::DoState(PointerWrap& p)
+void MMU::DoState(PointerWrap& p, bool sr_changed)
 {
-  // Instead of storing m_page_mappings in savestates, we *could* recalculate it based on memory
+  // Instead of storing m_page_table in savestates, we *could* refetch it from memory
   // here in DoState, but this could lead to us getting a more up-to-date set of page mappings
   // than we had when the savestate was created, which could be a problem for TAS determinism.
-  p.Do(m_page_mappings);
+  if (p.IsReadMode())
+  {
+    if (sr_changed)
+    {
+      // Non-incremental update of page table mappings.
+      p.Do(m_page_table);
+      SRUpdated();
+    }
+    else
+    {
+      // Incremental update of page table mappings.
+      p.Do(m_temp_page_table);
+      PageTableUpdated(m_temp_page_table);
+    }
+  }
+  else
+  {
+    p.Do(m_page_table);
+  }
 }
 
 // Overloaded byteswap functions, for use within the templated functions below.
@@ -1346,10 +1372,12 @@ void MMU::SDRUpdated()
 
 void MMU::SRUpdated()
 {
-  if (m_ppc_state.msr.DR)
-    PageTableUpdated();
-  else
-    m_ppc_state.pagetable_update_pending = true;
+#ifndef _ARCH_32
+  // Our incremental handling of page table updates can't handle SR changing, so throw away all
+  // existing mappings and then reparse the whole page table.
+  m_memory.RemoveAllPageTableMappings();
+  ReloadPageTable();
+#endif
 }
 
 enum class TLBLookupResult
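Review note: `SRUpdated` now always rebuilds from scratch, because a segment register change can re-home every (VSID, page index) pair at once. For readers following the PTEG arithmetic used throughout this file: each PTEG is 64 bytes (eight 8-byte PTEs), and its location comes from a hash of the VSID and page index. A sketch of that computation, following the PowerPC architecture documents (field widths are from the spec; the base and mask values in the example are invented):

```cpp
#include <cstdint>

// Primary hash: low 19 bits of the VSID XORed with the 16-bit page index.
// The secondary PTEG (H=1) uses the one's complement of the primary hash.
std::uint32_t PtegAddress(std::uint32_t vsid, std::uint32_t page_index, bool secondary,
                          std::uint32_t pagetable_base, std::uint32_t pagetable_hashmask)
{
  std::uint32_t hash = (vsid & 0x7FFFF) ^ (page_index & 0xFFFF);
  if (secondary)
    hash = ~hash;
  return pagetable_base | ((hash & pagetable_hashmask) << 6);  // << 6 == * 64 bytes per PTEG
}

// Example: base 0x01000000, hashmask 0x3FF, VSID 0x123, page index 0x45
// -> primary hash 0x166 -> PTEG at 0x01005980.
```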
@@ -1449,9 +1477,13 @@ void MMU::InvalidateTLBEntry(u32 address)
 
 void MMU::PageTableUpdated()
 {
   m_ppc_state.pagetable_update_pending = false;
-#ifndef _ARCH_32
-  m_page_mappings.clear();
+#ifdef _ARCH_32
+  // If a savestate is brought from a 64-bit system to a 32-bit system, clear m_page_table.
+  // Not doing this means a stale m_page_table would stick around, which could be a problem
+  // if the savestate is then brought to a 64-bit system again.
+  m_page_table.clear();
+#else
   if (m_ppc_state.m_enable_dcache)
   {
     // Because fastmem isn't in use when accurate dcache emulation is enabled, setting up mappings
@@ -1460,9 +1492,8 @@ void MMU::PageTableUpdated()
     return;
   }
 
-  const u32 page_table_mask = m_ppc_state.pagetable_hashmask;
   const u32 page_table_base = m_ppc_state.pagetable_base;
-  const u32 page_table_end = (page_table_base | (page_table_mask << 6)) + (1 << 6);
+  const u32 page_table_end = (page_table_base | (m_ppc_state.pagetable_hashmask << 6)) + (1 << 6);
   const u32 page_table_size = page_table_end - page_table_base;
 
   u8* page_table_view = m_system.GetMemory().GetPointerForRange(page_table_base, page_table_size);
@@ -1470,98 +1501,332 @@ void MMU::PageTableUpdated()
   {
     WARN_LOG_FMT(POWERPC, "Failed to read page table at {:#010x}-{:#010x}", page_table_base,
                  page_table_end);
-    m_memory.UpdatePageTableMappings(m_page_mappings);
+
+    // Remove host mappings, because we no longer know if they're up to date.
+    m_memory.RemoveAllPageTableMappings();
+
+    // Because we removed host mappings, incremental updates won't work correctly.
+    // Start over from scratch.
+    m_page_table.clear();
+    m_page_mappings.clear();
     return;
   }
 
-  const auto read_page_table = [&](u32 H) {
-    for (u32 i = 0; i <= page_table_mask; ++i)
+  PageTableUpdated(std::span(page_table_view, page_table_size));
+#endif
+}
+
+#ifndef _ARCH_32
+void MMU::ReloadPageTable()
+{
+  m_page_mappings.clear();
+
+  m_temp_page_table.clear();
+  std::swap(m_page_table, m_temp_page_table);
+  PageTableUpdated(m_temp_page_table);
+}
+
+void MMU::PageTableUpdated(std::span<u8> page_table)
+{
+  // PowerPC's priority order for PTEs that have the same logical address is as follows:
+  //
+  // * Primary PTEs (H=0) take priority over secondary PTEs (H=1).
+  // * If two PTEs have equal H values, they must be in the same PTEG due to how the hash
+  //   incorporates the logical address and H. The PTE located first in the PTEG takes priority.
+
+  m_removed_mappings.clear();
+  m_added_mappings.clear();
+
+  if (m_page_table.size() != page_table.size())
+  {
+    m_page_table.clear();
+    m_page_table.resize(page_table.size());
+  }
+
+  u8* old_page_table = m_page_table.data();
+  u8* new_page_table = page_table.data();
+
+  constexpr auto compare_64_bytes = [](const u8* a, const u8* b) -> bool {
+#ifdef _M_X86_64
+    // MSVC (x64) doesn't want to optimize the memcmp call. This 64-byte compare is performance
+    // critical in certain games like Spider-Man 2, so let's use our own vectorized version
+    // instead.
+    const __m128i a1 = _mm_load_si128(reinterpret_cast<const __m128i*>(a));
+    const __m128i b1 = _mm_load_si128(reinterpret_cast<const __m128i*>(b));
+    const __m128i cmp1 = _mm_cmpeq_epi8(a1, b1);
+    const __m128i a2 = _mm_load_si128(reinterpret_cast<const __m128i*>(a + 0x10));
+    const __m128i b2 = _mm_load_si128(reinterpret_cast<const __m128i*>(b + 0x10));
+    const __m128i cmp2 = _mm_cmpeq_epi8(a2, b2);
+    const __m128i cmp12 = _mm_and_si128(cmp1, cmp2);
+    const __m128i a3 = _mm_load_si128(reinterpret_cast<const __m128i*>(a + 0x20));
+    const __m128i b3 = _mm_load_si128(reinterpret_cast<const __m128i*>(b + 0x20));
+    const __m128i cmp3 = _mm_cmpeq_epi8(a3, b3);
+    const __m128i a4 = _mm_load_si128(reinterpret_cast<const __m128i*>(a + 0x30));
+    const __m128i b4 = _mm_load_si128(reinterpret_cast<const __m128i*>(b + 0x30));
+    const __m128i cmp4 = _mm_cmpeq_epi8(a4, b4);
+    const __m128i cmp34 = _mm_and_si128(cmp3, cmp4);
+    const __m128i cmp1234 = _mm_and_si128(cmp12, cmp34);
+    return _mm_movemask_epi8(cmp1234) == 0xFFFF;
+#else
+    return std::memcmp(std::assume_aligned<16>(a), std::assume_aligned<16>(b), 64) == 0;
+#endif
+  };
+
+  constexpr auto get_page_index = [](UPTE_Lo pte1, u32 hash) {
+    u32 page_index_from_hash = hash ^ pte1.VSID;
+    if (pte1.H)
+      page_index_from_hash = ~page_index_from_hash;
+
+    // Due to hash masking, the upper bits of page_index_from_hash might not match the actual
+    // page index. But these bits fully overlap with the API (abbreviated page index), so we can
+    // overwrite these bits with the API from pte1 and thereby get the correct page index.
+    //
+    // In other words: logical_address.API must be written to after logical_address.page_index!
+
+    EffectiveAddress logical_address;
+    logical_address.offset = 0;
+    logical_address.page_index = page_index_from_hash;
+    logical_address.API = pte1.API;
+    return logical_address;
+  };
+
+  const auto fixup_shadowed_mappings = [this, &get_page_index, old_page_table, new_page_table](
+                                           UPTE_Lo pte1, u32 page_table_offset, bool* run_pass_2) {
+    DEBUG_ASSERT(pte1.V == 1);
+
+    bool switched_to_secondary = false;
+
+    while (true)
     {
-      for (u32 j = 0; j < 8; ++j)
+      const u32 big_endian_pte1 = Common::swap32(pte1.Hex);
+      const u32 pteg_end = Common::AlignUp(page_table_offset, 64);
+      for (u32 i = page_table_offset; i < pteg_end; i += 8)
       {
-        const u32 pte_addr = (page_table_base | ((i & page_table_mask) << 6)) + j * 8;
+        if (std::memcmp(new_page_table + i, &big_endian_pte1, sizeof(big_endian_pte1)) == 0)
+        {
+          // We've found a PTE that has V set and has the same logical address as the passed-in PTE.
+          // The found PTE was previously skipped over because the passed-in PTE had priority, but
+          // the passed-in PTE is being changed, so now we need to re-check the found PTE. This will
+          // happen naturally later in the loop that's calling this function, but only if the 8-byte
+          // memcmp reports that the PTE has changed. Therefore, if the PTE currently compares
+          // equal, change an unused bit in the PTE.
+          if (std::memcmp(old_page_table + i, new_page_table + i, 8) == 0)
+          {
+            UPTE_Hi pte2(Common::swap32(old_page_table + i + 4));
+            pte2.reserved_1 = pte2.reserved_1 ^ 1;
+            const u32 value = Common::swap32(pte2.Hex);
+            std::memcpy(old_page_table + i + 4, &value, sizeof(value));
 
-        UPTE_Lo pte1(Common::swap32(page_table_view + pte_addr - page_table_base));
-        UPTE_Hi pte2(Common::swap32(page_table_view + pte_addr - page_table_base + 4));
+            if (switched_to_secondary)
+              *run_pass_2 = true;
+          }
+          return;
+        }
+      }
 
-        if (!pte1.V)
-          continue;
+      if (pte1.H == 1)
+      {
+        // We've scanned the secondary PTEG. Nothing left to do.
+        return;
+      }
+      else
+      {
-        if (pte1.H != H)
-          continue;
+        // We've scanned the primary PTEG. Now let's scan the secondary PTEG.
+        const EffectiveAddress ea = get_page_index(pte1, page_table_offset / 64);
+        const u32 hash = ~(pte1.VSID ^ ea.page_index);
+        pte1.H = 1;
+        page_table_offset =
+            (((hash & m_ppc_state.pagetable_hashmask) << 6) | m_ppc_state.pagetable_base) -
+            m_ppc_state.pagetable_base;
+        switched_to_secondary = true;
+      }
+    }
+  };
+
+  const auto try_add_mapping = [this, &get_page_index, page_table](UPTE_Lo pte1, UPTE_Hi pte2,
+                                                                   u32 page_table_offset) {
+    EffectiveAddress logical_address = get_page_index(pte1, page_table_offset / 64);
+    for (u32 i = 0; i < std::size(m_ppc_state.sr); ++i)
+    {
+      const auto sr = UReg_SR{m_ppc_state.sr[i]};
+      if (sr.VSID != pte1.VSID || sr.T != 0)
+        continue;
+
+      logical_address.SR = i;
+
+      bool host_mapping = true;
+
+      const bool wi = (pte2.WIMG & 0b1100) != 0;
+      if (wi)
+      {
         // There are quirks related to uncached memory that can't be correctly emulated by fast
         // accesses, so we don't map uncached memory. (However, no software at all is known to
         // trigger these quirks through page address translation, only through block address
         // translation.)
-        const bool wi = (pte2.WIMG & 0b1100) != 0;
-        if (wi)
-          continue;
+        host_mapping = false;
+      }
+      else if (m_dbat_table[logical_address.Hex >> PowerPC::BAT_INDEX_SHIFT] &
+               PowerPC::BAT_MAPPED_BIT)
+      {
+        // Block address translation takes priority over page address translation.
+        host_mapping = false;
+      }
+      else if (m_power_pc.GetMemChecks().OverlapsMemcheck(logical_address.Hex,
+                                                          PowerPC::HW_PAGE_SIZE))
+      {
+        // Fast accesses don't support memchecks, so force slow accesses by removing fastmem
+        // mappings for all overlapping virtual pages.
+        host_mapping = false;
+      }
 
-        // Due to hash masking, the upper bits of page_index_from_hash might not match the actual
-        // page index. But these bits fully overlap with the API (abbreviated page index), so we can
-        // overwrite these bits with the API from pte1 and thereby get the correct page index.
-        //
-        // In other words: logical_address.API must be written to after logical_address.page_index!
-        u32 page_index_from_hash = i ^ pte1.VSID;
-        if (pte1.H)
-          page_index_from_hash = ~page_index_from_hash;
-        EffectiveAddress logical_address;
-        logical_address.offset = 0;
-        logical_address.page_index = page_index_from_hash;
-        logical_address.API = pte1.API;
+      const u32 priority = (page_table_offset % 64 / 8) | (pte1.H << 3);
+      const PageMapping page_mapping(pte2.RPN, host_mapping, priority);
 
-        for (u32 k = 0; k < std::size(m_ppc_state.sr); ++k)
+      const auto it = m_page_mappings.find(logical_address.Hex);
+      if (it != m_page_mappings.end()) [[unlikely]]
+      {
+        if (priority > it->second.priority)
         {
-          const auto sr = UReg_SR{m_ppc_state.sr[k]};
-          if (sr.VSID != pte1.VSID || sr.T != 0)
-            continue;
+          // An existing mapping has priority.
+          continue;
+        }
+        else
+        {
+          // The new mapping has priority over an existing mapping. Replace the existing mapping.
+          if (it->second.host_mapping)
+            m_removed_mappings.emplace(it->first);
+          it->second.Hex = page_mapping.Hex;
+        }
+      }
+      else
+      {
+        // There's no existing mapping for this logical address. Add a new mapping.
+        m_page_mappings.emplace(logical_address.Hex, page_mapping);
+      }
 
-          logical_address.SR = k;
+      if (host_mapping)
+      {
+        const u32 physical_address = pte2.RPN << 12;
+        m_added_mappings.emplace(logical_address.Hex, physical_address);
 
-          // Block address translation takes priority over page address translation.
-          if (m_dbat_table[logical_address.Hex >> PowerPC::BAT_INDEX_SHIFT] &
-              PowerPC::BAT_MAPPED_BIT)
-          {
-            continue;
-          }
+        // HACK: We set R and C, which indicate whether a page has been read from and written to
+        // respectively, when a page is mapped rather than when it's actually accessed. The latter
+        // is probably possible using some fault handling logic, but for now it seems like more
+        // work than it's worth.
+        if (!pte2.R || !pte2.C)
+        {
+          pte2.R = 1;
+          pte2.C = 1;
 
-          // Fast accesses don't support memchecks, so force slow accesses by removing fastmem
-          // mappings for all overlapping virtual pages.
-          constexpr u32 logical_size = PowerPC::HW_PAGE_SIZE;
-          if (m_power_pc.GetMemChecks().OverlapsMemcheck(logical_address.Hex, logical_size))
-            continue;
-
-          const u32 physical_address = pte2.RPN << 12;
-
-          // Important: This doesn't overwrite anything already present in m_page_mappings.
-          m_page_mappings.emplace(logical_address.Hex, physical_address);
-
-          // HACK: We set R and C, which indicate whether a page have been read from and written to
-          // respectively, when a page is mapped rather than when it's actually accessed. The latter
-          // is probably possible using some fault handling logic, but for now it seems like more
-          // work than it's worth.
-          if (!pte2.R || !pte2.C)
-          {
-            pte2.R = 1;
-            pte2.C = 1;
-
-            const u32 pte2_swapped = Common::swap32(pte2.Hex);
-            std::memcpy(page_table_view + pte_addr - page_table_base + 4, &pte2_swapped,
-                        sizeof(pte2_swapped));
-          }
+          const u32 pte2_swapped = Common::swap32(pte2.Hex);
+          std::memcpy(page_table.data() + page_table_offset + 4, &pte2_swapped,
+                      sizeof(pte2_swapped));
         }
       }
     }
   };
 
-  // We need to read all H=0 PTEs first, because H=0 takes priority over H=1.
-  read_page_table(0);
-  read_page_table(1);
+  bool run_pass_2 = false;
 
-  m_memory.UpdatePageTableMappings(m_page_mappings);
-#endif
+  // Pass 1: Remove old mappings and add new primary (H=0) mappings.
+  for (u32 i = 0; i < page_table.size(); i += 64)
+  {
+    if (compare_64_bytes(old_page_table + i, new_page_table + i)) [[likely]]
+      continue;
+
+    for (u32 j = 0; j < 64; j += 8)
+    {
+      if (std::memcmp(old_page_table + i + j, new_page_table + i + j, 8) == 0) [[likely]]
+        continue;
+
+      // Remove old mappings.
+      UPTE_Lo old_pte1(Common::swap32(old_page_table + i + j));
+      if (old_pte1.V)
+      {
+        const u32 priority = (j / 8) | (old_pte1.H << 3);
+        EffectiveAddress logical_address = get_page_index(old_pte1, i / 64);
+
+        for (u32 k = 0; k < std::size(m_ppc_state.sr); ++k)
+        {
+          const auto sr = UReg_SR{m_ppc_state.sr[k]};
+          if (sr.VSID != old_pte1.VSID || sr.T != 0)
+            continue;
+
+          logical_address.SR = k;
+
+          const auto it = m_page_mappings.find(logical_address.Hex);
+          if (it != m_page_mappings.end() && priority == it->second.priority)
+          {
+            if (it->second.host_mapping)
+              m_removed_mappings.emplace(logical_address.Hex);
+            m_page_mappings.erase(it);
+
+            // It's unlikely but theoretically possible that this was shadowing another PTE that's
+            // using the same logical address but has a lower priority. If this happens, we must
+            // make sure that we don't skip over that other PTE because of the 8-byte memcmp.
+            fixup_shadowed_mappings(old_pte1, i + j, &run_pass_2);
+          }
+        }
+      }
+
+      // Add new primary (H=0) mappings.
+      UPTE_Lo new_pte1(Common::swap32(new_page_table + i + j));
+      UPTE_Hi new_pte2(Common::swap32(new_page_table + i + j + 4));
+      if (new_pte1.V)
+      {
+        if (new_pte1.H)
+        {
+          run_pass_2 = true;
+          continue;
+        }
+
+        try_add_mapping(new_pte1, new_pte2, i + j);
+      }
+
+      // Update our copy of the page table.
+      std::memcpy(old_page_table + i + j, new_page_table + i + j, 8);
+    }
+  }
+
+  // Pass 2: Add new secondary (H=1) mappings. This is a separate pass because before we can decide
+  // whether a mapping should be added, we first need to check all PTEs that have equal or higher
+  // priority to see if their mappings should be removed. For adding primary mappings, this ordering
+  // comes naturally from doing a linear scan of the page table from start to finish. But for adding
+  // secondary mappings, the primary PTEG that has priority over a given secondary PTEG is in the
+  // other half of the page table, so we need more than one pass through the page table. But most of
+  // the time, there are no secondary mappings, letting us skip the second pass.
+  if (run_pass_2) [[unlikely]]
+  {
+    for (u32 i = 0; i < page_table.size(); i += 64)
+    {
+      if (compare_64_bytes(old_page_table + i, new_page_table + i)) [[likely]]
+        continue;
+
+      for (u32 j = 0; j < 64; j += 8)
+      {
+        if (std::memcmp(old_page_table + i + j, new_page_table + i + j, 8) == 0) [[likely]]
+          continue;
+
+        UPTE_Lo new_pte1(Common::swap32(new_page_table + i + j));
+        UPTE_Hi new_pte2(Common::swap32(new_page_table + i + j + 4));
+
+        // We don't need to check new_pte1.V and new_pte1.H. If the memcmp above returned nonzero,
+        // pass 1 must have skipped running memcpy, which only happens if V and H are both set.
+        try_add_mapping(new_pte1, new_pte2, i + j);
+
+        std::memcpy(old_page_table + i + j, new_page_table + i + j, 8);
+      }
+    }
+  }
+
+  if (!m_removed_mappings.empty())
+    m_memory.RemovePageTableMappings(m_removed_mappings);
+
+  if (!m_added_mappings.empty())
+    m_memory.AddPageTableMappings(m_added_mappings);
 }
+#endif
 
 void MMU::PageTableUpdatedFromJit(MMU* mmu)
 {
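Review note: both passes lean on the `priority` value computed in `try_add_mapping` — PTEG slot in the low three bits, H in bit 3, smaller value wins. Compile-time checks of that ordering, mirroring the expression `(page_table_offset % 64 / 8) | (H << 3)` from the hunk above:

```cpp
#include <cstdint>

constexpr std::uint32_t Priority(std::uint32_t pteg_byte_offset, bool h)
{
  return (pteg_byte_offset % 64 / 8) | (static_cast<std::uint32_t>(h) << 3);
}

static_assert(Priority(0, false) == 0);   // primary PTEG, first slot: strongest
static_assert(Priority(56, false) == 7);  // primary PTEG, last slot
static_assert(Priority(0, true) == 8);    // any secondary slot loses to any primary slot
static_assert(Priority(56, true) == 15);  // weakest
```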
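Review note: the DBAT hunk that follows is the flip side of the precedence rule in `try_add_mapping` — BAT mappings beat page mappings, so whenever the DBAT layout changes, the surviving page mappings have to be recomputed. The host-mapping gate itself, condensed into a standalone sketch (the constants are assumptions for illustration, not Dolphin's definitions):

```cpp
#include <cstdint>

constexpr std::uint32_t kBatIndexShift = 17;      // assumed BAT granularity (128 KiB)
constexpr std::uint32_t kBatMappedBit = 1u << 0;  // assumed flag bit

// A logical page may only get a host fastmem mapping if no uncached-memory
// quirk (WIMG W/I bits), BAT mapping, or memcheck covers it.
bool MayHostMap(const std::uint32_t* dbat_table, std::uint32_t logical_address, bool wimg_wi,
                bool overlaps_memcheck)
{
  if (wimg_wi)
    return false;  // uncached quirks can't be emulated by fast accesses
  if (dbat_table[logical_address >> kBatIndexShift] & kBatMappedBit)
    return false;  // block address translation takes priority
  return !overlaps_memcheck;  // memchecks require slow accesses
}
```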
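Review note: `PageMapping` in the MMU.h hunk above packs three fields into one u32 via `BitField`. The same layout written with explicit shifts, as a cross-check of the field boundaries (a hypothetical helper, not part of the patch):

```cpp
#include <cstdint>

struct PageMappingBits  // priority: bits 0-10, host_mapping: bit 11, RPN: bits 12-31
{
  static constexpr std::uint32_t Pack(std::uint32_t rpn, bool host, std::uint32_t priority)
  {
    return (priority & 0x7FF) | (static_cast<std::uint32_t>(host) << 11) | (rpn << 12);
  }
  static constexpr std::uint32_t Rpn(std::uint32_t hex) { return hex >> 12; }
  static constexpr bool HostMapping(std::uint32_t hex) { return ((hex >> 11) & 1) != 0; }
  static constexpr std::uint32_t Priority(std::uint32_t hex) { return hex & 0x7FF; }
};

static_assert(PageMappingBits::Rpn(PageMappingBits::Pack(0xABCDE, true, 9)) == 0xABCDE);
static_assert(PageMappingBits::HostMapping(PageMappingBits::Pack(0xABCDE, true, 9)));
static_assert(PageMappingBits::Priority(PageMappingBits::Pack(0xABCDE, true, 9)) == 9);
```

Eleven bits for priority is more headroom than the four values-per-bit-group that `(slot | H << 3)` can produce, so the field won't overflow.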
@@ -1794,7 +2059,11 @@ void MMU::DBATUpdated()
 
 #ifndef _ARCH_32
   m_memory.UpdateDBATMappings(m_dbat_table);
-  m_memory.UpdatePageTableMappings(m_page_mappings);
+
+  // Calling UpdateDBATMappings removes all fastmem page table mappings,
+  // so we have to recreate them.
+  if (!m_page_table.empty())
+    ReloadPageTable();
 #endif
 
   // IsOptimizable*Address and dcbz depends on the BAT mapping, so we need a flush here.
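Review note: the `old_sr` snapshot in the PowerPC.cpp hunk below is what motivates the PowerPC.h change at the end of this patch — a plain `u32 sr[16]` can be neither copied by value nor compared with `!=`, while `std::array` gives both for free. Minimal illustration (stand-in names, not Dolphin code):

```cpp
#include <array>
#include <cstdint>

using SrFile = std::array<std::uint32_t, 16>;

bool SrChanged(const SrFile& before, const SrFile& after)
{
  return before != after;  // element-wise comparison provided by std::array
}

// Usage mirrors DoState: const SrFile old_sr = state.sr;  ...deserialize...
// mmu.DoState(p, SrChanged(old_sr, state.sr));
```

diff --git a/Source/Core/Core/PowerPC/PowerPC.cpp b/Source/Core/Core/PowerPC/PowerPC.cpp
index ca83370577..a50d61193e 100644
--- a/Source/Core/Core/PowerPC/PowerPC.cpp
+++ b/Source/Core/Core/PowerPC/PowerPC.cpp
@@ -4,6 +4,7 @@
 #include "Core/PowerPC/PowerPC.h"
 
 #include <algorithm>
+#include <array>
 #include <cstring>
 #include <istream>
 #include <ostream>
@@ -80,6 +81,8 @@ void PowerPCManager::DoState(PointerWrap& p)
   // *((u64 *)&TL(m_ppc_state)) = SystemTimers::GetFakeTimeBase(); //works since we are little
   // endian and TL comes first :)
 
+  const std::array<u32, 16> old_sr = m_ppc_state.sr;
+
   p.DoArray(m_ppc_state.gpr);
   p.Do(m_ppc_state.pc);
   p.Do(m_ppc_state.npc);
@@ -107,10 +110,10 @@ void PowerPCManager::DoState(PointerWrap& p)
   m_ppc_state.dCache.DoState(memory, p);
 
   auto& mmu = m_system.GetMMU();
-  mmu.DoState(p);
-
   if (p.IsReadMode())
   {
+    mmu.DoState(p, old_sr != m_ppc_state.sr);
+
     if (!m_ppc_state.m_enable_dcache)
     {
       INFO_LOG_FMT(POWERPC, "Flushing data cache");
@@ -123,6 +126,10 @@ void PowerPCManager::DoState(PointerWrap& p)
     mmu.IBATUpdated();
     mmu.DBATUpdated();
   }
+  else
+  {
+    mmu.DoState(p, false);
+  }
 
   // SystemTimers::DecrementerSet();
   // SystemTimers::TimeBaseSet();
diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h
index 7f931c2b8f..bd185dc594 100644
--- a/Source/Core/Core/PowerPC/PowerPC.h
+++ b/Source/Core/Core/PowerPC/PowerPC.h
@@ -175,7 +175,7 @@ struct PowerPCState
   alignas(16) PairedSingle ps[32];
 #endif
 
-  u32 sr[16]{};  // Segment registers.
+  std::array<u32, 16> sr{};  // Segment registers.
 
   // special purpose registers - controls quantizers, DMA, and lots of other misc extensions.
   // also for power management, but we don't care about that.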
diff --git a/Source/Core/Core/PowerPC/MMU.h b/Source/Core/Core/PowerPC/MMU.h
index 5293de56b0..0ddf6bd631 100644
--- a/Source/Core/Core/PowerPC/MMU.h
+++ b/Source/Core/Core/PowerPC/MMU.h
@@ -7,7 +7,10 @@
 #include <cstddef>
 #include <map>
 #include <optional>
+#include <set>
+#include <span>
 #include <string>
+#include <vector>
 
 #include "Common/BitField.h"
 #include "Common/CommonTypes.h"
@@ -120,7 +123,7 @@ public:
   ~MMU();
 
   void Reset();
-  void DoState(PointerWrap& p);
+  void DoState(PointerWrap& p, bool sr_changed);
 
   // Routines for debugger UI, cheats, etc. to access emulated memory from the
   // perspective of the CPU. Not for use by core emulation routines.
@@ -300,6 +303,26 @@ private:
     explicit EffectiveAddress(u32 address) : Hex{address} {}
   };
 
+  union PageMapping
+  {
+    // A small priority number wins over a larger priority number.
+    BitField<0, 11, u32> priority;
+    // Whether we're allowed to create a host mapping for this mapping.
+    BitField<11, 1, u32> host_mapping;
+    // The physical address of the page.
+    BitField<12, 20, u32> RPN;
+
+    u32 Hex = 0;
+
+    PageMapping() = default;
+    PageMapping(u32 RPN_, bool host_mapping_, u32 priority_)
+    {
+      RPN = RPN_;
+      host_mapping = host_mapping_;
+      priority = priority_;
+    }
+  };
+
   template <const XCheckTLBFlag flag>
   TranslateAddressResult TranslateAddress(u32 address);
 
@@ -311,6 +334,11 @@ private:
   void Memcheck(u32 address, u64 var, bool write, size_t size);
 
+#ifndef _ARCH_32
+  void ReloadPageTable();
+  void PageTableUpdated(std::span<u8> page_table);
+#endif
+
   void UpdateBATs(BatTable& bat_table, u32 base_spr);
   void UpdateFakeMMUBat(BatTable& bat_table, u32 start_addr);
 
@@ -335,9 +363,18 @@ private:
   PowerPC::PowerPCState& m_ppc_state;
 
   // STATE_TO_SAVE
-  std::map<u32, u32> m_page_mappings;
+  std::vector<u8> m_page_table;
   // END STATE_TO_SAVE
 
+  // This keeps track of all valid page table mappings in m_page_table.
+  // The key is the logical address.
+  std::map<u32, PageMapping> m_page_mappings;
+
+  // These are kept around just for their memory allocations. They are always cleared before use.
+  std::vector<u8> m_temp_page_table;
+  std::set<u32> m_removed_mappings;
+  std::map<u32, u32> m_added_mappings;
+
   BatTable m_ibat_table;
   BatTable m_dbat_table;
 };
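Closing review note: the `compare_64_bytes` fast path in MMU.cpp is easy to sanity-check in isolation. A small harness, assuming an SSE2-capable build (SSE2 is baseline on x86-64); it re-rolls the unrolled lambda into a loop and checks it against `memcmp`:

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>
#include <emmintrin.h>

bool Equal64(const std::uint8_t* a, const std::uint8_t* b)
{
  __m128i acc = _mm_set1_epi8(static_cast<char>(0xFF));
  for (int i = 0; i < 64; i += 16)
  {
    const __m128i va = _mm_loadu_si128(reinterpret_cast<const __m128i*>(a + i));
    const __m128i vb = _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + i));
    acc = _mm_and_si128(acc, _mm_cmpeq_epi8(va, vb));
  }
  return _mm_movemask_epi8(acc) == 0xFFFF;  // all 16 accumulated lanes equal
}

int main()
{
  alignas(16) std::uint8_t a[64] = {};
  alignas(16) std::uint8_t b[64] = {};
  assert(Equal64(a, b) == (std::memcmp(a, b, 64) == 0));
  b[63] = 1;  // flip one byte; both paths must now report inequality
  assert(Equal64(a, b) == (std::memcmp(a, b, 64) == 0));
}
```

The patch uses aligned loads (`_mm_load_si128`) because both buffers come from allocations with sufficient alignment; the loop above uses unaligned loads so the harness doesn't depend on that.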