Mirror of https://github.com/dolphin-emu/dolphin.git
Core: Update page table mappings incrementally
Removing and re-adding every page table mapping every time something changes in the page table is very slow. Instead, let's generate a diff and ask Memmap to update only the diff.
parent dac13baade
commit d3cfa293e9

7 changed files with 439 additions and 106 deletions
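The change replaces the wholesale rebuild with a diff-and-apply scheme. As a minimal sketch of the idea (the names MappingTable, MappingDiff, and ComputeDiff below are illustrative inventions, not Dolphin's actual API): keep the previously applied logical-to-physical mapping set, compare it against the new one, and hand the memory manager only the entries that changed.

#include <cstdint>
#include <map>
#include <set>

// logical address -> physical address
using MappingTable = std::map<uint32_t, uint32_t>;

struct MappingDiff
{
  std::set<uint32_t> removed;  // logical addresses whose host mappings must be torn down
  MappingTable added;          // mappings that must be (re)created
};

// Compute which mappings disappeared, appeared, or were retargeted.
MappingDiff ComputeDiff(const MappingTable& old_map, const MappingTable& new_map)
{
  MappingDiff diff;
  for (const auto& [logical, physical] : old_map)
  {
    const auto it = new_map.find(logical);
    if (it == new_map.end() || it->second != physical)
      diff.removed.insert(logical);  // gone or retargeted: unmap the old host mapping
  }
  for (const auto& [logical, physical] : new_map)
  {
    const auto it = old_map.find(logical);
    if (it == old_map.end() || it->second != physical)
      diff.added.emplace(logical, physical);  // new or retargeted: create a host mapping
  }
  return diff;
}

In the commit itself, this shape shows up as MemoryManager::RemovePageTableMappings plus MemoryManager::AddPageTableMappings, fed by the m_removed_mappings and m_added_mappings collections that MMU::PageTableUpdated fills while scanning only the 64-byte PTEGs whose contents actually changed.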
@@ -13,6 +13,7 @@
 #include <cstring>
 #include <map>
 #include <memory>
+#include <set>
 #include <span>
 #include <tuple>
@@ -244,11 +245,7 @@ void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table)
   }
   m_dbat_mapped_entries.clear();
 
-  for (auto& entry : m_page_table_mapped_entries)
-  {
-    m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
-  }
-  m_page_table_mapped_entries.clear();
+  RemoveAllPageTableMappings();
 
   m_logical_page_mappings.fill(nullptr);
 
@@ -302,7 +299,7 @@ void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table)
                      intersection_start, mapped_size, logical_address);
         continue;
       }
-      m_dbat_mapped_entries.push_back({mapped_pointer, mapped_size});
+      m_dbat_mapped_entries.push_back({mapped_pointer, mapped_size, logical_address});
     }
 
     m_logical_page_mappings[i] =
@@ -313,18 +310,12 @@ void MemoryManager::UpdateDBATMappings(const PowerPC::BatTable& dbat_table)
   }
 }
 
-void MemoryManager::UpdatePageTableMappings(const std::map<u32, u32>& page_mappings)
+void MemoryManager::AddPageTableMappings(const std::map<u32, u32>& mappings)
 {
   if (m_page_size > PowerPC::HW_PAGE_SIZE)
     return;
 
-  for (auto& entry : m_page_table_mapped_entries)
-  {
-    m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
-  }
-  m_page_table_mapped_entries.clear();
-
-  for (const auto [logical_address, translated_address] : page_mappings)
+  for (const auto [logical_address, translated_address] : mappings)
   {
     if (logical_address % m_page_alignment != 0)
       continue;
@@ -357,13 +348,38 @@ void MemoryManager::UpdatePageTableMappings(const std::map<u32, u32>& page_mappi
                          intersection_start, mapped_size, logical_address);
             continue;
           }
-          m_page_table_mapped_entries.push_back({mapped_pointer, mapped_size});
+          m_page_table_mapped_entries.push_back({mapped_pointer, mapped_size, logical_address});
         }
       }
     }
   }
 }
 
+void MemoryManager::RemovePageTableMappings(const std::set<u32>& mappings)
+{
+  if (m_page_size > PowerPC::HW_PAGE_SIZE)
+    return;
+
+  if (mappings.empty())
+    return;
+
+  std::erase_if(m_page_table_mapped_entries, [this, &mappings](const LogicalMemoryView& entry) {
+    const bool remove = mappings.contains(entry.logical_address);
+    if (remove)
+      m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
+    return remove;
+  });
+}
+
+void MemoryManager::RemoveAllPageTableMappings()
+{
+  for (auto& entry : m_page_table_mapped_entries)
+  {
+    m_arena.UnmapFromMemoryRegion(entry.mapped_pointer, entry.mapped_size);
+  }
+  m_page_table_mapped_entries.clear();
+}
+
 void MemoryManager::DoState(PointerWrap& p)
 {
   const u32 current_ram_size = GetRamSize();
@@ -6,6 +6,7 @@
 #include <array>
 #include <map>
 #include <memory>
+#include <set>
 #include <span>
 #include <string>
 #include <vector>
@@ -55,6 +56,7 @@ struct LogicalMemoryView
 {
   void* mapped_pointer;
   u32 mapped_size;
+  u32 logical_address;
 };
 
 class MemoryManager
@@ -101,7 +103,9 @@ public:
   void DoState(PointerWrap& p);
 
   void UpdateDBATMappings(const PowerPC::BatTable& dbat_table);
-  void UpdatePageTableMappings(const std::map<u32, u32>& page_mappings);
+  void AddPageTableMappings(const std::map<u32, u32>& mappings);
+  void RemovePageTableMappings(const std::set<u32>& mappings);
+  void RemoveAllPageTableMappings();
 
   void Clear();
 
@@ -33,19 +33,19 @@ constexpr Arm64Gen::ARM64Reg DISPATCHER_PC = Arm64Gen::ARM64Reg::W26;
                  PowerPC::PowerPCState, elem); \
     _Pragma("GCC diagnostic pop") \
   }())
+#else
+#define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem))
+#endif
+
 #define PPCSTATE_OFF_ARRAY(elem, i) \
   (PPCSTATE_OFF(elem[0]) + sizeof(PowerPC::PowerPCState::elem[0]) * (i))
-#else
-#define PPCSTATE_OFF(elem) (offsetof(PowerPC::PowerPCState, elem))
 
-#define PPCSTATE_OFF_ARRAY(elem, i) \
-  (offsetof(PowerPC::PowerPCState, elem[0]) + sizeof(PowerPC::PowerPCState::elem[0]) * (i))
-#endif
+#define PPCSTATE_OFF_STD_ARRAY(elem, i) \
+  (PPCSTATE_OFF(elem) + sizeof(PowerPC::PowerPCState::elem[0]) * (i))
 
 #define PPCSTATE_OFF_GPR(i) PPCSTATE_OFF_ARRAY(gpr, i)
 #define PPCSTATE_OFF_CR(i) PPCSTATE_OFF_ARRAY(cr.fields, i)
-#define PPCSTATE_OFF_SR(i) PPCSTATE_OFF_ARRAY(sr, i)
+#define PPCSTATE_OFF_SR(i) PPCSTATE_OFF_STD_ARRAY(sr, i)
 #define PPCSTATE_OFF_SPR(i) PPCSTATE_OFF_ARRAY(spr, i)
 
 static_assert(std::is_same_v<decltype(PowerPC::PowerPCState::ps[0]), PowerPC::PairedSingle&>);
@@ -25,10 +25,17 @@
 
 #include "Core/PowerPC/MMU.h"
 
+#include <algorithm>
 #include <bit>
 #include <cstddef>
 #include <cstring>
+#include <memory>
 #include <string>
+#include <utility>
+
+#ifdef _M_X86_64
+#include <emmintrin.h>
+#endif
 
 #include "Common/Align.h"
 #include "Common/Assert.h"
@@ -61,18 +68,37 @@ MMU::~MMU() = default;
 
 void MMU::Reset()
 {
+  m_page_table.clear();
   m_page_mappings.clear();
 #ifndef _ARCH_32
-  m_memory.UpdatePageTableMappings(m_page_mappings);
+  m_memory.RemoveAllPageTableMappings();
 #endif
 }
 
-void MMU::DoState(PointerWrap& p)
+void MMU::DoState(PointerWrap& p, bool sr_changed)
 {
-  // Instead of storing m_page_mappings in savestates, we *could* recalculate it based on memory
+  // Instead of storing m_page_table in savestates, we *could* refetch it from memory
   // here in DoState, but this could lead to us getting a more up-to-date set of page mappings
   // than we had when the savestate was created, which could be a problem for TAS determinism.
-  p.Do(m_page_mappings);
+  if (p.IsReadMode())
+  {
+    if (sr_changed)
+    {
+      // Non-incremental update of page table mappings.
+      p.Do(m_page_table);
+      SRUpdated();
+    }
+    else
+    {
+      // Incremental update of page table mappings.
+      p.Do(m_temp_page_table);
+      PageTableUpdated(m_temp_page_table);
+    }
+  }
+  else
+  {
+    p.Do(m_page_table);
+  }
 }
 
 // Overloaded byteswap functions, for use within the templated functions below.
@@ -1346,10 +1372,12 @@ void MMU::SDRUpdated()
 
 void MMU::SRUpdated()
 {
-  if (m_ppc_state.msr.DR)
-    PageTableUpdated();
-  else
-    m_ppc_state.pagetable_update_pending = true;
+#ifndef _ARCH_32
+  // Our incremental handling of page table updates can't handle SR changing, so throw away all
+  // existing mappings and then reparse the whole page table.
+  m_memory.RemoveAllPageTableMappings();
+  ReloadPageTable();
+#endif
 }
 
 enum class TLBLookupResult
@@ -1449,9 +1477,13 @@ void MMU::InvalidateTLBEntry(u32 address)
 void MMU::PageTableUpdated()
 {
   m_ppc_state.pagetable_update_pending = false;
-#ifndef _ARCH_32
-  m_page_mappings.clear();
 
+#ifdef _ARCH_32
+  // If a savestate is brought from a 64-bit system to a 32-bit system, clear m_page_table.
+  // Not doing this means a stale m_page_table would stick around, which could be a problem
+  // if the savestate is then brought to a 64-bit system again.
+  m_page_table.clear();
+#else
   if (m_ppc_state.m_enable_dcache)
   {
     // Because fastmem isn't in use when accurate dcache emulation is enabled, setting up mappings
@@ -1460,9 +1492,8 @@ void MMU::PageTableUpdated()
     return;
   }
 
-  const u32 page_table_mask = m_ppc_state.pagetable_hashmask;
   const u32 page_table_base = m_ppc_state.pagetable_base;
-  const u32 page_table_end = (page_table_base | (page_table_mask << 6)) + (1 << 6);
+  const u32 page_table_end = (page_table_base | (m_ppc_state.pagetable_hashmask << 6)) + (1 << 6);
   const u32 page_table_size = page_table_end - page_table_base;
 
   u8* page_table_view = m_system.GetMemory().GetPointerForRange(page_table_base, page_table_size);
@@ -1470,98 +1501,332 @@ void MMU::PageTableUpdated()
   {
     WARN_LOG_FMT(POWERPC, "Failed to read page table at {:#010x}-{:#010x}", page_table_base,
                  page_table_end);
-    m_memory.UpdatePageTableMappings(m_page_mappings);
+
+    // Remove host mappings, because we no longer know if they're up to date.
+    m_memory.RemoveAllPageTableMappings();
+
+    // Because we removed host mappings, incremental updates won't work correctly.
+    // Start over from scratch.
+    m_page_table.clear();
+    m_page_mappings.clear();
     return;
   }
 
-  const auto read_page_table = [&](u32 H) {
-    for (u32 i = 0; i <= page_table_mask; ++i)
-    {
-      for (u32 j = 0; j < 8; ++j)
-      {
-        const u32 pte_addr = (page_table_base | ((i & page_table_mask) << 6)) + j * 8;
-
-        UPTE_Lo pte1(Common::swap32(page_table_view + pte_addr - page_table_base));
-        UPTE_Hi pte2(Common::swap32(page_table_view + pte_addr - page_table_base + 4));
-
-        if (!pte1.V)
-          continue;
-
-        if (pte1.H != H)
-          continue;
-
-        // There are quirks related to uncached memory that can't be correctly emulated by fast
-        // accesses, so we don't map uncached memory. (However, no software at all is known to
-        // trigger these quirks through page address translation, only through block address
-        // translation.)
-        const bool wi = (pte2.WIMG & 0b1100) != 0;
-        if (wi)
-          continue;
-
-        // Due to hash masking, the upper bits of page_index_from_hash might not match the actual
-        // page index. But these bits fully overlap with the API (abbreviated page index), so we can
-        // overwrite these bits with the API from pte1 and thereby get the correct page index.
-        //
-        // In other words: logical_address.API must be written to after logical_address.page_index!
-        u32 page_index_from_hash = i ^ pte1.VSID;
-        if (pte1.H)
-          page_index_from_hash = ~page_index_from_hash;
-        EffectiveAddress logical_address;
-        logical_address.offset = 0;
-        logical_address.page_index = page_index_from_hash;
-        logical_address.API = pte1.API;
-
-        for (u32 k = 0; k < std::size(m_ppc_state.sr); ++k)
-        {
-          const auto sr = UReg_SR{m_ppc_state.sr[k]};
-          if (sr.VSID != pte1.VSID || sr.T != 0)
-            continue;
-
-          logical_address.SR = k;
-
-          // Block address translation takes priority over page address translation.
-          if (m_dbat_table[logical_address.Hex >> PowerPC::BAT_INDEX_SHIFT] &
-              PowerPC::BAT_MAPPED_BIT)
-          {
-            continue;
-          }
-
-          // Fast accesses don't support memchecks, so force slow accesses by removing fastmem
-          // mappings for all overlapping virtual pages.
-          constexpr u32 logical_size = PowerPC::HW_PAGE_SIZE;
-          if (m_power_pc.GetMemChecks().OverlapsMemcheck(logical_address.Hex, logical_size))
-            continue;
-
-          const u32 physical_address = pte2.RPN << 12;
-
-          // Important: This doesn't overwrite anything already present in m_page_mappings.
-          m_page_mappings.emplace(logical_address.Hex, physical_address);
-
-          // HACK: We set R and C, which indicate whether a page have been read from and written to
-          // respectively, when a page is mapped rather than when it's actually accessed. The latter
-          // is probably possible using some fault handling logic, but for now it seems like more
-          // work than it's worth.
-          if (!pte2.R || !pte2.C)
-          {
-            pte2.R = 1;
-            pte2.C = 1;
-
-            const u32 pte2_swapped = Common::swap32(pte2.Hex);
-            std::memcpy(page_table_view + pte_addr - page_table_base + 4, &pte2_swapped,
-                        sizeof(pte2_swapped));
-          }
-        }
-      }
-    }
-  };
-
-  // We need to read all H=0 PTEs first, because H=0 takes priority over H=1.
-  read_page_table(0);
-  read_page_table(1);
-
-  m_memory.UpdatePageTableMappings(m_page_mappings);
-#endif
-}
+  PageTableUpdated(std::span(page_table_view, page_table_size));
+#endif
+}
+
+#ifndef _ARCH_32
+void MMU::ReloadPageTable()
+{
+  m_page_mappings.clear();
+
+  m_temp_page_table.clear();
+  std::swap(m_page_table, m_temp_page_table);
+  PageTableUpdated(m_temp_page_table);
+}
+
+void MMU::PageTableUpdated(std::span<u8> page_table)
+{
+  // PowerPC's priority order for PTEs that have the same logical adress is as follows:
+  //
+  // * Primary PTEs (H=0) take priority over secondary PTEs (H=1).
+  // * If two PTEs have equal H values, they must be in the same PTEG due to how the hash
+  //   incorporates the logical address and H. The PTE located first in the PTEG takes priority.
+
+  m_removed_mappings.clear();
+  m_added_mappings.clear();
+
+  if (m_page_table.size() != page_table.size())
+  {
+    m_page_table.clear();
+    m_page_table.resize(page_table.size());
+  }
+
+  u8* old_page_table = m_page_table.data();
+  u8* new_page_table = page_table.data();
+
+  constexpr auto compare_64_bytes = [](const u8* a, const u8* b) -> bool {
+#ifdef _M_X86_64
+    // MSVC (x64) doesn't want to optimize the memcmp call. This 64-byte compare is performance
+    // critical in certain games like Spider-Man 2, so let's use our own vectorized version
+    // instead.
+    const __m128i a1 = _mm_load_si128(reinterpret_cast<const __m128i*>(a));
+    const __m128i b1 = _mm_load_si128(reinterpret_cast<const __m128i*>(b));
+    const __m128i cmp1 = _mm_cmpeq_epi8(a1, b1);
+    const __m128i a2 = _mm_load_si128(reinterpret_cast<const __m128i*>(a + 0x10));
+    const __m128i b2 = _mm_load_si128(reinterpret_cast<const __m128i*>(b + 0x10));
+    const __m128i cmp2 = _mm_cmpeq_epi8(a2, b2);
+    const __m128i cmp12 = _mm_and_si128(cmp1, cmp2);
+    const __m128i a3 = _mm_load_si128(reinterpret_cast<const __m128i*>(a + 0x20));
+    const __m128i b3 = _mm_load_si128(reinterpret_cast<const __m128i*>(b + 0x20));
+    const __m128i cmp3 = _mm_cmpeq_epi8(a3, b3);
+    const __m128i a4 = _mm_load_si128(reinterpret_cast<const __m128i*>(a + 0x30));
+    const __m128i b4 = _mm_load_si128(reinterpret_cast<const __m128i*>(b + 0x30));
+    const __m128i cmp4 = _mm_cmpeq_epi8(a4, b4);
+    const __m128i cmp34 = _mm_and_si128(cmp3, cmp4);
+    const __m128i cmp1234 = _mm_and_si128(cmp12, cmp34);
+    return _mm_movemask_epi8(cmp1234) == 0xFFFF;
+#else
+    return std::memcmp(std::assume_aligned<16>(a), std::assume_aligned<16>(b), 64) == 0;
+#endif
+  };
+
+  constexpr auto get_page_index = [](UPTE_Lo pte1, u32 hash) {
+    u32 page_index_from_hash = hash ^ pte1.VSID;
+    if (pte1.H)
+      page_index_from_hash = ~page_index_from_hash;
+
+    // Due to hash masking, the upper bits of page_index_from_hash might not match the actual
+    // page index. But these bits fully overlap with the API (abbreviated page index), so we can
+    // overwrite these bits with the API from pte1 and thereby get the correct page index.
+    //
+    // In other words: logical_address.API must be written to after logical_address.page_index!
+
+    EffectiveAddress logical_address;
+    logical_address.offset = 0;
+    logical_address.page_index = page_index_from_hash;
+    logical_address.API = pte1.API;
+    return logical_address;
+  };
+
+  const auto fixup_shadowed_mappings = [this, &get_page_index, old_page_table, new_page_table](
+                                           UPTE_Lo pte1, u32 page_table_offset, bool* run_pass_2) {
+    DEBUG_ASSERT(pte1.V == 1);
+
+    bool switched_to_secondary = false;
+
+    while (true)
+    {
+      const u32 big_endian_pte1 = Common::swap32(pte1.Hex);
+      const u32 pteg_end = Common::AlignUp(page_table_offset, 64);
+      for (u32 i = page_table_offset; i < pteg_end; i += 8)
+      {
+        if (std::memcmp(new_page_table + i, &big_endian_pte1, sizeof(big_endian_pte1)) == 0)
+        {
+          // We've found a PTE that has V set and has the same logical address as the passed-in PTE.
+          // The found PTE was previously skipped over because the passed-in PTE had priority, but
+          // the passed-in PTE is being changed, so now we need to re-check the found PTE. This will
+          // happen naturally later in the loop that's calling this function, but only if the 8-byte
+          // memcmp reports that the PTE has changed. Therefore, if the PTE currently compares
+          // equal, change an unused bit in the PTE.
+          if (std::memcmp(old_page_table + i, new_page_table + i, 8) == 0)
+          {
+            UPTE_Hi pte2(Common::swap32(old_page_table + i + 4));
+            pte2.reserved_1 = pte2.reserved_1 ^ 1;
+            const u32 value = Common::swap32(pte2.Hex);
+            std::memcpy(old_page_table + i + 4, &value, sizeof(value));
+
+            if (switched_to_secondary)
+              *run_pass_2 = true;
+          }
+          return;
+        }
+      }
+
+      if (pte1.H == 1)
+      {
+        // We've scanned the secondary PTEG. Nothing left to do.
+        return;
+      }
+      else
+      {
+        // We've scanned the primary PTEG. Now let's scan the secondary PTEG.
+        const EffectiveAddress ea = get_page_index(pte1, page_table_offset / 64);
+        const u32 hash = ~(pte1.VSID ^ ea.page_index);
+        pte1.H = 1;
+        page_table_offset =
+            (((hash & m_ppc_state.pagetable_hashmask) << 6) | m_ppc_state.pagetable_base) -
+            m_ppc_state.pagetable_base;
+        switched_to_secondary = true;
+      }
+    }
+  };
+
+  const auto try_add_mapping = [this, &get_page_index, page_table](UPTE_Lo pte1, UPTE_Hi pte2,
+                                                                   u32 page_table_offset) {
+    EffectiveAddress logical_address = get_page_index(pte1, page_table_offset / 64);
+
+    for (u32 i = 0; i < std::size(m_ppc_state.sr); ++i)
+    {
+      const auto sr = UReg_SR{m_ppc_state.sr[i]};
+      if (sr.VSID != pte1.VSID || sr.T != 0)
+        continue;
+
+      logical_address.SR = i;
+
+      bool host_mapping = true;
+
+      const bool wi = (pte2.WIMG & 0b1100) != 0;
+      if (wi)
+      {
+        // There are quirks related to uncached memory that can't be correctly emulated by fast
+        // accesses, so we don't map uncached memory. (However, no software at all is known to
+        // trigger these quirks through page address translation, only through block address
+        // translation.)
+        host_mapping = false;
+      }
+      else if (m_dbat_table[logical_address.Hex >> PowerPC::BAT_INDEX_SHIFT] &
+               PowerPC::BAT_MAPPED_BIT)
+      {
+        // Block address translation takes priority over page address translation.
+        host_mapping = false;
+      }
+      else if (m_power_pc.GetMemChecks().OverlapsMemcheck(logical_address.Hex,
+                                                          PowerPC::HW_PAGE_SIZE))
+      {
+        // Fast accesses don't support memchecks, so force slow accesses by removing fastmem
+        // mappings for all overlapping virtual pages.
+        host_mapping = false;
+      }
+
+      const u32 priority = (page_table_offset % 64 / 8) | (pte1.H << 3);
+      const PageMapping page_mapping(pte2.RPN, host_mapping, priority);
+
+      const auto it = m_page_mappings.find(logical_address.Hex);
+      if (it != m_page_mappings.end()) [[unlikely]]
+      {
+        if (priority > it->second.priority)
+        {
+          // An existing mapping has priority.
+          continue;
+        }
+        else
+        {
+          // The new mapping has priority over an existing mapping. Replace the existing mapping.
+          if (it->second.host_mapping)
+            m_removed_mappings.emplace(it->first);
+          it->second.Hex = page_mapping.Hex;
+        }
+      }
+      else
+      {
+        // There's no existing mapping for this logical address. Add a new mapping.
+        m_page_mappings.emplace(logical_address.Hex, page_mapping);
+      }
+
+      if (host_mapping)
+      {
+        const u32 physical_address = pte2.RPN << 12;
+        m_added_mappings.emplace(logical_address.Hex, physical_address);
+
+        // HACK: We set R and C, which indicate whether a page have been read from and written to
+        // respectively, when a page is mapped rather than when it's actually accessed. The latter
+        // is probably possible using some fault handling logic, but for now it seems like more
+        // work than it's worth.
+        if (!pte2.R || !pte2.C)
+        {
+          pte2.R = 1;
+          pte2.C = 1;
+
+          const u32 pte2_swapped = Common::swap32(pte2.Hex);
+          std::memcpy(page_table.data() + page_table_offset + 4, &pte2_swapped,
+                      sizeof(pte2_swapped));
+        }
+      }
+    }
+  };
+
+  bool run_pass_2 = false;
+
+  // Pass 1: Remove old mappings and add new primary (H=0) mappings.
+  for (u32 i = 0; i < page_table.size(); i += 64)
+  {
+    if (compare_64_bytes(old_page_table + i, new_page_table + i)) [[likely]]
+      continue;
+
+    for (u32 j = 0; j < 64; j += 8)
+    {
+      if (std::memcmp(old_page_table + i + j, new_page_table + i + j, 8) == 0) [[likely]]
+        continue;
+
+      // Remove old mappings.
+      UPTE_Lo old_pte1(Common::swap32(old_page_table + i + j));
+      if (old_pte1.V)
+      {
+        const u32 priority = (j / 8) | (old_pte1.H << 3);
+        EffectiveAddress logical_address = get_page_index(old_pte1, i / 64);
+
+        for (u32 k = 0; k < std::size(m_ppc_state.sr); ++k)
+        {
+          const auto sr = UReg_SR{m_ppc_state.sr[k]};
+          if (sr.VSID != old_pte1.VSID || sr.T != 0)
+            continue;
+
+          logical_address.SR = k;
+
+          const auto it = m_page_mappings.find(logical_address.Hex);
+          if (it != m_page_mappings.end() && priority == it->second.priority)
+          {
+            if (it->second.host_mapping)
+              m_removed_mappings.emplace(logical_address.Hex);
+            m_page_mappings.erase(it);
+
+            // It's unlikely but theoretically possible that this was shadowing another PTE that's
+            // using the same logical address but has a lower priority. If this happens, we must
+            // make sure that we don't skip over that other PTE because of the 8-byte memcmp.
+            fixup_shadowed_mappings(old_pte1, i + j, &run_pass_2);
+          }
+        }
+      }
+
+      // Add new primary (H=0) mappings.
+      UPTE_Lo new_pte1(Common::swap32(new_page_table + i + j));
+      UPTE_Hi new_pte2(Common::swap32(new_page_table + i + j + 4));
+      if (new_pte1.V)
+      {
+        if (new_pte1.H)
+        {
+          run_pass_2 = true;
+          continue;
+        }
+
+        try_add_mapping(new_pte1, new_pte2, i + j);
+      }
+
+      // Update our copy of the page table.
+      std::memcpy(old_page_table + i + j, new_page_table + i + j, 8);
+    }
+  }
+
+  // Pass 2: Add new secondary (H=1) mappings. This is a separate pass because before we can process
+  // whether a mapping should be added, we first need to check all PTEs that have equal or higher
+  // priority to see if their mappings should be removed. For adding primary mappings, this ordering
+  // comes naturally from doing a linear scan of the page table from start to finish. But for adding
+  // secondary mappings, the primary PTEG that has priority over a given secondary PTEG is in the
+  // other half of the page table, so we need more than one pass through the page table. But most of
+  // the time, there are no secondary mappings, letting us skip the second pass.
+  if (run_pass_2) [[unlikely]]
+  {
+    for (u32 i = 0; i < page_table.size(); i += 64)
+    {
+      if (compare_64_bytes(old_page_table + i, new_page_table + i)) [[likely]]
+        continue;
+
+      for (u32 j = 0; j < 64; j += 8)
+      {
+        if (std::memcmp(old_page_table + i + j, new_page_table + i + j, 8) == 0) [[likely]]
+          continue;
+
+        UPTE_Lo new_pte1(Common::swap32(new_page_table + i + j));
+        UPTE_Hi new_pte2(Common::swap32(new_page_table + i + j + 4));
+
+        // We don't need to check new_pte1.V and new_pte1.H. If the memcmp above returned nonzero,
+        // pass 1 must have skipped running memcpy, which only happens if V and H are both set.
+        try_add_mapping(new_pte1, new_pte2, i + j);
+
+        std::memcpy(old_page_table + i + j, new_page_table + i + j, 8);
+      }
+    }
+  }
+
+  if (!m_removed_mappings.empty())
+    m_memory.RemovePageTableMappings(m_removed_mappings);
+
+  if (!m_added_mappings.empty())
+    m_memory.AddPageTableMappings(m_added_mappings);
+}
+#endif
 
 void MMU::PageTableUpdatedFromJit(MMU* mmu)
 {
@@ -1794,7 +2059,11 @@ void MMU::DBATUpdated()
 
 #ifndef _ARCH_32
   m_memory.UpdateDBATMappings(m_dbat_table);
-  m_memory.UpdatePageTableMappings(m_page_mappings);
+
+  // Calling UpdateDBATMappings removes all fastmem page table mappings,
+  // so we have to recreate them.
+  if (!m_page_table.empty())
+    ReloadPageTable();
 #endif
 
   // IsOptimizable*Address and dcbz depends on the BAT mapping, so we need a flush here.
@@ -7,7 +7,10 @@
 #include <cstddef>
 #include <map>
 #include <optional>
+#include <set>
+#include <span>
 #include <string>
+#include <vector>
 
 #include "Common/BitField.h"
 #include "Common/CommonTypes.h"
@@ -120,7 +123,7 @@ public:
   ~MMU();
 
   void Reset();
-  void DoState(PointerWrap& p);
+  void DoState(PointerWrap& p, bool sr_changed);
 
   // Routines for debugger UI, cheats, etc. to access emulated memory from the
   // perspective of the CPU. Not for use by core emulation routines.
@@ -300,6 +303,26 @@ private:
     explicit EffectiveAddress(u32 address) : Hex{address} {}
   };
 
+  union PageMapping
+  {
+    // A small priority number wins over a larger priority number.
+    BitField<0, 11, u32> priority;
+    // Whether we're allowed to create a host mapping for this mapping.
+    BitField<11, 1, u32> host_mapping;
+    // The physical address of the page.
+    BitField<12, 20, u32> RPN;
+
+    u32 Hex = 0;
+
+    PageMapping() = default;
+    PageMapping(u32 RPN_, bool host_mapping_, u32 priority_)
+    {
+      RPN = RPN_;
+      host_mapping = host_mapping_;
+      priority = priority_;
+    }
+  };
+
   template <const XCheckTLBFlag flag>
   TranslateAddressResult TranslateAddress(u32 address);
@@ -311,6 +334,11 @@ private:
   void Memcheck(u32 address, u64 var, bool write, size_t size);
 
+#ifndef _ARCH_32
+  void ReloadPageTable();
+  void PageTableUpdated(std::span<u8> page_table);
+#endif
+
   void UpdateBATs(BatTable& bat_table, u32 base_spr);
   void UpdateFakeMMUBat(BatTable& bat_table, u32 start_addr);
@@ -335,9 +363,18 @@ private:
   PowerPC::PowerPCState& m_ppc_state;
 
   // STATE_TO_SAVE
-  std::map<u32, u32> m_page_mappings;
+  std::vector<u8> m_page_table;
   // END STATE_TO_SAVE
 
+  // This keeps track of all valid page table mappings in m_page_table.
+  // The key is the logical address.
+  std::map<u32, PageMapping> m_page_mappings;
+
+  // These are kept around just for their memory allocations. They are always cleared before use.
+  std::vector<u8> m_temp_page_table;
+  std::set<u32> m_removed_mappings;
+  std::map<u32, u32> m_added_mappings;
+
   BatTable m_ibat_table;
   BatTable m_dbat_table;
 };
@@ -4,6 +4,7 @@
 #include "Core/PowerPC/PowerPC.h"
 
 #include <algorithm>
+#include <array>
 #include <bit>
 #include <cstring>
 #include <type_traits>
@@ -80,6 +81,8 @@ void PowerPCManager::DoState(PointerWrap& p)
   // *((u64 *)&TL(m_ppc_state)) = SystemTimers::GetFakeTimeBase(); //works since we are little
   // endian and TL comes first :)
 
+  const std::array<u32, 16> old_sr = m_ppc_state.sr;
+
   p.DoArray(m_ppc_state.gpr);
   p.Do(m_ppc_state.pc);
   p.Do(m_ppc_state.npc);
@@ -107,10 +110,10 @@ void PowerPCManager::DoState(PointerWrap& p)
   m_ppc_state.dCache.DoState(memory, p);
 
   auto& mmu = m_system.GetMMU();
-  mmu.DoState(p);
-
   if (p.IsReadMode())
   {
+    mmu.DoState(p, old_sr != m_ppc_state.sr);
+
     if (!m_ppc_state.m_enable_dcache)
     {
       INFO_LOG_FMT(POWERPC, "Flushing data cache");
@@ -123,6 +126,10 @@ void PowerPCManager::DoState(PointerWrap& p)
     mmu.IBATUpdated();
     mmu.DBATUpdated();
   }
+  else
+  {
+    mmu.DoState(p, false);
+  }
 
   // SystemTimers::DecrementerSet();
   // SystemTimers::TimeBaseSet();
@@ -175,7 +175,7 @@ struct PowerPCState
   alignas(16) PairedSingle ps[32];
 #endif
 
-  u32 sr[16]{};  // Segment registers.
+  std::array<u32, 16> sr{};  // Segment registers.
 
   // special purpose registers - controls quantizers, DMA, and lots of other misc extensions.
   // also for power management, but we don't care about that.