Reduce buffer cache locking contention

IndecisiveTurtle 2024-12-29 15:29:37 +02:00
parent ee974414d2
commit ca5bfd845d
7 changed files with 101 additions and 237 deletions


@@ -54,18 +54,10 @@ BufferCache::BufferCache(const Vulkan::Instance& instance_, Vulkan::Scheduler& s
BufferCache::~BufferCache() = default;
void BufferCache::InvalidateMemory(VAddr device_addr, u64 size) {
std::scoped_lock lk{mutex};
const bool is_tracked = IsRegionRegistered(device_addr, size);
if (!is_tracked) {
return;
}
// Mark the page as CPU modified to stop tracking writes.
SCOPE_EXIT {
if (is_tracked) {
// Mark the page as CPU modified to stop tracking writes.
memory_tracker.MarkRegionAsCpuModified(device_addr, size);
};
if (!memory_tracker.IsRegionGpuModified(device_addr, size)) {
// Page has not been modified by the GPU, nothing to do.
return;
}
}
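The removed `std::scoped_lock lk{mutex}` means invalidation no longer serializes against every other buffer-cache operation; correctness instead rests on the finer-grained per-region locking added to `RegionManager` later in this commit. A minimal sketch of that shift, with illustrative names (`Region`, `FineTracker` are not shadPS4 code):

```cpp
// Minimal sketch of the locking shift: per-region locks replace one
// cache-wide mutex, so invalidations that touch different regions no
// longer contend with each other.
#include <array>
#include <cstddef>
#include <cstdint>
#include <mutex>

struct Region {
    std::mutex lock;              // stand-in for the Common::SpinLock used below
    std::uint64_t dirty_bits = 0; // one bit per page, as in RegionManager
};

class FineTracker {
public:
    void MarkDirty(std::size_t region, std::uint64_t mask) {
        std::scoped_lock lk{regions[region].lock}; // blocks only this region
        regions[region].dirty_bits |= mask;
    }

private:
    std::array<Region, 64> regions{};
};
```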
@@ -365,12 +357,13 @@ bool BufferCache::IsRegionRegistered(VAddr addr, size_t size) {
const VAddr end_addr = addr + size;
const u64 page_end = Common::DivCeil(end_addr, CACHING_PAGESIZE);
for (u64 page = addr >> CACHING_PAGEBITS; page < page_end;) {
const BufferId buffer_id = page_table[page];
if (!buffer_id) {
const BufferId* buffer_id = page_table.find(page);
if (!buffer_id || !*buffer_id) {
++page;
continue;
}
Buffer& buffer = slot_buffers[buffer_id];
std::shared_lock lk{mutex};
Buffer& buffer = slot_buffers[*buffer_id];
const VAddr buf_start_addr = buffer.CpuAddr();
const VAddr buf_end_addr = buf_start_addr + buffer.SizeBytes();
if (buf_start_addr < end_addr && addr < buf_end_addr) {
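`IsRegionRegistered` now probes the sparse page table with `find()`, which reports a missing first-level entry as `nullptr` instead of materializing it, and takes the `shared_mutex` only in shared mode while it inspects `slot_buffers`. A hedged sketch of the null-safe probe loop (`PageTable` and `BufferId` here are stand-ins, not the cache's real types):

```cpp
// Sketch of a null-safe sparse lookup loop, assuming a find() that returns
// nullptr when the page has no table entry.
#include <cstdint>
#include <optional>
#include <unordered_map>

using BufferId = std::uint32_t; // 0 = invalid, matching the !*buffer_id test

struct PageTable {
    const BufferId* find(std::uint64_t page) const {
        const auto it = map.find(page);
        return it == map.end() ? nullptr : &it->second;
    }
    std::unordered_map<std::uint64_t, BufferId> map;
};

std::optional<BufferId> FirstBufferIn(const PageTable& table, std::uint64_t first,
                                      std::uint64_t last) {
    for (std::uint64_t page = first; page < last; ++page) {
        const BufferId* id = table.find(page);
        if (!id || !*id) {
            continue; // hole or invalid id: keep scanning
        }
        return *id;
    }
    return std::nullopt;
}
```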
@@ -520,8 +513,11 @@ BufferId BufferCache::CreateBuffer(VAddr device_addr, u32 wanted_size) {
wanted_size = static_cast<u32>(device_addr_end - device_addr);
const OverlapResult overlap = ResolveOverlaps(device_addr, wanted_size);
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
const BufferId new_buffer_id = slot_buffers.insert(
instance, scheduler, MemoryUsage::DeviceLocal, overlap.begin, AllFlags, size);
const BufferId new_buffer_id = [&] {
std::scoped_lock lk{mutex};
return slot_buffers.insert(instance, scheduler, MemoryUsage::DeviceLocal,
overlap.begin, AllFlags, size);
}();
auto& new_buffer = slot_buffers[new_buffer_id];
const size_t size_bytes = new_buffer.SizeBytes();
const auto cmdbuf = scheduler.CommandBuffer();
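The insertion into `slot_buffers` is wrapped in an immediately invoked lambda so the exclusive lock covers exactly one statement while `new_buffer_id` can still be a `const` local. The idiom in isolation (illustrative sketch):

```cpp
// The IIFE-scoped-lock idiom used above: the mutex is held for exactly one
// statement, and the result still initializes a const variable.
#include <mutex>
#include <vector>

std::mutex m;
std::vector<int> slots;

int InsertLocked(int value) {
    const int new_id = [&] {
        std::scoped_lock lk{m}; // released as soon as the lambda returns
        slots.push_back(value);
        return static_cast<int>(slots.size() - 1);
    }();
    // ... unlocked work with new_id follows ...
    return new_id;
}
```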
@@ -561,10 +557,8 @@ void BufferCache::ChangeRegister(BufferId buffer_id) {
void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
bool is_texel_buffer) {
std::scoped_lock lk{mutex};
boost::container::small_vector<vk::BufferCopy, 4> copies;
u64 total_size_bytes = 0;
u64 largest_copy = 0;
VAddr buffer_start = buffer.CpuAddr();
memory_tracker.ForEachUploadRange(device_addr, size, [&](u64 device_addr_out, u64 range_size) {
copies.push_back(vk::BufferCopy{
@@ -573,7 +567,6 @@ void BufferCache::SynchronizeBuffer(Buffer& buffer, VAddr device_addr, u32 size,
.size = range_size,
});
total_size_bytes += range_size;
largest_copy = std::max(largest_copy, range_size);
});
SCOPE_EXIT {
if (is_texel_buffer) {


@@ -3,7 +3,7 @@
#pragma once
#include <mutex>
#include <shared_mutex>
#include <boost/container/small_vector.hpp>
#include <boost/icl/interval_map.hpp>
#include <tsl/robin_map.h>
@@ -157,7 +157,7 @@ private:
StreamBuffer staging_buffer;
StreamBuffer stream_buffer;
Buffer gds_buffer;
std::mutex mutex;
std::shared_mutex mutex;
Common::SlotVector<Buffer> slot_buffers;
RangeSet gpu_modified_ranges;
vk::BufferView null_buffer_view;
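Swapping `std::mutex` for `std::shared_mutex` is what lets read-mostly paths such as `IsRegionRegistered` take `std::shared_lock` concurrently, while writers like `CreateBuffer` still take the mutex exclusively. A self-contained sketch of the reader/writer split (`Registry` is illustrative, not cache code):

```cpp
// Reader/writer split on std::shared_mutex: many concurrent readers, one
// exclusive writer. This mirrors the pattern, not the cache's actual logic.
#include <shared_mutex>
#include <vector>

class Registry {
public:
    bool Contains(int v) const {
        std::shared_lock lk{mtx}; // shared: readers don't block each other
        for (int x : items) {
            if (x == v) {
                return true;
            }
        }
        return false;
    }
    void Add(int v) {
        std::scoped_lock lk{mtx}; // exclusive: blocks readers and writers
        items.push_back(v);
    }

private:
    mutable std::shared_mutex mtx;
    std::vector<int> items;
};
```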


@@ -15,13 +15,8 @@ namespace VideoCore {
class MemoryTracker {
public:
static constexpr size_t MAX_CPU_PAGE_BITS = 40;
static constexpr size_t HIGHER_PAGE_BITS = 22;
static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
static constexpr size_t NUM_HIGH_PAGES = 1ULL << (MAX_CPU_PAGE_BITS - HIGHER_PAGE_BITS);
static constexpr size_t MANAGER_POOL_SIZE = 32;
static constexpr size_t WORDS_STACK_NEEDED = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
using Manager = WordManager<WORDS_STACK_NEEDED>;
public:
explicit MemoryTracker(PageManager* tracker_) : tracker{tracker_} {}
@@ -30,7 +25,7 @@ public:
/// Returns true if a region has been modified from the CPU
[[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
return IteratePages<true>(
query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
return manager->template IsRegionModified<Type::CPU>(offset, size);
});
}
@@ -38,52 +33,34 @@ public:
/// Returns true if a region has been modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) noexcept {
return IteratePages<false>(
query_cpu_addr, query_size, [](Manager* manager, u64 offset, size_t size) {
query_cpu_addr, query_size, [](RegionManager* manager, u64 offset, size_t size) {
return manager->template IsRegionModified<Type::GPU>(offset, size);
});
}
/// Mark region as CPU modified, notifying the device_tracker about this change
void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
IteratePages<true>(dirty_cpu_addr, query_size,
[](Manager* manager, u64 offset, size_t size) {
IteratePages<false>(dirty_cpu_addr, query_size,
[](RegionManager* manager, u64 offset, size_t size) {
manager->template ChangeRegionState<Type::CPU, true>(
manager->GetCpuAddr() + offset, size);
});
}
/// Unmark region as CPU modified, notifying the device_tracker about this change
void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 query_size) {
IteratePages<true>(dirty_cpu_addr, query_size,
[](Manager* manager, u64 offset, size_t size) {
manager->template ChangeRegionState<Type::CPU, false>(
manager->GetCpuAddr() + offset, size);
});
}
/// Mark region as modified from the host GPU
void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
IteratePages<true>(dirty_cpu_addr, query_size,
[](Manager* manager, u64 offset, size_t size) {
IteratePages<false>(dirty_cpu_addr, query_size,
[](RegionManager* manager, u64 offset, size_t size) {
manager->template ChangeRegionState<Type::GPU, true>(
manager->GetCpuAddr() + offset, size);
});
}
/// Unmark region as modified from the host GPU
void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 query_size) noexcept {
IteratePages<true>(dirty_cpu_addr, query_size,
[](Manager* manager, u64 offset, size_t size) {
manager->template ChangeRegionState<Type::GPU, false>(
manager->GetCpuAddr() + offset, size);
});
}
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified
template <typename Func>
void ForEachUploadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
IteratePages<true>(query_cpu_range, query_size,
[&func](Manager* manager, u64 offset, size_t size) {
[&func](RegionManager* manager, u64 offset, size_t size) {
manager->template ForEachModifiedRange<Type::CPU, true>(
manager->GetCpuAddr() + offset, size, func);
});
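Two things are visible above: the `Mark*` helpers now call `IteratePages<false>`, which — going by the name of the `create_region_on_fail` parameter below — suggests marking no longer allocates tracking state for regions that were never touched, and `ForEachUploadRange` hands the caller one callback per contiguous dirty range while clearing the CPU bits. A hypothetical caller-side sketch of that callback shape (the stand-in tracker and its ranges are invented for illustration):

```cpp
// Hypothetical caller of a ForEachUploadRange-style API: the tracker invokes
// the callback once per contiguous dirty range, and the caller batches copies
// the way SynchronizeBuffer does above.
#include <cstdint>
#include <functional>
#include <vector>

struct Copy {
    std::uint64_t src_offset;
    std::uint64_t size;
};

// Stand-in tracker that pretends two ranges are dirty.
void ForEachUploadRange(std::uint64_t base, std::uint64_t /*size*/,
                        const std::function<void(std::uint64_t, std::uint64_t)>& func) {
    func(base, 0x1000);          // one dirty page at the start
    func(base + 0x3000, 0x2000); // a later two-page run
}

std::vector<Copy> BuildCopies(std::uint64_t addr, std::uint64_t size) {
    std::vector<Copy> copies;
    ForEachUploadRange(addr, size, [&](std::uint64_t range_addr, std::uint64_t range_size) {
        copies.push_back(Copy{range_addr - addr, range_size});
    });
    return copies;
}
```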
@@ -93,7 +70,7 @@ public:
template <bool clear, typename Func>
void ForEachDownloadRange(VAddr query_cpu_range, u64 query_size, Func&& func) {
IteratePages<false>(query_cpu_range, query_size,
[&func](Manager* manager, u64 offset, size_t size) {
[&func](RegionManager* manager, u64 offset, size_t size) {
if constexpr (clear) {
manager->template ForEachModifiedRange<Type::GPU, true>(
manager->GetCpuAddr() + offset, size, func);
@@ -114,7 +91,7 @@ private:
*/
template <bool create_region_on_fail, typename Func>
bool IteratePages(VAddr cpu_address, size_t size, Func&& func) {
using FuncReturn = typename std::invoke_result<Func, Manager*, u64, size_t>::type;
using FuncReturn = typename std::invoke_result<Func, RegionManager*, u64, size_t>::type;
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
std::size_t remaining_size{size};
std::size_t page_index{cpu_address >> HIGHER_PAGE_BITS};
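`IteratePages` inspects the callback's return type at compile time: when the callback returns `bool`, iteration short-circuits on `true` (the `BOOL_BREAK` path); `void` callbacks visit every page. The dispatch in isolation (illustrative sketch):

```cpp
// How the BOOL_BREAK dispatch works in isolation: one iterator supports both
// void callbacks and bool callbacks that short-circuit.
#include <cstddef>
#include <type_traits>

template <typename Func>
bool IterateUpTo(std::size_t n, Func&& func) {
    using FuncReturn = std::invoke_result_t<Func, std::size_t>;
    static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
    for (std::size_t i = 0; i < n; ++i) {
        if constexpr (BOOL_BREAK) {
            if (func(i)) {
                return true; // callback found what it wanted: stop early
            }
        } else {
            func(i);
        }
    }
    return false;
}

// Usage: IterateUpTo(8, [](std::size_t i) { return i == 3; }) stops at 3;
// a void lambda runs to completion.
```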
@@ -155,7 +132,7 @@ private:
manager_pool.emplace_back();
auto& last_pool = manager_pool.back();
for (size_t i = 0; i < MANAGER_POOL_SIZE; i++) {
std::construct_at(&last_pool[i], tracker, 0, HIGHER_PAGE_SIZE);
std::construct_at(&last_pool[i], tracker, 0);
free_managers.push_back(&last_pool[i]);
}
}
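The pool grows by whole `std::array` chunks inside a `std::deque`, so `RegionManager*` pointers handed out earlier (including those cached in `top_tier`) stay valid as the pool grows; a `std::vector` of managers would invalidate them on reallocation. A minimal sketch of the same design (`Widget` is illustrative):

```cpp
// Why deque-of-arrays: push_back on std::deque never moves existing elements,
// so raw pointers into earlier chunks remain valid as the pool grows.
#include <array>
#include <deque>
#include <vector>

struct Widget {
    int value = 0;
};

std::deque<std::array<Widget, 32>> pool;
std::vector<Widget*> free_list;

Widget* Acquire() {
    if (free_list.empty()) {
        auto& chunk = pool.emplace_back(); // new chunk; old pointers unaffected
        for (Widget& w : chunk) {
            free_list.push_back(&w);
        }
    }
    Widget* w = free_list.back();
    free_list.pop_back();
    return w;
}
```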
@@ -167,9 +144,9 @@
}
PageManager* tracker;
std::deque<std::array<Manager, MANAGER_POOL_SIZE>> manager_pool;
std::vector<Manager*> free_managers;
std::array<Manager*, NUM_HIGH_PAGES> top_tier{};
std::deque<std::array<RegionManager, MANAGER_POOL_SIZE>> manager_pool;
std::vector<RegionManager*> free_managers;
std::array<RegionManager*, NUM_HIGH_PAGES> top_tier{};
};
} // namespace VideoCore


@@ -3,10 +3,12 @@
#pragma once
#include <algorithm>
#include <array>
#include <span>
#include <utility>
#include "common/div_ceil.h"
#include <mutex>
#include "common/spin_lock.h"
#include "common/types.h"
#include "video_core/page_manager.h"
@@ -16,135 +18,32 @@ constexpr u64 PAGES_PER_WORD = 64;
constexpr u64 BYTES_PER_PAGE = 4_KB;
constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE;
constexpr u64 HIGHER_PAGE_BITS = 22;
constexpr u64 HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
constexpr u64 HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
constexpr u64 NUM_REGION_WORDS = HIGHER_PAGE_SIZE / BYTES_PER_WORD;
enum class Type {
CPU,
GPU,
Untracked,
};
/// Vector tracking modified pages tightly packed with small vector optimization
template <size_t stack_words = 1>
struct WordsArray {
/// Returns the pointer to the words state
[[nodiscard]] const u64* Pointer(bool is_short) const noexcept {
return is_short ? stack.data() : heap;
}
using WordsArray = std::array<u64, NUM_REGION_WORDS>;
/// Returns the pointer to the words state
[[nodiscard]] u64* Pointer(bool is_short) noexcept {
return is_short ? stack.data() : heap;
}
std::array<u64, stack_words> stack{}; ///< Small buffers storage
u64* heap; ///< Not-small buffers pointer to the storage
};
template <size_t stack_words = 1>
struct Words {
explicit Words() = default;
explicit Words(u64 size_bytes_) : size_bytes{size_bytes_} {
num_words = Common::DivCeil(size_bytes, BYTES_PER_WORD);
if (IsShort()) {
cpu.stack.fill(~u64{0});
gpu.stack.fill(0);
untracked.stack.fill(~u64{0});
} else {
// Share allocation between CPU and GPU pages and set their default values
u64* const alloc = new u64[num_words * 3];
cpu.heap = alloc;
gpu.heap = alloc + num_words;
untracked.heap = alloc + num_words * 2;
std::fill_n(cpu.heap, num_words, ~u64{0});
std::fill_n(gpu.heap, num_words, 0);
std::fill_n(untracked.heap, num_words, ~u64{0});
}
// Clean up trailing bits
const u64 last_word_size = size_bytes % BYTES_PER_WORD;
const u64 last_local_page = Common::DivCeil(last_word_size, BYTES_PER_PAGE);
const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD;
const u64 last_word = (~u64{0} << shift) >> shift;
cpu.Pointer(IsShort())[NumWords() - 1] = last_word;
untracked.Pointer(IsShort())[NumWords() - 1] = last_word;
}
~Words() {
Release();
}
Words& operator=(Words&& rhs) noexcept {
Release();
size_bytes = rhs.size_bytes;
num_words = rhs.num_words;
cpu = rhs.cpu;
gpu = rhs.gpu;
untracked = rhs.untracked;
rhs.cpu.heap = nullptr;
return *this;
}
Words(Words&& rhs) noexcept
: size_bytes{rhs.size_bytes}, num_words{rhs.num_words}, cpu{rhs.cpu}, gpu{rhs.gpu},
untracked{rhs.untracked} {
rhs.cpu.heap = nullptr;
}
Words& operator=(const Words&) = delete;
Words(const Words&) = delete;
/// Returns true when the buffer fits in the small vector optimization
[[nodiscard]] bool IsShort() const noexcept {
return num_words <= stack_words;
}
/// Returns the number of words of the buffer
[[nodiscard]] size_t NumWords() const noexcept {
return num_words;
}
/// Release buffer resources
void Release() {
if (!IsShort()) {
// CPU written words is the base for the heap allocation
delete[] cpu.heap;
}
}
template <Type type>
std::span<u64> Span() noexcept {
if constexpr (type == Type::CPU) {
return std::span<u64>(cpu.Pointer(IsShort()), num_words);
} else if constexpr (type == Type::GPU) {
return std::span<u64>(gpu.Pointer(IsShort()), num_words);
} else if constexpr (type == Type::Untracked) {
return std::span<u64>(untracked.Pointer(IsShort()), num_words);
}
}
template <Type type>
std::span<const u64> Span() const noexcept {
if constexpr (type == Type::CPU) {
return std::span<const u64>(cpu.Pointer(IsShort()), num_words);
} else if constexpr (type == Type::GPU) {
return std::span<const u64>(gpu.Pointer(IsShort()), num_words);
} else if constexpr (type == Type::Untracked) {
return std::span<const u64>(untracked.Pointer(IsShort()), num_words);
}
}
u64 size_bytes = 0;
size_t num_words = 0;
WordsArray<stack_words> cpu;
WordsArray<stack_words> gpu;
WordsArray<stack_words> untracked;
};
template <size_t stack_words = 1>
class WordManager {
/**
* Allows tracking CPU and GPU modification of pages in a contiguous 4MB virtual address region.
* Information is stored in bitsets for spatial locality and fast update of single pages.
*/
class RegionManager {
public:
explicit WordManager(PageManager* tracker_, VAddr cpu_addr_, u64 size_bytes)
: tracker{tracker_}, cpu_addr{cpu_addr_}, words{size_bytes} {}
explicit WordManager() = default;
explicit RegionManager(PageManager* tracker_, VAddr cpu_addr_)
: tracker{tracker_}, cpu_addr{cpu_addr_} {
cpu.fill(~u64{0});
gpu.fill(0);
untracked.fill(~u64{0});
}
explicit RegionManager() = default;
void SetCpuAddress(VAddr new_cpu_addr) {
cpu_addr = new_cpu_addr;
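With every region fixed at `HIGHER_PAGE_SIZE`, the old heap/small-vector `Words` machinery collapses into a plain `std::array`: 4 MB split into 64-page words of 4 KB pages gives 16 `u64` words, one bit per page. A worked check of that arithmetic (values mirror the constants above; the `static_assert`s are illustrative):

```cpp
// Worked constants check for the fixed-size region bitset.
#include <array>
#include <cstdint>

constexpr std::uint64_t PAGES_PER_WORD = 64;
constexpr std::uint64_t BYTES_PER_PAGE = 4096;                             // 4 KiB
constexpr std::uint64_t BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; // 256 KiB
constexpr std::uint64_t HIGHER_PAGE_BITS = 22;
constexpr std::uint64_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;       // 4 MiB
constexpr std::uint64_t NUM_REGION_WORDS = HIGHER_PAGE_SIZE / BYTES_PER_WORD;

static_assert(NUM_REGION_WORDS == 16); // 4 MiB / 256 KiB
using WordsArray = std::array<std::uint64_t, NUM_REGION_WORDS>; // 128 bytes per set
```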
@@ -175,12 +74,12 @@ public:
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
const size_t start = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset), 0LL));
const size_t end = static_cast<size_t>(std::max<s64>(static_cast<s64>(offset + size), 0LL));
if (start >= SizeBytes() || end <= start) {
if (start >= HIGHER_PAGE_SIZE || end <= start) {
return;
}
auto [start_word, start_page] = GetWordPage(start);
auto [end_word, end_page] = GetWordPage(end + BYTES_PER_PAGE - 1ULL);
const size_t num_words = NumWords();
constexpr size_t num_words = NUM_REGION_WORDS;
start_word = std::min(start_word, num_words);
end_word = std::min(end_word, num_words);
const size_t diff = end_word - start_word;
@@ -225,21 +124,21 @@
*/
template <Type type, bool enable>
void ChangeRegionState(u64 dirty_addr, u64 size) noexcept(type == Type::GPU) {
std::span<u64> state_words = words.template Span<type>();
[[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
std::scoped_lock lk{lock};
std::span<u64> state_words = Span<type>();
IterateWords(dirty_addr - cpu_addr, size, [&](size_t index, u64 mask) {
if constexpr (type == Type::CPU) {
NotifyPageTracker<!enable>(index, untracked_words[index], mask);
UpdateProtection<!enable>(index, untracked[index], mask);
}
if constexpr (enable) {
state_words[index] |= mask;
if constexpr (type == Type::CPU) {
untracked_words[index] |= mask;
untracked[index] |= mask;
}
} else {
state_words[index] &= ~mask;
if constexpr (type == Type::CPU) {
untracked_words[index] &= ~mask;
untracked[index] &= ~mask;
}
}
});
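`ChangeRegionState` works word-at-a-time: `IterateWords` (not shown in this hunk) is expected to turn the byte range into (word index, 64-bit page mask) pairs that the lambda then ORs into or ANDs out of the state words. A sketch of how such a mask can be computed, under that assumption (`PageMask` is invented for illustration):

```cpp
// Sketch of turning a byte range into a per-word page mask, as an
// IterateWords-style helper would. Constants as in the header above.
#include <cstdint>

constexpr std::uint64_t BYTES_PER_PAGE = 4096;
constexpr std::uint64_t PAGES_PER_WORD = 64;

// Mask of the pages covered by [offset, offset + size) within word `word`;
// size must be non-zero.
std::uint64_t PageMask(std::uint64_t word, std::uint64_t offset, std::uint64_t size) {
    const std::uint64_t first_page = offset / BYTES_PER_PAGE;
    const std::uint64_t last_page = (offset + size - 1) / BYTES_PER_PAGE;
    const std::uint64_t word_first = word * PAGES_PER_WORD;
    const std::uint64_t word_last = word_first + PAGES_PER_WORD - 1;
    if (last_page < word_first || first_page > word_last) {
        return 0; // range does not touch this word
    }
    const std::uint64_t lo = first_page > word_first ? first_page - word_first : 0;
    const std::uint64_t hi = last_page < word_last ? last_page - word_first : 63;
    // Bits lo..hi inclusive; (hi - lo + 1) can be 64, so build from two masks.
    const std::uint64_t upto_hi = hi == 63 ? ~0ULL : ((1ULL << (hi + 1)) - 1);
    return upto_hi & ~((1ULL << lo) - 1);
}
```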
@@ -255,10 +154,10 @@
*/
template <Type type, bool clear, typename Func>
void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
std::scoped_lock lk{lock};
static_assert(type != Type::Untracked);
std::span<u64> state_words = words.template Span<type>();
[[maybe_unused]] std::span<u64> untracked_words = words.template Span<Type::Untracked>();
std::span<u64> state_words = Span<type>();
const size_t offset = query_cpu_range - cpu_addr;
bool pending = false;
size_t pending_offset{};
@@ -269,16 +168,16 @@
};
IterateWords(offset, size, [&](size_t index, u64 mask) {
if constexpr (type == Type::GPU) {
mask &= ~untracked_words[index];
mask &= ~untracked[index];
}
const u64 word = state_words[index] & mask;
if constexpr (clear) {
if constexpr (type == Type::CPU) {
NotifyPageTracker<true>(index, untracked_words[index], mask);
UpdateProtection<true>(index, untracked[index], mask);
}
state_words[index] &= ~mask;
if constexpr (type == Type::CPU) {
untracked_words[index] &= ~mask;
untracked[index] &= ~mask;
}
}
const size_t base_offset = index * PAGES_PER_WORD;
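`ForEachModifiedRange` batches its callbacks: the `pending`/`pending_offset` pair keeps a run of set bits open across word boundaries and closes it at the first clear page, and GPU queries first mask out pages that are merely untracked. An illustrative sketch of that run-coalescing scan (`Run`/`ExtractRuns` are stand-ins):

```cpp
// Sketch of coalescing set bits into contiguous page runs, the way
// ForEachModifiedRange batches its callbacks.
#include <cstddef>
#include <cstdint>
#include <vector>

struct Run {
    std::size_t first_page;
    std::size_t num_pages;
};

std::vector<Run> ExtractRuns(const std::vector<std::uint64_t>& words) {
    std::vector<Run> runs;
    bool pending = false;
    std::size_t pending_start = 0;
    const std::size_t total_pages = words.size() * 64;
    for (std::size_t page = 0; page < total_pages; ++page) {
        const bool set = (words[page / 64] >> (page % 64)) & 1;
        if (set && !pending) {
            pending = true; // open a new run
            pending_start = page;
        } else if (!set && pending) {
            pending = false; // close the run at the first clear bit
            runs.push_back({pending_start, page - pending_start});
        }
    }
    if (pending) {
        runs.push_back({pending_start, total_pages - pending_start});
    }
    return runs;
}
```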
@@ -315,13 +214,11 @@
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
static_assert(type != Type::Untracked);
const std::span<const u64> state_words = words.template Span<type>();
[[maybe_unused]] const std::span<const u64> untracked_words =
words.template Span<Type::Untracked>();
const std::span<const u64> state_words = Span<type>();
bool result = false;
IterateWords(offset, size, [&](size_t index, u64 mask) {
if constexpr (type == Type::GPU) {
mask &= ~untracked_words[index];
mask &= ~untracked[index];
}
const u64 word = state_words[index] & mask;
if (word != 0) {
@@ -333,44 +230,7 @@ public:
return result;
}
/// Returns the number of words of the manager
[[nodiscard]] size_t NumWords() const noexcept {
return words.NumWords();
}
/// Returns the size in bytes of the manager
[[nodiscard]] u64 SizeBytes() const noexcept {
return words.size_bytes;
}
/// Returns true when the buffer fits in the small vector optimization
[[nodiscard]] bool IsShort() const noexcept {
return words.IsShort();
}
private:
template <Type type>
u64* Array() noexcept {
if constexpr (type == Type::CPU) {
return words.cpu.Pointer(IsShort());
} else if constexpr (type == Type::GPU) {
return words.gpu.Pointer(IsShort());
} else if constexpr (type == Type::Untracked) {
return words.untracked.Pointer(IsShort());
}
}
template <Type type>
const u64* Array() const noexcept {
if constexpr (type == Type::CPU) {
return words.cpu.Pointer(IsShort());
} else if constexpr (type == Type::GPU) {
return words.gpu.Pointer(IsShort());
} else if constexpr (type == Type::Untracked) {
return words.untracked.Pointer(IsShort());
}
}
/**
* Notify tracker about changes in the CPU tracking state of a word in the buffer
*
@@ -381,7 +241,7 @@ private:
* @tparam add_to_tracker True when the tracker should start tracking the new pages
*/
template <bool add_to_tracker>
void NotifyPageTracker(u64 word_index, u64 current_bits, u64 new_bits) const {
void UpdateProtection(u64 word_index, u64 current_bits, u64 new_bits) const {
u64 changed_bits = (add_to_tracker ? current_bits : ~current_bits) & new_bits;
VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
IteratePages(changed_bits, [&](size_t offset, size_t size) {
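The `changed_bits` formula selects only pages whose protection actually flips: with `current_bits` holding the untracked word, `add_to_tracker` keeps bits that are untracked now and in the mask, while the inverted form keeps bits that are tracked now. A worked example (values invented for illustration):

```cpp
// Worked example of the changed-bits formula in UpdateProtection, with
// current_bits standing in for the untracked word.
#include <cstdint>

constexpr std::uint64_t current_bits = 0b0110; // pages 1-2 currently untracked
constexpr std::uint64_t new_bits = 0b0011;     // pages 0-1 in the request mask

// add_to_tracker == true: start tracking pages that are currently untracked.
constexpr std::uint64_t start_tracking = current_bits & new_bits;  // 0b0010
// add_to_tracker == false: stop tracking pages that are currently tracked.
constexpr std::uint64_t stop_tracking = ~current_bits & new_bits;  // 0b0001

static_assert(start_tracking == 0b0010);
static_assert(stop_tracking == 0b0001);
```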
@@ -390,9 +250,34 @@ private:
});
}
template <Type type>
std::span<u64> Span() noexcept {
if constexpr (type == Type::CPU) {
return cpu;
} else if constexpr (type == Type::GPU) {
return gpu;
} else if constexpr (type == Type::Untracked) {
return untracked;
}
}
template <Type type>
std::span<const u64> Span() const noexcept {
if constexpr (type == Type::CPU) {
return cpu;
} else if constexpr (type == Type::GPU) {
return gpu;
} else if constexpr (type == Type::Untracked) {
return untracked;
}
}
Common::SpinLock lock;
PageManager* tracker;
VAddr cpu_addr = 0;
Words<stack_words> words;
WordsArray cpu;
WordsArray gpu;
WordsArray untracked;
};
} // namespace VideoCore


@@ -39,6 +39,15 @@ public:
return &(*first_level_map[l1_page])[l2_page];
}
[[nodiscard]] const Entry* find(size_t page) const {
const size_t l1_page = page >> SecondLevelBits;
const size_t l2_page = page & (NumEntriesPerL1Page - 1);
if (!first_level_map[l1_page]) {
return nullptr;
}
return &(*first_level_map[l1_page])[l2_page];
}
[[nodiscard]] const Entry& operator[](size_t page) const {
const size_t l1_page = page >> SecondLevelBits;
const size_t l2_page = page & (NumEntriesPerL1Page - 1);
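The new `const` `find()` splits the page number exactly as `operator[]` does — high bits indexing the first level, low bits the second — but returns `nullptr` rather than allocating a missing first-level chunk. Worked index math, assuming `SecondLevelBits = 8` (the real constant is not shown in this hunk):

```cpp
// Worked example of the two-level split used by find(), assuming
// SecondLevelBits = 8, i.e. 256 entries per first-level chunk.
#include <cstdint>

constexpr std::uint64_t SecondLevelBits = 8;
constexpr std::uint64_t NumEntriesPerL1Page = 1ULL << SecondLevelBits;

constexpr std::uint64_t page = 0x1234;
constexpr std::uint64_t l1_page = page >> SecondLevelBits;          // 0x12
constexpr std::uint64_t l2_page = page & (NumEntriesPerL1Page - 1); // 0x34

static_assert(l1_page == 0x12);
static_assert(l2_page == 0x34);
static_assert((l1_page << SecondLevelBits | l2_page) == page); // lossless split
```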


@@ -185,7 +185,7 @@ void PageManager::OnGpuUnmap(VAddr address, size_t size) {
void PageManager::UpdatePagesCachedCount(VAddr addr, u64 size, s32 delta) {
static constexpr u64 PageShift = 12;
std::scoped_lock lk{mutex};
std::scoped_lock lk{lock};
const u64 num_pages = ((addr + size - 1) >> PageShift) - (addr >> PageShift) + 1;
const u64 page_start = addr >> PageShift;
const u64 page_end = page_start + num_pages;
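`UpdatePagesCachedCount` sits on a hot path, and the commit trades `std::mutex` for `Common::SpinLock` there: a spin lock busy-waits in user space, which can win when the critical section is only a handful of counter updates. `Common::SpinLock`'s implementation is not part of this diff; the sketch below shows the usual `std::atomic_flag` shape such a type takes:

```cpp
// Minimal spin lock sketch (the real Common::SpinLock may differ).
// test_and_set spins in user space; no syscalls, no sleeping.
#include <atomic>

class SpinLock {
public:
    void lock() {
        while (flag.test_and_set(std::memory_order_acquire)) {
            // busy-wait; appropriate only for very short critical sections
        }
    }
    void unlock() {
        flag.clear(std::memory_order_release);
    }

private:
    std::atomic_flag flag = ATOMIC_FLAG_INIT;
};

// Usable with std::scoped_lock because it models BasicLockable:
//   SpinLock lock;
//   std::scoped_lock lk{lock};
```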


@@ -4,8 +4,8 @@
#pragma once
#include <memory>
#include <mutex>
#include <boost/icl/interval_map.hpp>
#include "common/spin_lock.h"
#include "common/types.h"
namespace Vulkan {
@@ -35,8 +35,8 @@ private:
struct Impl;
std::unique_ptr<Impl> impl;
Vulkan::Rasterizer* rasterizer;
std::mutex mutex;
boost::icl::interval_map<VAddr, s32> cached_pages;
Common::SpinLock lock;
};
} // namespace VideoCore