From a838fdfd8894d40caa0ba7caa70ad4891ad8d8d7 Mon Sep 17 00:00:00 2001
From: Andreas Kling
Date: Mon, 22 Aug 2022 14:41:08 +0200
Subject: [PATCH] Kernel: Make the page table quickmaps per-CPU

While the "regular" quickmap (used to temporarily map a physical page
at a known address for quick access) has been per-CPU for a while, we
also have the PD (page directory) and PT (page table) quickmaps used by
the memory management code to edit page tables. These have been global,
which meant that SMP systems had to keep fighting over them.

This patch makes *all* quickmaps per-CPU. We reserve virtual addresses
for up to 64 CPUs worth of quickmaps for now.

Note that all quickmaps are still protected by the MM lock, and we'll
have to fix that too, before seeing any real throughput improvements.
---
 Kernel/Memory/MemoryManager.cpp | 44 ++++++++++++---------------------
 Kernel/Memory/MemoryManager.h   |  3 ---
 Kernel/Sections.h               |  8 +++---
 3 files changed, 21 insertions(+), 34 deletions(-)

diff --git a/Kernel/Memory/MemoryManager.cpp b/Kernel/Memory/MemoryManager.cpp
index b34e74e42fa..7f6478bfe95 100644
--- a/Kernel/Memory/MemoryManager.cpp
+++ b/Kernel/Memory/MemoryManager.cpp
@@ -1037,53 +1037,41 @@ void MemoryManager::flush_tlb(PageDirectory const* page_directory, VirtualAddres
 
 PageDirectoryEntry* MemoryManager::quickmap_pd(PageDirectory& directory, size_t pdpt_index)
 {
+    VERIFY_INTERRUPTS_DISABLED();
     VERIFY(s_mm_lock.is_locked_by_current_processor());
-    auto& mm_data = get_data();
-    auto& pte = boot_pd_kernel_pt1023[(KERNEL_QUICKMAP_PD - KERNEL_PT1024_BASE) / PAGE_SIZE];
+
+    VirtualAddress vaddr(KERNEL_QUICKMAP_PD_PER_CPU_BASE + Processor::current_id() * PAGE_SIZE);
+    size_t pte_index = (vaddr.get() - KERNEL_PT1024_BASE) / PAGE_SIZE;
+
+    auto& pte = boot_pd_kernel_pt1023[pte_index];
     auto pd_paddr = directory.m_directory_pages[pdpt_index]->paddr();
     if (pte.physical_page_base() != pd_paddr.get()) {
         pte.set_physical_page_base(pd_paddr.get());
         pte.set_present(true);
         pte.set_writable(true);
         pte.set_user_allowed(false);
-        // Because we must continue to hold the MM lock while we use this
-        // mapping, it is sufficient to only flush on the current CPU. Other
-        // CPUs trying to use this API must wait on the MM lock anyway
-        flush_tlb_local(VirtualAddress(KERNEL_QUICKMAP_PD));
-    } else {
-        // Even though we don't allow this to be called concurrently, it's
-        // possible that this PD was mapped on a different CPU and we don't
-        // broadcast the flush. If so, we still need to flush the TLB.
-        if (mm_data.m_last_quickmap_pd != pd_paddr)
-            flush_tlb_local(VirtualAddress(KERNEL_QUICKMAP_PD));
+        flush_tlb_local(vaddr);
     }
-    mm_data.m_last_quickmap_pd = pd_paddr;
-    return (PageDirectoryEntry*)KERNEL_QUICKMAP_PD;
+    return (PageDirectoryEntry*)vaddr.get();
 }
 
 PageTableEntry* MemoryManager::quickmap_pt(PhysicalAddress pt_paddr)
 {
+    VERIFY_INTERRUPTS_DISABLED();
     VERIFY(s_mm_lock.is_locked_by_current_processor());
-    auto& mm_data = get_data();
-    auto& pte = ((PageTableEntry*)boot_pd_kernel_pt1023)[(KERNEL_QUICKMAP_PT - KERNEL_PT1024_BASE) / PAGE_SIZE];
+
+    VirtualAddress vaddr(KERNEL_QUICKMAP_PT_PER_CPU_BASE + Processor::current_id() * PAGE_SIZE);
+    size_t pte_index = (vaddr.get() - KERNEL_PT1024_BASE) / PAGE_SIZE;
+
+    auto& pte = ((PageTableEntry*)boot_pd_kernel_pt1023)[pte_index];
     if (pte.physical_page_base() != pt_paddr.get()) {
         pte.set_physical_page_base(pt_paddr.get());
         pte.set_present(true);
         pte.set_writable(true);
         pte.set_user_allowed(false);
-        // Because we must continue to hold the MM lock while we use this
-        // mapping, it is sufficient to only flush on the current CPU. Other
-        // CPUs trying to use this API must wait on the MM lock anyway
-        flush_tlb_local(VirtualAddress(KERNEL_QUICKMAP_PT));
-    } else {
-        // Even though we don't allow this to be called concurrently, it's
-        // possible that this PT was mapped on a different CPU and we don't
-        // broadcast the flush. If so, we still need to flush the TLB.
-        if (mm_data.m_last_quickmap_pt != pt_paddr)
-            flush_tlb_local(VirtualAddress(KERNEL_QUICKMAP_PT));
+        flush_tlb_local(vaddr);
     }
-    mm_data.m_last_quickmap_pt = pt_paddr;
-    return (PageTableEntry*)KERNEL_QUICKMAP_PT;
+    return (PageTableEntry*)vaddr.get();
 }
 
 u8* MemoryManager::quickmap_page(PhysicalAddress const& physical_address)
diff --git a/Kernel/Memory/MemoryManager.h b/Kernel/Memory/MemoryManager.h
index cdc5eec7884..84eedc75706 100644
--- a/Kernel/Memory/MemoryManager.h
+++ b/Kernel/Memory/MemoryManager.h
@@ -91,9 +91,6 @@ struct MemoryManagerData {
 
     Spinlock m_quickmap_in_use { LockRank::None };
     u32 m_quickmap_prev_flags;
-
-    PhysicalAddress m_last_quickmap_pd;
-    PhysicalAddress m_last_quickmap_pt;
 };
 
 // NOLINTNEXTLINE(readability-redundant-declaration) FIXME: Why do we declare this here *and* in Thread.h?
diff --git a/Kernel/Sections.h b/Kernel/Sections.h
index 5ea5638689c..9142dc6ccc3 100644
--- a/Kernel/Sections.h
+++ b/Kernel/Sections.h
@@ -17,9 +17,11 @@
 #define KERNEL_PD_END (kernel_mapping_base + KERNEL_PD_SIZE)
 #define KERNEL_PT1024_BASE (kernel_mapping_base + 0x3FE00000)
-#define KERNEL_QUICKMAP_PT (KERNEL_PT1024_BASE + 0x6000)
-#define KERNEL_QUICKMAP_PD (KERNEL_PT1024_BASE + 0x7000)
-#define KERNEL_QUICKMAP_PER_CPU_BASE (KERNEL_PT1024_BASE + 0x8000)
+
+#define KERNEL_MAX_CPU_COUNT 64
+#define KERNEL_QUICKMAP_PT_PER_CPU_BASE (KERNEL_PT1024_BASE + (1 * KERNEL_MAX_CPU_COUNT * PAGE_SIZE))
+#define KERNEL_QUICKMAP_PD_PER_CPU_BASE (KERNEL_PT1024_BASE + (2 * KERNEL_MAX_CPU_COUNT * PAGE_SIZE))
+#define KERNEL_QUICKMAP_PER_CPU_BASE (KERNEL_PT1024_BASE + (3 * KERNEL_MAX_CPU_COUNT * PAGE_SIZE))
 
 #define USER_RANGE_BASE 0x10000
 #define USER_RANGE_CEILING (kernel_mapping_base - 0x2000000)
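
Postscript (annotation, not part of the patch): a minimal standalone sketch of
the per-CPU slot addressing the patch introduces. The layout (one page per CPU,
bases spaced KERNEL_MAX_CPU_COUNT pages apart) and the use of the current CPU
id come from the patch; the concrete base value, the helper names
quickmap_pd_slot()/quickmap_pd_pte_index(), and the main() printout are
illustrative assumptions only.

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>

    // Sketch-only constants: page_size and max_cpu_count mirror PAGE_SIZE and
    // KERNEL_MAX_CPU_COUNT from the patch; the base address is hypothetical
    // (the real KERNEL_PT1024_BASE is kernel_mapping_base + 0x3FE00000).
    static constexpr size_t page_size = 4096;
    static constexpr size_t max_cpu_count = 64;
    static constexpr uintptr_t pt1024_base = 0xC3FE0000u;
    static constexpr uintptr_t quickmap_pd_base = pt1024_base + (2 * max_cpu_count * page_size);

    // Virtual address of a given CPU's page-directory quickmap slot (in the
    // patch: KERNEL_QUICKMAP_PD_PER_CPU_BASE + Processor::current_id() * PAGE_SIZE).
    constexpr uintptr_t quickmap_pd_slot(uint32_t cpu_id)
    {
        return quickmap_pd_base + cpu_id * page_size;
    }

    // Index of that slot's PTE in the shared kernel page table
    // (boot_pd_kernel_pt1023 in the patch), relative to KERNEL_PT1024_BASE.
    constexpr size_t quickmap_pd_pte_index(uint32_t cpu_id)
    {
        return (quickmap_pd_slot(cpu_id) - pt1024_base) / page_size;
    }

    int main()
    {
        // Each CPU gets a distinct page, so two CPUs never edit the same slot.
        for (uint32_t cpu = 0; cpu < 4; ++cpu)
            std::printf("cpu %u: slot 0x%llx, pte index %zu\n",
                cpu, (unsigned long long)quickmap_pd_slot(cpu), quickmap_pd_pte_index(cpu));
    }

Because each CPU owns its own slot, a local TLB flush after rewriting that
slot's PTE is sufficient: no other CPU ever touches the address, which is why
the patch can drop the m_last_quickmap_pd/pt bookkeeping and the broadcast
considerations in the old comments.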