From f1b5def8fd48cf09704a6d4151f9002b80354430 Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Mon, 8 Feb 2021 15:45:40 +0100 Subject: [PATCH] Kernel: Factor address space management out of the Process class This patch adds Space, a class representing a process's address space. - Each Process has a Space. - The Space owns the PageDirectory and all Regions in the Process. This allows us to reorganize sys$execve() so that it constructs and populates a new Space fully before committing to it. Previously, we would construct the new address space while still running in the old one, and encountering an error meant we had to do tedious and error-prone rollback. Those problems are now gone, replaced by what's hopefully a set of much smaller problems and missing cleanups. :^) --- Kernel/CMakeLists.txt | 1 + Kernel/CoreDump.cpp | 12 +- Kernel/Devices/BXVGADevice.cpp | 2 +- Kernel/Devices/MBVGADevice.cpp | 2 +- Kernel/Devices/MemoryDevice.cpp | 2 +- Kernel/FileSystem/AnonymousFile.cpp | 2 +- Kernel/FileSystem/InodeFile.cpp | 2 +- Kernel/FileSystem/ProcFS.cpp | 4 +- Kernel/Forward.h | 1 + Kernel/PerformanceEventBuffer.cpp | 4 +- Kernel/Process.cpp | 171 ++------------------- Kernel/Process.h | 66 +------- Kernel/Syscall.cpp | 4 +- Kernel/Syscalls/execve.cpp | 141 +++++++---------- Kernel/Syscalls/fork.cpp | 13 +- Kernel/Syscalls/futex.cpp | 4 +- Kernel/Syscalls/get_stack_bounds.cpp | 2 +- Kernel/Syscalls/mmap.cpp | 74 ++++----- Kernel/Syscalls/ptrace.cpp | 2 +- Kernel/Syscalls/thread.cpp | 2 +- Kernel/Thread.cpp | 6 +- Kernel/VM/MemoryManager.cpp | 29 ++-- Kernel/VM/MemoryManager.h | 9 +- Kernel/VM/PageDirectory.cpp | 8 +- Kernel/VM/PageDirectory.h | 21 ++- Kernel/VM/Space.cpp | 222 +++++++++++++++++++++++++++ Kernel/VM/Space.h | 92 +++++++++++ 27 files changed, 494 insertions(+), 404 deletions(-) create mode 100644 Kernel/VM/Space.cpp create mode 100644 Kernel/VM/Space.h diff --git a/Kernel/CMakeLists.txt b/Kernel/CMakeLists.txt index 23367237f6e..a5e87626859 100644 --- 
a/Kernel/CMakeLists.txt +++ b/Kernel/CMakeLists.txt @@ -212,6 +212,7 @@ set(KERNEL_SOURCES VM/RangeAllocator.cpp VM/Region.cpp VM/SharedInodeVMObject.cpp + VM/Space.cpp VM/VMObject.cpp WaitQueue.cpp init.cpp diff --git a/Kernel/CoreDump.cpp b/Kernel/CoreDump.cpp index 2f3007e8637..4c947f197fe 100644 --- a/Kernel/CoreDump.cpp +++ b/Kernel/CoreDump.cpp @@ -59,7 +59,7 @@ OwnPtr CoreDump::create(NonnullRefPtr process, const String& CoreDump::CoreDump(NonnullRefPtr process, NonnullRefPtr&& fd) : m_process(move(process)) , m_fd(move(fd)) - , m_num_program_headers(m_process->m_regions.size() + 1) // +1 for NOTE segment + , m_num_program_headers(m_process->space().region_count() + 1) // +1 for NOTE segment { } @@ -137,7 +137,7 @@ KResult CoreDump::write_elf_header() KResult CoreDump::write_program_headers(size_t notes_size) { size_t offset = sizeof(Elf32_Ehdr) + m_num_program_headers * sizeof(Elf32_Phdr); - for (auto& region : m_process->m_regions) { + for (auto& region : m_process->space().regions()) { Elf32_Phdr phdr {}; phdr.p_type = PT_LOAD; @@ -178,7 +178,7 @@ KResult CoreDump::write_program_headers(size_t notes_size) KResult CoreDump::write_regions() { - for (auto& region : m_process->m_regions) { + for (auto& region : m_process->space().regions()) { if (region.is_kernel()) continue; @@ -258,13 +258,13 @@ ByteBuffer CoreDump::create_notes_threads_data() const ByteBuffer CoreDump::create_notes_regions_data() const { ByteBuffer regions_data; - for (size_t region_index = 0; region_index < m_process->m_regions.size(); ++region_index) { + for (size_t region_index = 0; region_index < m_process->space().region_count(); ++region_index) { ByteBuffer memory_region_info_buffer; ELF::Core::MemoryRegionInfo info {}; info.header.type = ELF::Core::NotesEntryHeader::Type::MemoryRegionInfo; - auto& region = m_process->m_regions[region_index]; + auto& region = m_process->space().regions()[region_index]; info.region_start = reinterpret_cast(region.vaddr().as_ptr()); info.region_end = 
reinterpret_cast(region.vaddr().as_ptr() + region.size()); info.program_header_index = region_index; @@ -316,7 +316,7 @@ ByteBuffer CoreDump::create_notes_segment_data() const KResult CoreDump::write() { - ScopedSpinLock lock(m_process->get_lock()); + ScopedSpinLock lock(m_process->space().get_lock()); ProcessPagingScope scope(m_process); ByteBuffer notes_segment = create_notes_segment_data(); diff --git a/Kernel/Devices/BXVGADevice.cpp b/Kernel/Devices/BXVGADevice.cpp index 349cbe10799..307f66d2d60 100644 --- a/Kernel/Devices/BXVGADevice.cpp +++ b/Kernel/Devices/BXVGADevice.cpp @@ -185,7 +185,7 @@ KResultOr BXVGADevice::mmap(Process& process, FileDescription&, const R auto vmobject = AnonymousVMObject::create_for_physical_range(m_framebuffer_address, framebuffer_size_in_bytes()); if (!vmobject) return ENOMEM; - return process.allocate_region_with_vmobject( + return process.space().allocate_region_with_vmobject( range, vmobject.release_nonnull(), 0, diff --git a/Kernel/Devices/MBVGADevice.cpp b/Kernel/Devices/MBVGADevice.cpp index 55e7d176e8d..999108c5541 100644 --- a/Kernel/Devices/MBVGADevice.cpp +++ b/Kernel/Devices/MBVGADevice.cpp @@ -64,7 +64,7 @@ KResultOr MBVGADevice::mmap(Process& process, FileDescription&, const R auto vmobject = AnonymousVMObject::create_for_physical_range(m_framebuffer_address, framebuffer_size_in_bytes()); if (!vmobject) return ENOMEM; - return process.allocate_region_with_vmobject( + return process.space().allocate_region_with_vmobject( range, vmobject.release_nonnull(), 0, diff --git a/Kernel/Devices/MemoryDevice.cpp b/Kernel/Devices/MemoryDevice.cpp index acbe53ae90d..8763271ebc3 100644 --- a/Kernel/Devices/MemoryDevice.cpp +++ b/Kernel/Devices/MemoryDevice.cpp @@ -66,7 +66,7 @@ KResultOr MemoryDevice::mmap(Process& process, FileDescription&, const if (!vmobject) return ENOMEM; dbgln("MemoryDevice: Mapped physical memory at {} for range of {} bytes", viewed_address, range.size()); - return process.allocate_region_with_vmobject( + 
return process.space().allocate_region_with_vmobject( range, vmobject.release_nonnull(), 0, diff --git a/Kernel/FileSystem/AnonymousFile.cpp b/Kernel/FileSystem/AnonymousFile.cpp index aede4e94745..82a5669f848 100644 --- a/Kernel/FileSystem/AnonymousFile.cpp +++ b/Kernel/FileSystem/AnonymousFile.cpp @@ -47,7 +47,7 @@ KResultOr AnonymousFile::mmap(Process& process, FileDescription&, const if (range.size() != m_vmobject->size()) return EINVAL; - return process.allocate_region_with_vmobject(range, m_vmobject, offset, {}, prot, shared); + return process.space().allocate_region_with_vmobject(range, m_vmobject, offset, {}, prot, shared); } } diff --git a/Kernel/FileSystem/InodeFile.cpp b/Kernel/FileSystem/InodeFile.cpp index fe529440064..755e9abbeac 100644 --- a/Kernel/FileSystem/InodeFile.cpp +++ b/Kernel/FileSystem/InodeFile.cpp @@ -117,7 +117,7 @@ KResultOr InodeFile::mmap(Process& process, FileDescription& descriptio vmobject = PrivateInodeVMObject::create_with_inode(inode()); if (!vmobject) return ENOMEM; - return process.allocate_region_with_vmobject(range, vmobject.release_nonnull(), offset, description.absolute_path(), prot, shared); + return process.space().allocate_region_with_vmobject(range, vmobject.release_nonnull(), offset, description.absolute_path(), prot, shared); } String InodeFile::absolute_path(const FileDescription& description) const diff --git a/Kernel/FileSystem/ProcFS.cpp b/Kernel/FileSystem/ProcFS.cpp index bb83131cf1c..8551c6feceb 100644 --- a/Kernel/FileSystem/ProcFS.cpp +++ b/Kernel/FileSystem/ProcFS.cpp @@ -317,8 +317,8 @@ static bool procfs$pid_vm(InodeIdentifier identifier, KBufferBuilder& builder) return false; JsonArraySerializer array { builder }; { - ScopedSpinLock lock(process->get_lock()); - for (auto& region : process->regions()) { + ScopedSpinLock lock(process->space().get_lock()); + for (auto& region : process->space().regions()) { if (!region.is_user_accessible() && !Process::current()->is_superuser()) continue; auto 
region_object = array.add_object(); diff --git a/Kernel/Forward.h b/Kernel/Forward.h index d2e1c26c547..e5a3de0c27b 100644 --- a/Kernel/Forward.h +++ b/Kernel/Forward.h @@ -62,6 +62,7 @@ class Region; class Scheduler; class SchedulerPerProcessorData; class Socket; +class Space; template class SpinLock; class RecursiveSpinLock; diff --git a/Kernel/PerformanceEventBuffer.cpp b/Kernel/PerformanceEventBuffer.cpp index 0aaa07c3fd5..c0e6d2e06c1 100644 --- a/Kernel/PerformanceEventBuffer.cpp +++ b/Kernel/PerformanceEventBuffer.cpp @@ -121,7 +121,7 @@ bool PerformanceEventBuffer::to_json(KBufferBuilder& builder, ProcessID pid, con { auto process = Process::from_pid(pid); ASSERT(process); - ScopedSpinLock locker(process->get_lock()); + ScopedSpinLock locker(process->space().get_lock()); JsonObjectSerializer object(builder); object.add("pid", pid.value()); @@ -129,7 +129,7 @@ bool PerformanceEventBuffer::to_json(KBufferBuilder& builder, ProcessID pid, con { auto region_array = object.add_array("regions"); - for (const auto& region : process->regions()) { + for (const auto& region : process->space().regions()) { auto region_object = region_array.add_object(); region_object.add("base", region.vaddr().get()); region_object.add("size", region.size()); diff --git a/Kernel/Process.cpp b/Kernel/Process.cpp index 462ae5bda92..7a397114186 100644 --- a/Kernel/Process.cpp +++ b/Kernel/Process.cpp @@ -116,110 +116,6 @@ bool Process::in_group(gid_t gid) const return m_gid == gid || m_extra_gids.contains_slow(gid); } -Optional Process::allocate_range(VirtualAddress vaddr, size_t size, size_t alignment) -{ - vaddr.mask(PAGE_MASK); - size = PAGE_ROUND_UP(size); - if (vaddr.is_null()) - return page_directory().range_allocator().allocate_anywhere(size, alignment); - return page_directory().range_allocator().allocate_specific(vaddr, size); -} - -Region& Process::allocate_split_region(const Region& source_region, const Range& range, size_t offset_in_vmobject) -{ - auto& region = add_region( - 
Region::create_user_accessible(this, range, source_region.vmobject(), offset_in_vmobject, source_region.name(), source_region.access(), source_region.is_cacheable(), source_region.is_shared())); - region.set_syscall_region(source_region.is_syscall_region()); - region.set_mmap(source_region.is_mmap()); - region.set_stack(source_region.is_stack()); - size_t page_offset_in_source_region = (offset_in_vmobject - source_region.offset_in_vmobject()) / PAGE_SIZE; - for (size_t i = 0; i < region.page_count(); ++i) { - if (source_region.should_cow(page_offset_in_source_region + i)) - region.set_should_cow(i, true); - } - return region; -} - -KResultOr Process::allocate_region(const Range& range, const String& name, int prot, AllocationStrategy strategy) -{ - ASSERT(range.is_valid()); - auto vmobject = AnonymousVMObject::create_with_size(range.size(), strategy); - if (!vmobject) - return ENOMEM; - auto region = Region::create_user_accessible(this, range, vmobject.release_nonnull(), 0, name, prot_to_region_access_flags(prot), true, false); - if (!region->map(page_directory())) - return ENOMEM; - return &add_region(move(region)); -} - -KResultOr Process::allocate_region_with_vmobject(const Range& range, NonnullRefPtr vmobject, size_t offset_in_vmobject, const String& name, int prot, bool shared) -{ - ASSERT(range.is_valid()); - size_t end_in_vmobject = offset_in_vmobject + range.size(); - if (end_in_vmobject <= offset_in_vmobject) { - dbgln("allocate_region_with_vmobject: Overflow (offset + size)"); - return EINVAL; - } - if (offset_in_vmobject >= vmobject->size()) { - dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an offset past the end of its VMObject."); - return EINVAL; - } - if (end_in_vmobject > vmobject->size()) { - dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an end past the end of its VMObject."); - return EINVAL; - } - offset_in_vmobject &= PAGE_MASK; - auto& region = add_region(Region::create_user_accessible(this, 
range, move(vmobject), offset_in_vmobject, name, prot_to_region_access_flags(prot), true, shared)); - if (!region.map(page_directory())) { - // FIXME: What is an appropriate error code here, really? - return ENOMEM; - } - return &region; -} - -bool Process::deallocate_region(Region& region) -{ - OwnPtr region_protector; - ScopedSpinLock lock(m_lock); - - if (m_region_lookup_cache.region.unsafe_ptr() == &region) - m_region_lookup_cache.region = nullptr; - for (size_t i = 0; i < m_regions.size(); ++i) { - if (&m_regions[i] == &region) { - region_protector = m_regions.unstable_take(i); - return true; - } - } - return false; -} - -Region* Process::find_region_from_range(const Range& range) -{ - ScopedSpinLock lock(m_lock); - if (m_region_lookup_cache.range.has_value() && m_region_lookup_cache.range.value() == range && m_region_lookup_cache.region) - return m_region_lookup_cache.region.unsafe_ptr(); - - size_t size = PAGE_ROUND_UP(range.size()); - for (auto& region : m_regions) { - if (region.vaddr() == range.base() && region.size() == size) { - m_region_lookup_cache.range = range; - m_region_lookup_cache.region = region; - return &region; - } - } - return nullptr; -} - -Region* Process::find_region_containing(const Range& range) -{ - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { - if (region.contains(range)) - return &region; - } - return nullptr; -} - void Process::kill_threads_except_self() { InterruptDisabler disabler; @@ -339,7 +235,7 @@ Process::Process(RefPtr& first_thread, const String& name, uid_t uid, gi { dbgln_if(PROCESS_DEBUG, "Created new process {}({})", m_name, m_pid.value()); - m_page_directory = PageDirectory::create_for_userspace(*this, fork_parent ? &fork_parent->page_directory().range_allocator() : nullptr); + m_space = Space::create(*this, fork_parent ? &fork_parent->space() : nullptr); if (fork_parent) { // NOTE: fork() doesn't clone all threads; the thread that called fork() becomes the only thread in the new process. 
@@ -365,28 +261,6 @@ Process::~Process() } } -void Process::dump_regions() -{ - klog() << "Process regions:"; - klog() << "BEGIN END SIZE ACCESS NAME"; - - ScopedSpinLock lock(m_lock); - - Vector sorted_regions; - sorted_regions.ensure_capacity(m_regions.size()); - for (auto& region : m_regions) - sorted_regions.append(&region); - quick_sort(sorted_regions, [](auto& a, auto& b) { - return a->vaddr() < b->vaddr(); - }); - - for (auto& sorted_region : sorted_regions) { - auto& region = *sorted_region; - klog() << String::format("%08x", region.vaddr().get()) << " -- " << String::format("%08x", region.vaddr().offset(region.size() - 1).get()) << " " << String::format("%08zx", region.size()) << " " << (region.is_readable() ? 'R' : ' ') << (region.is_writable() ? 'W' : ' ') << (region.is_executable() ? 'X' : ' ') << (region.is_shared() ? 'S' : ' ') << (region.is_stack() ? 'T' : ' ') << (region.vmobject().is_anonymous() ? 'A' : ' ') << " " << region.name().characters(); - } - MM.dump_kernel_regions(); -} - // Make sure the compiler doesn't "optimize away" this function: extern void signal_trampoline_dummy(); void signal_trampoline_dummy() @@ -457,7 +331,7 @@ void Process::crash(int signal, u32 eip, bool out_of_memory) } m_termination_signal = signal; set_dump_core(!out_of_memory); - dump_regions(); + space().dump_regions(); ASSERT(is_user_process()); die(); // We can not return from here, as there is nowhere @@ -643,10 +517,7 @@ void Process::finalize() unblock_waiters(Thread::WaitBlocker::UnblockFlags::Terminated); - { - ScopedSpinLock lock(m_lock); - m_regions.clear(); - } + m_space->remove_all_regions({}); ASSERT(ref_count() > 0); // WaitBlockCondition::finalize will be in charge of dropping the last @@ -689,8 +560,8 @@ size_t Process::amount_dirty_private() const // The main issue I'm thinking of is when the VMObject has physical pages that none of the Regions are mapping. // That's probably a situation that needs to be looked at in general. 
size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { if (!region.is_shared()) amount += region.amount_dirty(); } @@ -701,8 +572,8 @@ size_t Process::amount_clean_inode() const { HashTable vmobjects; { - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { if (region.vmobject().is_inode()) vmobjects.set(&static_cast(region.vmobject())); } @@ -716,8 +587,8 @@ size_t Process::amount_clean_inode() const size_t Process::amount_virtual() const { size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { amount += region.size(); } return amount; @@ -727,8 +598,8 @@ size_t Process::amount_resident() const { // FIXME: This will double count if multiple regions use the same physical page. size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { amount += region.amount_resident(); } return amount; @@ -741,8 +612,8 @@ size_t Process::amount_shared() const // and each PhysicalPage is only reffed by its VMObject. This needs to be refactored // so that every Region contributes +1 ref to each of its PhysicalPages. 
size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { amount += region.amount_shared(); } return amount; @@ -751,8 +622,8 @@ size_t Process::amount_shared() const size_t Process::amount_purgeable_volatile() const { size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { if (region.vmobject().is_anonymous() && static_cast(region.vmobject()).is_any_volatile()) amount += region.amount_resident(); } @@ -762,8 +633,8 @@ size_t Process::amount_purgeable_volatile() const size_t Process::amount_purgeable_nonvolatile() const { size_t amount = 0; - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { if (region.vmobject().is_anonymous() && !static_cast(region.vmobject()).is_any_volatile()) amount += region.amount_resident(); } @@ -858,14 +729,6 @@ void Process::set_root_directory(const Custody& root) m_root_directory = root; } -Region& Process::add_region(NonnullOwnPtr region) -{ - auto* ptr = region.ptr(); - ScopedSpinLock lock(m_lock); - m_regions.append(move(region)); - return *ptr; -} - void Process::set_tty(TTY* tty) { m_tty = tty; diff --git a/Kernel/Process.h b/Kernel/Process.h index 9fdd863efde..427dbb660d2 100644 --- a/Kernel/Process.h +++ b/Kernel/Process.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, Andreas Kling + * Copyright (c) 2018-2021, Andreas Kling * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -48,6 +48,7 @@ #include #include #include +#include #include #include @@ -99,6 +100,8 @@ enum class VeilState { typedef HashMap> FutexQueues; +struct LoadResult; + class Process : public RefCounted , public InlineLinkedListNode @@ -164,9 +167,6 @@ public: bool is_kernel_process() const { return m_is_kernel_process; } bool is_user_process() const { return !m_is_kernel_process; } - PageDirectory& page_directory() { return *m_page_directory; } - const PageDirectory& page_directory() const { return *m_page_directory; } - static RefPtr from_pid(ProcessID); static SessionID get_sid_from_pgid(ProcessGroupID pgid); @@ -188,8 +188,6 @@ public: bool is_dumpable() const { return m_dumpable; } void set_dumpable(bool dumpable) { m_dumpable = dumpable; } - ThreadID exec_tid() const { return m_exec_tid; } - mode_t umask() const { return m_umask; } bool in_group(gid_t) const; @@ -209,8 +207,6 @@ public: void die(); void finalize(); - ALWAYS_INLINE SpinLock& get_lock() const { return m_lock; } - ThreadTracer* tracer() { return m_tracer.ptr(); } bool is_traced() const { return !!m_tracer; } void start_tracing_from(ProcessID tracer); @@ -373,14 +369,6 @@ public: const TTY* tty() const { return m_tty; } void set_tty(TTY*); - size_t region_count() const { return m_regions.size(); } - const NonnullOwnPtrVector& regions() const - { - ASSERT(m_lock.is_locked()); - return m_regions; - } - void dump_regions(); - u32 m_ticks_in_user { 0 }; u32 m_ticks_in_kernel { 0 }; @@ -410,25 +398,12 @@ public: int exec(String path, Vector arguments, Vector environment, int recusion_depth = 0); - struct LoadResult { - FlatPtr load_base { 0 }; - FlatPtr entry_eip { 0 }; - size_t size { 0 }; - FlatPtr program_headers { 0 }; - size_t num_program_headers { 0 }; - WeakPtr tls_region; - size_t tls_size { 0 }; - size_t tls_alignment { 0 }; - WeakPtr stack_region; - }; - enum class ShouldAllocateTls { No = 0, Yes, }; KResultOr 
load(NonnullRefPtr main_program_description, RefPtr interpreter_description, const Elf32_Ehdr& main_program_header); - KResultOr load_elf_object(FileDescription& object_description, FlatPtr load_offset, ShouldAllocateTls); KResultOr get_interpreter_load_offset(const Elf32_Ehdr& main_program_header, FileDescription& main_program_description, FileDescription& interpreter_description); bool is_superuser() const @@ -436,13 +411,6 @@ public: return m_euid == 0; } - KResultOr allocate_region_with_vmobject(const Range&, NonnullRefPtr, size_t offset_in_vmobject, const String& name, int prot, bool shared); - KResultOr allocate_region(const Range&, const String& name, int prot = PROT_READ | PROT_WRITE, AllocationStrategy strategy = AllocationStrategy::Reserve); - bool deallocate_region(Region& region); - - Region& allocate_split_region(const Region& source_region, const Range&, size_t offset_in_vmobject); - Vector split_region_around_range(const Region& source_region, const Range&); - void terminate_due_to_signal(u8 signal); KResult send_signal(u8 signal, Process* sender); @@ -503,7 +471,8 @@ public: PerformanceEventBuffer* perf_events() { return m_perf_event_buffer; } - bool enforces_syscall_regions() const { return m_enforces_syscall_regions; } + Space& space() { return *m_space; } + const Space& space() const { return *m_space; } private: friend class MemoryManager; @@ -518,10 +487,6 @@ private: Process(RefPtr& first_thread, const String& name, uid_t, gid_t, ProcessID ppid, bool is_kernel_process, RefPtr cwd = nullptr, RefPtr executable = nullptr, TTY* = nullptr, Process* fork_parent = nullptr); static ProcessID allocate_pid(); - Optional allocate_range(VirtualAddress, size_t, size_t alignment = PAGE_SIZE); - - Region& add_region(NonnullOwnPtr); - void kill_threads_except_self(); void kill_all_threads(); bool dump_core(); @@ -552,13 +517,13 @@ private: void clear_futex_queues_on_exec(); - RefPtr m_page_directory; - Process* m_prev { nullptr }; Process* m_next { nullptr }; 
String m_name; + OwnPtr m_space; + ProcessID m_pid { 0 }; SessionID m_sid { 0 }; RefPtr m_pg; @@ -570,8 +535,6 @@ private: uid_t m_suid { 0 }; gid_t m_sgid { 0 }; - ThreadID m_exec_tid { 0 }; - OwnPtr m_tracer; static const int m_max_open_file_descriptors { FD_SETSIZE }; @@ -617,16 +580,6 @@ private: RefPtr m_tty; - Region* find_region_from_range(const Range&); - Region* find_region_containing(const Range&); - - NonnullOwnPtrVector m_regions; - struct RegionLookupCache { - Optional range; - WeakPtr region; - }; - RegionLookupCache m_region_lookup_cache; - ProcessID m_ppid { 0 }; mode_t m_umask { 022 }; @@ -639,12 +592,9 @@ private: size_t m_master_tls_alignment { 0 }; Lock m_big_lock { "Process" }; - mutable SpinLock m_lock; RefPtr m_alarm_timer; - bool m_enforces_syscall_regions { false }; - bool m_has_promises { false }; u32 m_promises { 0 }; bool m_has_execpromises { false }; diff --git a/Kernel/Syscall.cpp b/Kernel/Syscall.cpp index 8eb79eca52f..62652008a5e 100644 --- a/Kernel/Syscall.cpp +++ b/Kernel/Syscall.cpp @@ -176,7 +176,7 @@ void syscall_handler(TrapFrame* trap) ASSERT_NOT_REACHED(); } - auto* calling_region = MM.find_region_from_vaddr(process, VirtualAddress(regs.eip)); + auto* calling_region = MM.find_region_from_vaddr(process.space(), VirtualAddress(regs.eip)); if (!calling_region) { dbgln("Syscall from {:p} which has no associated region", regs.eip); handle_crash(regs, "Syscall from unknown region", SIGSEGV); @@ -189,7 +189,7 @@ void syscall_handler(TrapFrame* trap) ASSERT_NOT_REACHED(); } - if (process.enforces_syscall_regions() && !calling_region->is_syscall_region()) { + if (process.space().enforces_syscall_regions() && !calling_region->is_syscall_region()) { dbgln("Syscall from non-syscall region"); handle_crash(regs, "Syscall from non-syscall region", SIGSEGV); ASSERT_NOT_REACHED(); diff --git a/Kernel/Syscalls/execve.cpp b/Kernel/Syscalls/execve.cpp index e102f569709..a06969f9055 100644 --- a/Kernel/Syscalls/execve.cpp +++ 
b/Kernel/Syscalls/execve.cpp @@ -47,6 +47,19 @@ namespace Kernel { +struct LoadResult { + OwnPtr space; + FlatPtr load_base { 0 }; + FlatPtr entry_eip { 0 }; + size_t size { 0 }; + FlatPtr program_headers { 0 }; + size_t num_program_headers { 0 }; + WeakPtr tls_region; + size_t tls_size { 0 }; + size_t tls_alignment { 0 }; + WeakPtr stack_region; +}; + static Vector generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, uid_t uid, uid_t euid, gid_t gid, gid_t egid, String executable_path, int main_program_fd); static bool validate_stack_size(const Vector& arguments, const Vector& environment) @@ -142,7 +155,7 @@ static KResultOr make_userspace_stack_for_main_thread(Region& region, V return new_esp; } -KResultOr Process::load_elf_object(FileDescription& object_description, FlatPtr load_offset, ShouldAllocateTls should_allocate_tls) +static KResultOr load_elf_object(NonnullOwnPtr new_space, FileDescription& object_description, FlatPtr load_offset, Process::ShouldAllocateTls should_allocate_tls) { auto& inode = *(object_description.inode()); auto vmobject = SharedInodeVMObject::create_with_inode(inode); @@ -172,10 +185,12 @@ KResultOr Process::load_elf_object(FileDescription& object_ String elf_name = object_description.absolute_path(); ASSERT(!Processor::current().in_critical()); + MemoryManager::enter_space(*new_space); + KResult ph_load_result = KSuccess; elf_image.for_each_program_header([&](const ELF::Image::ProgramHeader& program_header) { if (program_header.type() == PT_TLS) { - ASSERT(should_allocate_tls == ShouldAllocateTls::Yes); + ASSERT(should_allocate_tls == Process::ShouldAllocateTls::Yes); ASSERT(program_header.size_in_memory()); if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) { @@ -184,13 +199,13 @@ KResultOr Process::load_elf_object(FileDescription& object_ return IterationDecision::Break; } - auto range = allocate_range({}, program_header.size_in_memory()); + auto range = 
new_space->allocate_range({}, program_header.size_in_memory()); if (!range.has_value()) { ph_load_result = ENOMEM; return IterationDecision::Break; } - auto region_or_error = allocate_region(range.value(), String::formatted("{} (master-tls)", elf_name), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve); + auto region_or_error = new_space->allocate_region(range.value(), String::formatted("{} (master-tls)", elf_name), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve); if (region_or_error.is_error()) { ph_load_result = region_or_error.error(); return IterationDecision::Break; @@ -225,12 +240,12 @@ KResultOr Process::load_elf_object(FileDescription& object_ if (program_header.is_writable()) prot |= PROT_WRITE; auto region_name = String::formatted("{} (data-{}{})", elf_name, program_header.is_readable() ? "r" : "", program_header.is_writable() ? "w" : ""); - auto range = allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory()); + auto range = new_space->allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory()); if (!range.has_value()) { ph_load_result = ENOMEM; return IterationDecision::Break; } - auto region_or_error = allocate_region(range.value(), region_name, prot, AllocationStrategy::Reserve); + auto region_or_error = new_space->allocate_region(range.value(), region_name, prot, AllocationStrategy::Reserve); if (region_or_error.is_error()) { ph_load_result = region_or_error.error(); return IterationDecision::Break; @@ -262,12 +277,12 @@ KResultOr Process::load_elf_object(FileDescription& object_ prot |= PROT_WRITE; if (program_header.is_executable()) prot |= PROT_EXEC; - auto range = allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory()); + auto range = new_space->allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory()); if (!range.has_value()) { ph_load_result = ENOMEM; return IterationDecision::Break; } - auto 
region_or_error = allocate_region_with_vmobject(range.value(), *vmobject, program_header.offset(), elf_name, prot, true); + auto region_or_error = new_space->allocate_region_with_vmobject(range.value(), *vmobject, program_header.offset(), elf_name, prot, true); if (region_or_error.is_error()) { ph_load_result = region_or_error.error(); return IterationDecision::Break; @@ -287,19 +302,20 @@ KResultOr Process::load_elf_object(FileDescription& object_ return ENOEXEC; } - auto stack_range = allocate_range({}, Thread::default_userspace_stack_size); + auto stack_range = new_space->allocate_range({}, Thread::default_userspace_stack_size); if (!stack_range.has_value()) { dbgln("do_exec: Failed to allocate VM range for stack"); return ENOMEM; } - auto stack_region_or_error = allocate_region(stack_range.value(), "Stack (Main thread)", PROT_READ | PROT_WRITE, AllocationStrategy::Reserve); + auto stack_region_or_error = new_space->allocate_region(stack_range.value(), "Stack (Main thread)", PROT_READ | PROT_WRITE, AllocationStrategy::Reserve); if (stack_region_or_error.is_error()) return stack_region_or_error.error(); auto& stack_region = *stack_region_or_error.value(); stack_region.set_stack(true); return LoadResult { + move(new_space), load_base_address, elf_image.entry().offset(load_offset).get(), executable_size, @@ -312,44 +328,20 @@ KResultOr Process::load_elf_object(FileDescription& object_ }; } -KResultOr Process::load(NonnullRefPtr main_program_description, RefPtr interpreter_description, const Elf32_Ehdr& main_program_header) +KResultOr Process::load(NonnullRefPtr main_program_description, RefPtr interpreter_description, const Elf32_Ehdr& main_program_header) { - RefPtr old_page_directory; - NonnullOwnPtrVector old_regions; + auto new_space = Space::create(*this, nullptr); + if (!new_space) + return ENOMEM; - { - auto page_directory = PageDirectory::create_for_userspace(*this); - if (!page_directory) - return ENOMEM; - - // Need to make sure we don't swap contexts in 
the middle - ScopedCritical critical; - old_page_directory = move(m_page_directory); - old_regions = move(m_regions); - m_page_directory = page_directory.release_nonnull(); - MM.enter_process_paging_scope(*this); - } - - ArmedScopeGuard rollback_regions_guard([&]() { - ASSERT(Process::current() == this); - // Need to make sure we don't swap contexts in the middle - ScopedCritical critical; - // Explicitly clear m_regions *before* restoring the page directory, - // otherwise we may silently corrupt memory! - m_regions.clear(); - // Now that we freed the regions, revert to the original page directory - // and restore the original regions - m_page_directory = move(old_page_directory); - MM.enter_process_paging_scope(*this); - m_regions = move(old_regions); + ScopeGuard space_guard([&]() { + MemoryManager::enter_process_paging_scope(*this); }); if (interpreter_description.is_null()) { - auto result = load_elf_object(main_program_description, FlatPtr { 0 }, ShouldAllocateTls::Yes); + auto result = load_elf_object(new_space.release_nonnull(), main_program_description, FlatPtr { 0 }, ShouldAllocateTls::Yes); if (result.is_error()) return result.error(); - - rollback_regions_guard.disarm(); return result; } @@ -358,7 +350,7 @@ KResultOr Process::load(NonnullRefPtr main return interpreter_load_offset.error(); } - auto interpreter_load_result = load_elf_object(*interpreter_description, interpreter_load_offset.value(), ShouldAllocateTls::No); + auto interpreter_load_result = load_elf_object(new_space.release_nonnull(), *interpreter_description, interpreter_load_offset.value(), ShouldAllocateTls::No); if (interpreter_load_result.is_error()) return interpreter_load_result.error(); @@ -368,7 +360,6 @@ KResultOr Process::load(NonnullRefPtr main ASSERT(!interpreter_load_result.value().tls_alignment); ASSERT(!interpreter_load_result.value().tls_size); - rollback_regions_guard.disarm(); return interpreter_load_result; } @@ -481,34 +472,22 @@ int Process::do_exec(NonnullRefPtr 
main_program_description, Ve if (parts.is_empty()) return -ENOENT; + auto main_program_metadata = main_program_description->metadata(); + + auto load_result_or_error = load(main_program_description, interpreter_description, main_program_header); + if (load_result_or_error.is_error()) { + dbgln("do_exec({}): Failed to load main program or interpreter", path); + return load_result_or_error.error(); + } + + // We commit to the new executable at this point. There is no turning back! + // Disable profiling temporarily in case it's running on this process. TemporaryChange profiling_disabler(m_profiling, false); - // Mark this thread as the current thread that does exec - // No other thread from this process will be scheduled to run - auto current_thread = Thread::current(); - m_exec_tid = current_thread->tid(); - - // NOTE: We switch credentials before altering the memory layout of the process. - // This ensures that ptrace access control takes the right credentials into account. - - // FIXME: This still feels rickety. Perhaps it would be better to simply block ptrace - // clients until we're ready to be traced? Or reject them with EPERM? 
- - auto main_program_metadata = main_program_description->metadata(); - - auto old_euid = m_euid; - auto old_suid = m_suid; - auto old_egid = m_egid; - auto old_sgid = m_sgid; - - ArmedScopeGuard cred_restore_guard = [&] { - m_euid = old_euid; - m_suid = old_suid; - m_egid = old_egid; - m_sgid = old_sgid; - }; + kill_threads_except_self(); + auto& load_result = load_result_or_error.value(); bool executable_is_setid = false; if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) { @@ -522,17 +501,8 @@ int Process::do_exec(NonnullRefPtr main_program_description, Ve } } - auto load_result_or_error = load(main_program_description, interpreter_description, main_program_header); - if (load_result_or_error.is_error()) { - dbgln("do_exec({}): Failed to load main program or interpreter", path); - return load_result_or_error.error(); - } - auto& load_result = load_result_or_error.value(); - - // We can commit to the new credentials at this point. - cred_restore_guard.disarm(); - - kill_threads_except_self(); + m_space = load_result.space.release_nonnull(); + MemoryManager::enter_space(*m_space); #if EXEC_DEBUG dbgln("Memory layout after ELF load:"); @@ -549,20 +519,17 @@ int Process::do_exec(NonnullRefPtr main_program_description, Ve m_execpromises = 0; m_has_execpromises = false; - m_enforces_syscall_regions = false; - m_veil_state = VeilState::None; m_unveiled_paths.clear(); m_coredump_metadata.clear(); + auto current_thread = Thread::current(); current_thread->set_default_signal_dispositions(); current_thread->clear_signals(); clear_futex_queues_on_exec(); - m_region_lookup_cache = {}; - set_dumpable(!executable_is_setid); for (size_t i = 0; i < m_fds.size(); ++i) { @@ -616,8 +583,10 @@ int Process::do_exec(NonnullRefPtr main_program_description, Ve // FIXME: PID/TID ISSUE m_pid = new_main_thread->tid().value(); auto tsr_result = new_main_thread->make_thread_specific_region({}); - if (tsr_result.is_error()) - return tsr_result.error(); + if 
(tsr_result.is_error()) { + // FIXME: We cannot fail this late. Refactor this so the allocation happens before we commit to the new executable. + ASSERT_NOT_REACHED(); + } new_main_thread->reset_fpu_state(); auto& tss = new_main_thread->m_tss; @@ -629,7 +598,7 @@ int Process::do_exec(NonnullRefPtr main_program_description, Ve tss.gs = GDT_SELECTOR_TLS | 3; tss.eip = load_result.entry_eip; tss.esp = new_userspace_esp; - tss.cr3 = m_page_directory->cr3(); + tss.cr3 = space().page_directory().cr3(); tss.ss2 = m_pid.value(); // Throw away any recorded performance events in this process. @@ -870,8 +839,6 @@ int Process::exec(String path, Vector arguments, Vector environm u32 prev_flags = 0; int rc = do_exec(move(description), move(arguments), move(environment), move(interpreter_description), new_main_thread, prev_flags, *main_program_header); - m_exec_tid = 0; - if (rc < 0) return rc; diff --git a/Kernel/Syscalls/fork.cpp b/Kernel/Syscalls/fork.cpp index 62287358c84..c5b4a35bc8d 100644 --- a/Kernel/Syscalls/fork.cpp +++ b/Kernel/Syscalls/fork.cpp @@ -47,15 +47,14 @@ pid_t Process::sys$fork(RegisterState& regs) child->m_has_execpromises = m_has_execpromises; child->m_veil_state = m_veil_state; child->m_unveiled_paths = m_unveiled_paths.deep_copy(); - child->m_enforces_syscall_regions = m_enforces_syscall_regions; child->m_fds = m_fds; child->m_sid = m_sid; child->m_pg = m_pg; child->m_umask = m_umask; + child->m_extra_gids = m_extra_gids; dbgln_if(FORK_DEBUG, "fork: child={}", child); - - child->m_extra_gids = m_extra_gids; + child->space().set_enforces_syscall_regions(space().enforces_syscall_regions()); auto& child_tss = child_first_thread->m_tss; child_tss.eax = 0; // fork() returns 0 in the child :^) @@ -80,8 +79,8 @@ pid_t Process::sys$fork(RegisterState& regs) #endif { - ScopedSpinLock lock(m_lock); - for (auto& region : m_regions) { + ScopedSpinLock lock(space().get_lock()); + for (auto& region : space().regions()) { dbgln_if(FORK_DEBUG, "fork: cloning Region({}) 
'{}' @ {}", &region, region.name(), region.vaddr()); auto region_clone = region.clone(*child); if (!region_clone) { @@ -90,8 +89,8 @@ pid_t Process::sys$fork(RegisterState& regs) return -ENOMEM; } - auto& child_region = child->add_region(region_clone.release_nonnull()); - child_region.map(child->page_directory()); + auto& child_region = child->space().add_region(region_clone.release_nonnull()); + child_region.map(child->space().page_directory()); if (&region == m_master_tls_region.unsafe_ptr()) child->m_master_tls_region = child_region; diff --git a/Kernel/Syscalls/futex.cpp b/Kernel/Syscalls/futex.cpp index 27763ea965b..bee8bf9bef3 100644 --- a/Kernel/Syscalls/futex.cpp +++ b/Kernel/Syscalls/futex.cpp @@ -147,7 +147,7 @@ int Process::sys$futex(Userspace user_params) if (!is_private) { if (!Kernel::is_user_range(VirtualAddress(user_address_or_offset), sizeof(u32))) return -EFAULT; - auto region = MM.find_region_from_vaddr(*Process::current(), VirtualAddress(user_address_or_offset)); + auto region = MM.find_region_from_vaddr(space(), VirtualAddress(user_address_or_offset)); if (!region) return -EFAULT; vmobject = region->vmobject(); @@ -159,7 +159,7 @@ int Process::sys$futex(Userspace user_params) case FUTEX_WAKE_OP: { if (!Kernel::is_user_range(VirtualAddress(user_address_or_offset2), sizeof(u32))) return -EFAULT; - auto region2 = MM.find_region_from_vaddr(*Process::current(), VirtualAddress(user_address_or_offset2)); + auto region2 = MM.find_region_from_vaddr(space(), VirtualAddress(user_address_or_offset2)); if (!region2) return -EFAULT; vmobject2 = region2->vmobject(); diff --git a/Kernel/Syscalls/get_stack_bounds.cpp b/Kernel/Syscalls/get_stack_bounds.cpp index a91a401946d..a1b115ec3cd 100644 --- a/Kernel/Syscalls/get_stack_bounds.cpp +++ b/Kernel/Syscalls/get_stack_bounds.cpp @@ -32,7 +32,7 @@ namespace Kernel { int Process::sys$get_stack_bounds(FlatPtr* user_stack_base, size_t* user_stack_size) { FlatPtr stack_pointer = 
Thread::current()->get_register_dump_from_stack().userspace_esp; - auto* stack_region = MM.find_region_from_vaddr(*this, VirtualAddress(stack_pointer)); + auto* stack_region = MM.find_region_from_vaddr(space(), VirtualAddress(stack_pointer)); if (!stack_region) { ASSERT_NOT_REACHED(); return -EINVAL; diff --git a/Kernel/Syscalls/mmap.cpp b/Kernel/Syscalls/mmap.cpp index eaf0f5c1b51..4a6bb966623 100644 --- a/Kernel/Syscalls/mmap.cpp +++ b/Kernel/Syscalls/mmap.cpp @@ -204,13 +204,13 @@ void* Process::sys$mmap(Userspace user_params) Optional range; if (map_randomized) { - range = page_directory().range_allocator().allocate_randomized(PAGE_ROUND_UP(size), alignment); + range = space().page_directory().range_allocator().allocate_randomized(PAGE_ROUND_UP(size), alignment); } else { - range = allocate_range(VirtualAddress(addr), size, alignment); + range = space().allocate_range(VirtualAddress(addr), size, alignment); if (!range.has_value()) { if (addr && !map_fixed) { // If there's an address but MAP_FIXED wasn't specified, the address is just a hint. - range = allocate_range({}, size, alignment); + range = space().allocate_range({}, size, alignment); } } } @@ -220,7 +220,7 @@ void* Process::sys$mmap(Userspace user_params) if (map_anonymous) { auto strategy = map_noreserve ? AllocationStrategy::None : AllocationStrategy::Reserve; - auto region_or_error = allocate_region(range.value(), !name.is_null() ? name : "mmap", prot, strategy); + auto region_or_error = space().allocate_region(range.value(), !name.is_null() ? 
name : "mmap", prot, strategy); if (region_or_error.is_error()) return (void*)region_or_error.error().error(); region = region_or_error.value(); @@ -280,7 +280,7 @@ int Process::sys$mprotect(void* addr, size_t size, int prot) Range range_to_mprotect = { VirtualAddress(addr), size }; - if (auto* whole_region = find_region_from_range(range_to_mprotect)) { + if (auto* whole_region = space().find_region_from_range(range_to_mprotect)) { if (!whole_region->is_mmap()) return -EPERM; if (!validate_mmap_prot(prot, whole_region->is_stack(), whole_region->vmobject().is_anonymous(), whole_region)) @@ -300,7 +300,7 @@ int Process::sys$mprotect(void* addr, size_t size, int prot) } // Check if we can carve out the desired range from an existing region - if (auto* old_region = find_region_containing(range_to_mprotect)) { + if (auto* old_region = space().find_region_containing(range_to_mprotect)) { if (!old_region->is_mmap()) return -EPERM; if (!validate_mmap_prot(prot, old_region->is_stack(), old_region->vmobject().is_anonymous(), old_region)) @@ -314,23 +314,23 @@ int Process::sys$mprotect(void* addr, size_t size, int prot) // This vector is the region(s) adjacent to our range. // We need to allocate a new region for the range we wanted to change permission bits on. 
- auto adjacent_regions = split_region_around_range(*old_region, range_to_mprotect); + auto adjacent_regions = space().split_region_around_range(*old_region, range_to_mprotect); size_t new_range_offset_in_vmobject = old_region->offset_in_vmobject() + (range_to_mprotect.base().get() - old_region->range().base().get()); - auto& new_region = allocate_split_region(*old_region, range_to_mprotect, new_range_offset_in_vmobject); + auto& new_region = space().allocate_split_region(*old_region, range_to_mprotect, new_range_offset_in_vmobject); new_region.set_readable(prot & PROT_READ); new_region.set_writable(prot & PROT_WRITE); new_region.set_executable(prot & PROT_EXEC); // Unmap the old region here, specifying that we *don't* want the VM deallocated. old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No); - deallocate_region(*old_region); + space().deallocate_region(*old_region); // Map the new regions using our page directory (they were just allocated and don't have one). for (auto* adjacent_region : adjacent_regions) { - adjacent_region->map(page_directory()); + adjacent_region->map(space().page_directory()); } - new_region.map(page_directory()); + new_region.map(space().page_directory()); return 0; } @@ -349,7 +349,7 @@ int Process::sys$madvise(void* address, size_t size, int advice) if (!is_user_range(VirtualAddress(address), size)) return -EFAULT; - auto* region = find_region_from_range({ VirtualAddress(address), size }); + auto* region = space().find_region_from_range({ VirtualAddress(address), size }); if (!region) return -EINVAL; if (!region->is_mmap()) @@ -397,7 +397,7 @@ int Process::sys$set_mmap_name(Userspaceis_mmap()) @@ -406,24 +406,6 @@ int Process::sys$set_mmap_name(Userspace Process::split_region_around_range(const Region& source_region, const Range& desired_range) -{ - Range old_region_range = source_region.range(); - auto remaining_ranges_after_unmap = old_region_range.carve(desired_range); - - 
ASSERT(!remaining_ranges_after_unmap.is_empty()); - auto make_replacement_region = [&](const Range& new_range) -> Region& { - ASSERT(old_region_range.contains(new_range)); - size_t new_range_offset_in_vmobject = source_region.offset_in_vmobject() + (new_range.base().get() - old_region_range.base().get()); - return allocate_split_region(source_region, new_range, new_range_offset_in_vmobject); - }; - Vector new_regions; - for (auto& new_range : remaining_ranges_after_unmap) { - new_regions.unchecked_append(&make_replacement_region(new_range)); - } - return new_regions; -} int Process::sys$munmap(void* addr, size_t size) { REQUIRE_PROMISE(stdio); @@ -435,30 +417,30 @@ int Process::sys$munmap(void* addr, size_t size) return -EFAULT; Range range_to_unmap { VirtualAddress(addr), size }; - if (auto* whole_region = find_region_from_range(range_to_unmap)) { + if (auto* whole_region = space().find_region_from_range(range_to_unmap)) { if (!whole_region->is_mmap()) return -EPERM; - bool success = deallocate_region(*whole_region); + bool success = space().deallocate_region(*whole_region); ASSERT(success); return 0; } - if (auto* old_region = find_region_containing(range_to_unmap)) { + if (auto* old_region = space().find_region_containing(range_to_unmap)) { if (!old_region->is_mmap()) return -EPERM; - auto new_regions = split_region_around_range(*old_region, range_to_unmap); + auto new_regions = space().split_region_around_range(*old_region, range_to_unmap); // We manually unmap the old region here, specifying that we *don't* want the VM deallocated. old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No); - deallocate_region(*old_region); + space().deallocate_region(*old_region); // Instead we give back the unwanted VM manually. 
- page_directory().range_allocator().deallocate(range_to_unmap); + space().page_directory().range_allocator().deallocate(range_to_unmap); // And finally we map the new region(s) using our page directory (they were just allocated and don't have one). for (auto* new_region : new_regions) { - new_region->map(page_directory()); + new_region->map(space().page_directory()); } return 0; } @@ -476,7 +458,7 @@ void* Process::sys$mremap(Userspace user_param if (!copy_from_user(&params, user_params)) return (void*)-EFAULT; - auto* old_region = find_region_from_range(Range { VirtualAddress(params.old_address), params.old_size }); + auto* old_region = space().find_region_from_range(Range { VirtualAddress(params.old_address), params.old_size }); if (!old_region) return (void*)-EINVAL; @@ -491,11 +473,11 @@ void* Process::sys$mremap(Userspace user_param // Unmap without deallocating the VM range since we're going to reuse it. old_region->unmap(Region::ShouldDeallocateVirtualMemoryRange::No); - deallocate_region(*old_region); + space().deallocate_region(*old_region); auto new_vmobject = PrivateInodeVMObject::create_with_inode(inode); - auto new_region_or_error = allocate_region_with_vmobject(range, new_vmobject, 0, old_name, old_prot, false); + auto new_region_or_error = space().allocate_region_with_vmobject(range, new_vmobject, 0, old_name, old_prot, false); if (new_region_or_error.is_error()) return (void*)new_region_or_error.error().error(); auto& new_region = *new_region_or_error.value(); @@ -527,11 +509,11 @@ void* Process::sys$allocate_tls(size_t size) }); ASSERT(main_thread); - auto range = allocate_range({}, size); + auto range = space().allocate_range({}, size); if (!range.has_value()) return (void*)-ENOMEM; - auto region_or_error = allocate_region(range.value(), String(), PROT_READ | PROT_WRITE); + auto region_or_error = space().allocate_region(range.value(), String(), PROT_READ | PROT_WRITE); if (region_or_error.is_error()) return (void*)region_or_error.error().error(); @@ 
-552,15 +534,15 @@ void* Process::sys$allocate_tls(size_t size) int Process::sys$msyscall(void* address) { - if (m_enforces_syscall_regions) + if (space().enforces_syscall_regions()) return -EPERM; if (!address) { - m_enforces_syscall_regions = true; + space().set_enforces_syscall_regions(true); return 0; } - auto* region = find_region_containing(Range { VirtualAddress { address }, 1 }); + auto* region = space().find_region_containing(Range { VirtualAddress { address }, 1 }); if (!region) return -EINVAL; diff --git a/Kernel/Syscalls/ptrace.cpp b/Kernel/Syscalls/ptrace.cpp index 26e91fcaae2..22cb45ec498 100644 --- a/Kernel/Syscalls/ptrace.cpp +++ b/Kernel/Syscalls/ptrace.cpp @@ -73,7 +73,7 @@ KResultOr Process::peek_user_data(Userspace address) KResult Process::poke_user_data(Userspace address, u32 data) { Range range = { VirtualAddress(address), sizeof(u32) }; - auto* region = find_region_containing(range); + auto* region = space().find_region_containing(range); if (!region) return EFAULT; ProcessPagingScope scope(*this); diff --git a/Kernel/Syscalls/thread.cpp b/Kernel/Syscalls/thread.cpp index d65591833ae..ff8d4b0ae32 100644 --- a/Kernel/Syscalls/thread.cpp +++ b/Kernel/Syscalls/thread.cpp @@ -80,7 +80,7 @@ int Process::sys$create_thread(void* (*entry)(void*), Userspacetss(); tss.eip = (FlatPtr)entry; tss.eflags = 0x0202; - tss.cr3 = page_directory().cr3(); + tss.cr3 = space().page_directory().cr3(); tss.esp = (u32)user_stack_address; auto tsr_result = thread->make_thread_specific_region({}); diff --git a/Kernel/Thread.cpp b/Kernel/Thread.cpp index 3a0fba1b4e9..2dc0c7d924a 100644 --- a/Kernel/Thread.cpp +++ b/Kernel/Thread.cpp @@ -108,7 +108,7 @@ Thread::Thread(NonnullRefPtr process, NonnullOwnPtr kernel_stac m_tss.gs = GDT_SELECTOR_TLS | 3; } - m_tss.cr3 = m_process->page_directory().cr3(); + m_tss.cr3 = m_process->space().page_directory().cr3(); m_kernel_stack_base = m_kernel_stack_region->vaddr().get(); m_kernel_stack_top = 
m_kernel_stack_region->vaddr().offset(default_kernel_stack_size).get() & 0xfffffff8u; @@ -1015,11 +1015,11 @@ KResult Thread::make_thread_specific_region(Badge) if (!process().m_master_tls_region) return KSuccess; - auto range = process().allocate_range({}, thread_specific_region_size()); + auto range = process().space().allocate_range({}, thread_specific_region_size()); if (!range.has_value()) return ENOMEM; - auto region_or_error = process().allocate_region(range.value(), "Thread-specific", PROT_READ | PROT_WRITE); + auto region_or_error = process().space().allocate_region(range.value(), "Thread-specific", PROT_READ | PROT_WRITE); if (region_or_error.is_error()) return region_or_error.error(); diff --git a/Kernel/VM/MemoryManager.cpp b/Kernel/VM/MemoryManager.cpp index 3ee5f39671f..60c299b1704 100644 --- a/Kernel/VM/MemoryManager.cpp +++ b/Kernel/VM/MemoryManager.cpp @@ -401,29 +401,29 @@ Region* MemoryManager::kernel_region_from_vaddr(VirtualAddress vaddr) return nullptr; } -Region* MemoryManager::user_region_from_vaddr(Process& process, VirtualAddress vaddr) +Region* MemoryManager::user_region_from_vaddr(Space& space, VirtualAddress vaddr) { - ScopedSpinLock lock(s_mm_lock); // FIXME: Use a binary search tree (maybe red/black?) or some other more appropriate data structure! 
- for (auto& region : process.m_regions) { + ScopedSpinLock lock(space.get_lock()); + for (auto& region : space.regions()) { if (region.contains(vaddr)) return &region; } return nullptr; } -Region* MemoryManager::find_region_from_vaddr(Process& process, VirtualAddress vaddr) +Region* MemoryManager::find_region_from_vaddr(Space& space, VirtualAddress vaddr) { ScopedSpinLock lock(s_mm_lock); - if (auto* region = user_region_from_vaddr(process, vaddr)) + if (auto* region = user_region_from_vaddr(space, vaddr)) return region; return kernel_region_from_vaddr(vaddr); } -const Region* MemoryManager::find_region_from_vaddr(const Process& process, VirtualAddress vaddr) +const Region* MemoryManager::find_region_from_vaddr(const Space& space, VirtualAddress vaddr) { ScopedSpinLock lock(s_mm_lock); - if (auto* region = user_region_from_vaddr(const_cast(process), vaddr)) + if (auto* region = user_region_from_vaddr(const_cast(space), vaddr)) return region; return kernel_region_from_vaddr(vaddr); } @@ -436,8 +436,8 @@ Region* MemoryManager::find_region_from_vaddr(VirtualAddress vaddr) auto page_directory = PageDirectory::find_by_cr3(read_cr3()); if (!page_directory) return nullptr; - ASSERT(page_directory->process()); - return user_region_from_vaddr(*page_directory->process(), vaddr); + ASSERT(page_directory->space()); + return user_region_from_vaddr(*page_directory->space(), vaddr); } PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault) @@ -734,13 +734,18 @@ RefPtr MemoryManager::allocate_supervisor_physical_page() } void MemoryManager::enter_process_paging_scope(Process& process) +{ + enter_space(process.space()); +} + +void MemoryManager::enter_space(Space& space) { auto current_thread = Thread::current(); ASSERT(current_thread != nullptr); ScopedSpinLock lock(s_mm_lock); - current_thread->tss().cr3 = process.page_directory().cr3(); - write_cr3(process.page_directory().cr3()); + current_thread->tss().cr3 = space.page_directory().cr3(); + 
write_cr3(space.page_directory().cr3()); } void MemoryManager::flush_tlb_local(VirtualAddress vaddr, size_t page_count) @@ -846,7 +851,7 @@ bool MemoryManager::validate_user_stack(const Process& process, VirtualAddress v if (!is_user_address(vaddr)) return false; ScopedSpinLock lock(s_mm_lock); - auto* region = user_region_from_vaddr(const_cast(process), vaddr); + auto* region = user_region_from_vaddr(const_cast(process).space(), vaddr); return region && region->is_user_accessible() && region->is_stack(); } diff --git a/Kernel/VM/MemoryManager.h b/Kernel/VM/MemoryManager.h index 13c3a79d788..ceee66796e8 100644 --- a/Kernel/VM/MemoryManager.h +++ b/Kernel/VM/MemoryManager.h @@ -143,7 +143,8 @@ public: PageFaultResponse handle_page_fault(const PageFault&); - void enter_process_paging_scope(Process&); + static void enter_process_paging_scope(Process&); + static void enter_space(Space&); bool validate_user_stack(const Process&, VirtualAddress) const; @@ -196,8 +197,8 @@ public: } } - static Region* find_region_from_vaddr(Process&, VirtualAddress); - static const Region* find_region_from_vaddr(const Process&, VirtualAddress); + static Region* find_region_from_vaddr(Space&, VirtualAddress); + static const Region* find_region_from_vaddr(const Space&, VirtualAddress); void dump_kernel_regions(); @@ -225,7 +226,7 @@ private: static void flush_tlb_local(VirtualAddress, size_t page_count = 1); static void flush_tlb(const PageDirectory*, VirtualAddress, size_t page_count = 1); - static Region* user_region_from_vaddr(Process&, VirtualAddress); + static Region* user_region_from_vaddr(Space&, VirtualAddress); static Region* kernel_region_from_vaddr(VirtualAddress); static Region* find_region_from_vaddr(VirtualAddress); diff --git a/Kernel/VM/PageDirectory.cpp b/Kernel/VM/PageDirectory.cpp index 939853810f0..3f389423533 100644 --- a/Kernel/VM/PageDirectory.cpp +++ b/Kernel/VM/PageDirectory.cpp @@ -73,7 +73,7 @@ PageDirectory::PageDirectory() m_directory_pages[3] = 
PhysicalPage::create(boot_pd3_paddr, true, false); } -PageDirectory::PageDirectory(Process& process, const RangeAllocator* parent_range_allocator) +PageDirectory::PageDirectory(const RangeAllocator* parent_range_allocator) { ScopedSpinLock lock(s_mm_lock); if (parent_range_allocator) { @@ -142,8 +142,8 @@ PageDirectory::PageDirectory(Process& process, const RangeAllocator* parent_rang auto* new_pd = MM.quickmap_pd(*this, 0); memcpy(new_pd, &buffer, sizeof(PageDirectoryEntry)); - // If we got here, we successfully created it. Set m_process now - m_process = &process; + // If we got here, we successfully created it. Set m_space now + m_valid = true; cr3_map().set(cr3(), this); } @@ -151,7 +151,7 @@ PageDirectory::PageDirectory(Process& process, const RangeAllocator* parent_rang PageDirectory::~PageDirectory() { ScopedSpinLock lock(s_mm_lock); - if (m_process) + if (m_space) cr3_map().remove(cr3()); } diff --git a/Kernel/VM/PageDirectory.h b/Kernel/VM/PageDirectory.h index f4bd7f41ff6..ba26e37f011 100644 --- a/Kernel/VM/PageDirectory.h +++ b/Kernel/VM/PageDirectory.h @@ -40,10 +40,10 @@ class PageDirectory : public RefCounted { friend class MemoryManager; public: - static RefPtr create_for_userspace(Process& process, const RangeAllocator* parent_range_allocator = nullptr) + static RefPtr create_for_userspace(const RangeAllocator* parent_range_allocator = nullptr) { - auto page_directory = adopt(*new PageDirectory(process, parent_range_allocator)); - if (!page_directory->process()) + auto page_directory = adopt(*new PageDirectory(parent_range_allocator)); + if (!page_directory->is_valid()) return {}; return page_directory; } @@ -55,24 +55,31 @@ public: u32 cr3() const { return m_directory_table->paddr().get(); } RangeAllocator& range_allocator() { return m_range_allocator; } + const RangeAllocator& range_allocator() const { return m_range_allocator; } + RangeAllocator& identity_range_allocator() { return m_identity_range_allocator; } - Process* process() { return 
m_process; } - const Process* process() const { return m_process; } + bool is_valid() const { return m_valid; } + + Space* space() { return m_space; } + const Space* space() const { return m_space; } + + void set_space(Badge, Space& space) { m_space = &space; } RecursiveSpinLock& get_lock() { return m_lock; } private: - PageDirectory(Process&, const RangeAllocator* parent_range_allocator); + explicit PageDirectory(const RangeAllocator* parent_range_allocator); PageDirectory(); - Process* m_process { nullptr }; + Space* m_space { nullptr }; RangeAllocator m_range_allocator; RangeAllocator m_identity_range_allocator; RefPtr m_directory_table; RefPtr m_directory_pages[4]; HashMap> m_page_tables; RecursiveSpinLock m_lock; + bool m_valid { false }; }; } diff --git a/Kernel/VM/Space.cpp b/Kernel/VM/Space.cpp new file mode 100644 index 00000000000..bbee739db10 --- /dev/null +++ b/Kernel/VM/Space.cpp @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2021, Andreas Kling + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +namespace Kernel { + +OwnPtr Space::create(Process& process, const Space* parent) +{ + auto page_directory = PageDirectory::create_for_userspace(parent ? &parent->page_directory().range_allocator() : nullptr); + if (!page_directory) + return {}; + auto space = adopt_own(*new Space(process, page_directory.release_nonnull())); + space->page_directory().set_space({}, *space); + return space; +} + +Space::Space(Process& process, NonnullRefPtr page_directory) + : m_process(&process) + , m_page_directory(move(page_directory)) +{ +} + +Space::~Space() +{ +} + +Optional Space::allocate_range(VirtualAddress vaddr, size_t size, size_t alignment) +{ + vaddr.mask(PAGE_MASK); + size = PAGE_ROUND_UP(size); + if (vaddr.is_null()) + return page_directory().range_allocator().allocate_anywhere(size, alignment); + return page_directory().range_allocator().allocate_specific(vaddr, size); +} + +Region& Space::allocate_split_region(const Region& source_region, const Range& range, size_t offset_in_vmobject) +{ + auto& region = add_region(Region::create_user_accessible( + m_process, range, source_region.vmobject(), offset_in_vmobject, source_region.name(), source_region.access(), source_region.is_cacheable(), source_region.is_shared())); + region.set_syscall_region(source_region.is_syscall_region()); + region.set_mmap(source_region.is_mmap()); + region.set_stack(source_region.is_stack()); + 
size_t page_offset_in_source_region = (offset_in_vmobject - source_region.offset_in_vmobject()) / PAGE_SIZE; + for (size_t i = 0; i < region.page_count(); ++i) { + if (source_region.should_cow(page_offset_in_source_region + i)) + region.set_should_cow(i, true); + } + return region; +} + +KResultOr Space::allocate_region(const Range& range, const String& name, int prot, AllocationStrategy strategy) +{ + ASSERT(range.is_valid()); + auto vmobject = AnonymousVMObject::create_with_size(range.size(), strategy); + if (!vmobject) + return ENOMEM; + auto region = Region::create_user_accessible(m_process, range, vmobject.release_nonnull(), 0, name, prot_to_region_access_flags(prot), true, false); + if (!region->map(page_directory())) + return ENOMEM; + return &add_region(move(region)); +} + +KResultOr Space::allocate_region_with_vmobject(const Range& range, NonnullRefPtr vmobject, size_t offset_in_vmobject, const String& name, int prot, bool shared) +{ + ASSERT(range.is_valid()); + size_t end_in_vmobject = offset_in_vmobject + range.size(); + if (end_in_vmobject <= offset_in_vmobject) { + dbgln("allocate_region_with_vmobject: Overflow (offset + size)"); + return EINVAL; + } + if (offset_in_vmobject >= vmobject->size()) { + dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an offset past the end of its VMObject."); + return EINVAL; + } + if (end_in_vmobject > vmobject->size()) { + dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an end past the end of its VMObject."); + return EINVAL; + } + offset_in_vmobject &= PAGE_MASK; + auto& region = add_region(Region::create_user_accessible(m_process, range, move(vmobject), offset_in_vmobject, name, prot_to_region_access_flags(prot), true, shared)); + if (!region.map(page_directory())) { + // FIXME: What is an appropriate error code here, really? 
+ return ENOMEM; + } + return &region; +} + +bool Space::deallocate_region(Region& region) +{ + OwnPtr region_protector; + ScopedSpinLock lock(m_lock); + + if (m_region_lookup_cache.region.unsafe_ptr() == &region) + m_region_lookup_cache.region = nullptr; + for (size_t i = 0; i < m_regions.size(); ++i) { + if (&m_regions[i] == &region) { + region_protector = m_regions.unstable_take(i); + return true; + } + } + return false; +} + +Region* Space::find_region_from_range(const Range& range) +{ + ScopedSpinLock lock(m_lock); + if (m_region_lookup_cache.range.has_value() && m_region_lookup_cache.range.value() == range && m_region_lookup_cache.region) + return m_region_lookup_cache.region.unsafe_ptr(); + + size_t size = PAGE_ROUND_UP(range.size()); + for (auto& region : m_regions) { + if (region.vaddr() == range.base() && region.size() == size) { + m_region_lookup_cache.range = range; + m_region_lookup_cache.region = region; + return &region; + } + } + return nullptr; +} + +Region* Space::find_region_containing(const Range& range) +{ + ScopedSpinLock lock(m_lock); + for (auto& region : m_regions) { + if (region.contains(range)) + return &region; + } + return nullptr; +} + +Region& Space::add_region(NonnullOwnPtr region) +{ + auto* ptr = region.ptr(); + ScopedSpinLock lock(m_lock); + m_regions.append(move(region)); + return *ptr; +} + +// Carve out a virtual address range from a region and return the two regions on either side +Vector Space::split_region_around_range(const Region& source_region, const Range& desired_range) +{ + Range old_region_range = source_region.range(); + auto remaining_ranges_after_unmap = old_region_range.carve(desired_range); + + ASSERT(!remaining_ranges_after_unmap.is_empty()); + auto make_replacement_region = [&](const Range& new_range) -> Region& { + ASSERT(old_region_range.contains(new_range)); + size_t new_range_offset_in_vmobject = source_region.offset_in_vmobject() + (new_range.base().get() - old_region_range.base().get()); + return 
allocate_split_region(source_region, new_range, new_range_offset_in_vmobject); + }; + Vector new_regions; + for (auto& new_range : remaining_ranges_after_unmap) { + new_regions.unchecked_append(&make_replacement_region(new_range)); + } + return new_regions; +} + +void Space::dump_regions() +{ + klog() << "Process regions:"; + klog() << "BEGIN END SIZE ACCESS NAME"; + + ScopedSpinLock lock(m_lock); + + Vector sorted_regions; + sorted_regions.ensure_capacity(m_regions.size()); + for (auto& region : m_regions) + sorted_regions.append(®ion); + quick_sort(sorted_regions, [](auto& a, auto& b) { + return a->vaddr() < b->vaddr(); + }); + + for (auto& sorted_region : sorted_regions) { + auto& region = *sorted_region; + dmesgln("{:08x} -- {:08x} {:08x} {:c}{:c}{:c}{:c}{:c} {}", region.vaddr().get(), region.vaddr().offset(region.size() - 1).get(), region.size(), + region.is_readable() ? 'R' : ' ', + region.is_writable() ? 'W' : ' ', + region.is_executable() ? 'X' : ' ', + region.is_shared() ? 'S' : ' ', + region.is_stack() ? 'T' : ' ', + region.is_syscall_region() ? 'C' : ' ', + region.name()); + } + MM.dump_kernel_regions(); +} + +void Space::remove_all_regions(Badge) +{ + ScopedSpinLock lock(m_lock); + m_regions.clear(); +} + +} diff --git a/Kernel/VM/Space.h b/Kernel/VM/Space.h new file mode 100644 index 00000000000..4e4a76dc134 --- /dev/null +++ b/Kernel/VM/Space.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2018-2021, Andreas Kling + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+// NOTE(review): the include targets and the template arguments in this header
+// were lost in extraction and have been reconstructed from how each name is
+// used below (SpinLock<u32> in particular is an assumption) — confirm against
+// the tree before relying on them.
+#include <AK/NonnullOwnPtrVector.h>
+#include <AK/WeakPtr.h>
+#include <Kernel/UnixTypes.h>
+#include <Kernel/VM/AllocationStrategy.h>
+#include <Kernel/VM/PageDirectory.h>
+
+namespace Kernel {
+
+// A process's address space: holds the page directory and the list of regions
+// mapped for that process, together with the lock that guards the region list.
+class Space {
+public:
+    static OwnPtr<Space> create(Process&, const Space* parent);
+    ~Space();
+
+    PageDirectory& page_directory() { return *m_page_directory; }
+    const PageDirectory& page_directory() const { return *m_page_directory; }
+
+    // Takes ownership of the region and returns a reference to it.
+    Region& add_region(NonnullOwnPtr<Region>);
+
+    size_t region_count() const { return m_regions.size(); }
+
+    NonnullOwnPtrVector<Region>& regions() { return m_regions; }
+    const NonnullOwnPtrVector<Region>& regions() const { return m_regions; }
+
+    void dump_regions();
+
+    Optional<Range> allocate_range(VirtualAddress, size_t, size_t alignment = PAGE_SIZE);
+
+    // Both allocators return the new region on success or an error code on failure.
+    KResultOr<Region*> allocate_region_with_vmobject(const Range&, NonnullRefPtr<VMObject>, size_t offset_in_vmobject, const String& name, int prot, bool shared);
+    KResultOr<Region*> allocate_region(const Range&, const String& name, int prot = PROT_READ | PROT_WRITE, AllocationStrategy strategy = AllocationStrategy::Reserve);
+    // Returns true if the region belonged to this space and was removed.
+    bool deallocate_region(Region& region);
+
+    Region& allocate_split_region(const Region& source_region, const Range&, size_t offset_in_vmobject);
+    Vector<Region*, 2> split_region_around_range(const Region& source_region, const Range&);
+
+    // Lookups return nullptr when no matching region exists.
+    Region* find_region_from_range(const Range&);
+    Region* find_region_containing(const Range&);
+
+    bool enforces_syscall_regions() const { return m_enforces_syscall_regions; }
+    void set_enforces_syscall_regions(bool b) { m_enforces_syscall_regions = b; }
+
+    void remove_all_regions(Badge<Process>);
+
+    SpinLock<u32>& get_lock() const { return m_lock; }
+
+private:
+    Space(Process&, NonnullRefPtr<PageDirectory>);
+
+    Process* m_process { nullptr };
+    // mutable so that get_lock() can hand out the lock from a const Space.
+    mutable SpinLock<u32> m_lock;
+
+    RefPtr<PageDirectory> m_page_directory;
+
+    NonnullOwnPtrVector<Region> m_regions;
+
+    // Remembers the last range that was looked up and the region it resolved to.
+    struct RegionLookupCache {
+        Optional<Range> range;
+        WeakPtr<Region> region;
+    };
+    RegionLookupCache m_region_lookup_cache;
+
+    bool m_enforces_syscall_regions { false };
+};
+
+}