From 23a7ccf607d2be901604534e1a79c3c8a3124097 Mon Sep 17 00:00:00 2001 From: Liav A Date: Thu, 15 Dec 2022 11:42:40 +0200 Subject: [PATCH] Kernel+LibCore+LibC: Split the mount syscall into multiple syscalls This is preparation for creating a usable mechanism for filesystem-specific mount flags. To keep some compatibility with userland code, the LibC and LibCore mount functions remain usable, but instead of doing one "atomic" syscall, they now do multiple syscalls to perform the complete procedure of mounting a filesystem. The FileBackedFileSystem IntrusiveList in the VFS code is now protected by a Mutex, because when we mount a new filesystem, we need to check whether a filesystem was already created for a given source_fd, so we scan that list for the matching OpenFileDescription. If we fail to find an already-created filesystem, we create a new one and register it in the list once it has been mounted successfully. We use a Mutex because we might need to initiate disk access during filesystem creation, which takes other mutexes in other parts of the kernel, making it impossible to hold a spinlock while doing this. 
--- Base/usr/share/man/man2/pledge.md | 1 + Kernel/API/FileSystem/MountSpecificFlags.h | 30 +++ Kernel/API/Ioctl.h | 2 + Kernel/API/Syscall.h | 23 +- Kernel/CMakeLists.txt | 1 + Kernel/Devices/Storage/StorageManagement.cpp | 4 +- Kernel/FileSystem/DevPtsFS/FileSystem.cpp | 2 +- Kernel/FileSystem/DevPtsFS/FileSystem.h | 2 +- Kernel/FileSystem/Ext2FS/FileSystem.cpp | 2 +- Kernel/FileSystem/Ext2FS/FileSystem.h | 2 +- Kernel/FileSystem/FATFS/FileSystem.cpp | 2 +- Kernel/FileSystem/FATFS/FileSystem.h | 2 +- Kernel/FileSystem/File.h | 1 + Kernel/FileSystem/ISO9660FS/FileSystem.cpp | 2 +- Kernel/FileSystem/ISO9660FS/FileSystem.h | 2 +- Kernel/FileSystem/Initializer.h | 32 +++ Kernel/FileSystem/MountFile.cpp | 122 ++++++++++ Kernel/FileSystem/MountFile.h | 43 ++++ Kernel/FileSystem/OpenFileDescription.cpp | 20 ++ Kernel/FileSystem/OpenFileDescription.h | 4 + Kernel/FileSystem/Plan9FS/FileSystem.cpp | 2 +- Kernel/FileSystem/Plan9FS/FileSystem.h | 2 +- Kernel/FileSystem/ProcFS/FileSystem.cpp | 2 +- Kernel/FileSystem/ProcFS/FileSystem.h | 2 +- Kernel/FileSystem/RAMFS/FileSystem.cpp | 2 +- Kernel/FileSystem/RAMFS/FileSystem.h | 2 +- Kernel/FileSystem/SysFS/FileSystem.cpp | 2 +- Kernel/FileSystem/SysFS/FileSystem.h | 2 +- Kernel/FileSystem/VirtualFileSystem.cpp | 218 ++++++++++++------ Kernel/FileSystem/VirtualFileSystem.h | 20 +- Kernel/Forward.h | 1 + Kernel/Syscalls/mount.cpp | 134 ++++------- Kernel/Tasks/Process.h | 18 +- .../DevTools/UserspaceEmulator/Emulator.h | 5 +- .../UserspaceEmulator/Emulator_syscalls.cpp | 52 ++++- Userland/Libraries/LibC/unistd.cpp | 56 ++++- Userland/Libraries/LibC/unistd.h | 3 + Userland/Libraries/LibCore/System.cpp | 41 +++- Userland/Libraries/LibCore/System.h | 2 + 39 files changed, 651 insertions(+), 214 deletions(-) create mode 100644 Kernel/API/FileSystem/MountSpecificFlags.h create mode 100644 Kernel/FileSystem/Initializer.h create mode 100644 Kernel/FileSystem/MountFile.cpp create mode 100644 Kernel/FileSystem/MountFile.h diff --git 
a/Base/usr/share/man/man2/pledge.md b/Base/usr/share/man/man2/pledge.md index 72374779b14..979b06c577a 100644 --- a/Base/usr/share/man/man2/pledge.md +++ b/Base/usr/share/man/man2/pledge.md @@ -54,6 +54,7 @@ If the process later attempts to use any system functionality it has previously * `ptrace`: The [`ptrace`(2)](help://man/2/ptrace) syscall (\*) * `prot_exec`: [`mmap`(2)](help://man/2/mmap) and [`mprotect`(2)](help://man/2/mprotect) with `PROT_EXEC` * `map_fixed`: [`mmap`(2)](help://man/2/mmap) with `MAP_FIXED` or `MAP_FIXED_NOREPLACE` (\*) +* `mount`: [`mount`(2)](help://man/2/mount) Various filesystem mount related syscalls (\*) * `no_error`: Ignore requests of pledge elevation going forwards, this is useful for enforcing _execpromises_ while the child process wants to ask for more upfront (Note that the elevation requests are _not_ granted, merely ignored), this is similar to the `error` pledge in OpenBSD. * `jail`: Various jail-specific syscalls (\*) diff --git a/Kernel/API/FileSystem/MountSpecificFlags.h b/Kernel/API/FileSystem/MountSpecificFlags.h new file mode 100644 index 00000000000..b08de48da67 --- /dev/null +++ b/Kernel/API/FileSystem/MountSpecificFlags.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2022-2023, Liav A. 
+ * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include + +#define MOUNT_SPECIFIC_FLAG_KEY_STRING_MAX_LENGTH 64 + +#define MOUNT_SPECIFIC_FLAG_NON_ASCII_STRING_TYPE_MAX_LENGTH 64 +#define MOUNT_SPECIFIC_FLAG_ASCII_STRING_TYPE_MAX_LENGTH 1024 + +struct MountSpecificFlag { + u32 key_string_length; + u32 value_length; + + enum class ValueType : u32 { + Boolean = 0, + UnsignedInteger, + SignedInteger, + ASCIIString, + }; + + ValueType value_type; + unsigned char const* key_string_addr; + void const* value_addr; +}; diff --git a/Kernel/API/Ioctl.h b/Kernel/API/Ioctl.h index 3f8b89e3221..0785a320967 100644 --- a/Kernel/API/Ioctl.h +++ b/Kernel/API/Ioctl.h @@ -108,6 +108,7 @@ enum IOCtlNumber { KEYBOARD_IOCTL_SET_NUM_LOCK, KEYBOARD_IOCTL_GET_CAPS_LOCK, KEYBOARD_IOCTL_SET_CAPS_LOCK, + MOUNT_IOCTL_SET_MOUNT_SPECIFIC_FLAG, SIOCATMARK, SIOCSIFADDR, SIOCGIFADDR, @@ -190,6 +191,7 @@ enum IOCtlNumber { #define FIBMAP FIBMAP #define FIONBIO FIONBIO #define FIONREAD FIONREAD +#define MOUNT_IOCTL_SET_MOUNT_SPECIFIC_FLAG MOUNT_IOCTL_SET_MOUNT_SPECIFIC_FLAG #define SOUNDCARD_IOCTL_SET_SAMPLE_RATE SOUNDCARD_IOCTL_SET_SAMPLE_RATE #define SOUNDCARD_IOCTL_GET_SAMPLE_RATE SOUNDCARD_IOCTL_GET_SAMPLE_RATE #define STORAGE_DEVICE_GET_SIZE STORAGE_DEVICE_GET_SIZE diff --git a/Kernel/API/Syscall.h b/Kernel/API/Syscall.h index 565b5e1fe28..55df161b438 100644 --- a/Kernel/API/Syscall.h +++ b/Kernel/API/Syscall.h @@ -82,6 +82,8 @@ enum class NeedsBigProcessLock { S(fork, NeedsBigProcessLock::No) \ S(fstat, NeedsBigProcessLock::No) \ S(fstatvfs, NeedsBigProcessLock::No) \ + S(fsopen, NeedsBigProcessLock::No) \ + S(fsmount, NeedsBigProcessLock::No) \ S(fsync, NeedsBigProcessLock::No) \ S(ftruncate, NeedsBigProcessLock::No) \ S(futex, NeedsBigProcessLock::Yes) \ @@ -128,7 +130,6 @@ enum class NeedsBigProcessLock { S(mkdir, NeedsBigProcessLock::No) \ S(mknod, NeedsBigProcessLock::No) \ S(mmap, NeedsBigProcessLock::No) \ - S(mount, NeedsBigProcessLock::Yes) \ S(mprotect, 
NeedsBigProcessLock::No) \ S(mremap, NeedsBigProcessLock::No) \ S(msync, NeedsBigProcessLock::No) \ @@ -432,9 +433,19 @@ struct SC_rename_params { StringArgument new_path; }; -struct SC_mount_params { - StringArgument target; +struct SC_fsopen_params { StringArgument fs_type; + int flags; +}; + +struct SC_fsmount_params { + int mount_fd; + StringArgument target; + int source_fd; +}; + +struct SC_bindmount_params { + StringArgument target; int source_fd; int flags; }; @@ -444,12 +455,6 @@ struct SC_remount_params { int flags; }; -struct SC_bindmount_params { - StringArgument target; - int source_fd; - int flags; -}; - struct SC_pledge_params { StringArgument promises; StringArgument execpromises; diff --git a/Kernel/CMakeLists.txt b/Kernel/CMakeLists.txt index 7c508ab3340..ffe4fb1c191 100644 --- a/Kernel/CMakeLists.txt +++ b/Kernel/CMakeLists.txt @@ -138,6 +138,7 @@ set(KERNEL_SOURCES FileSystem/ISO9660FS/FileSystem.cpp FileSystem/ISO9660FS/Inode.cpp FileSystem/Mount.cpp + FileSystem/MountFile.cpp FileSystem/OpenFileDescription.cpp FileSystem/Plan9FS/FileSystem.cpp FileSystem/Plan9FS/Inode.cpp diff --git a/Kernel/Devices/Storage/StorageManagement.cpp b/Kernel/Devices/Storage/StorageManagement.cpp index 00d96e32d8e..b535137baef 100644 --- a/Kernel/Devices/Storage/StorageManagement.cpp +++ b/Kernel/Devices/Storage/StorageManagement.cpp @@ -456,7 +456,9 @@ NonnullRefPtr StorageManagement::root_filesystem() const auto description_or_error = OpenFileDescription::try_create(boot_device_description.release_nonnull()); VERIFY(!description_or_error.is_error()); - auto file_system = Ext2FS::try_create(description_or_error.release_value()).release_value(); + Array mount_specific_data; + mount_specific_data.fill(0); + auto file_system = Ext2FS::try_create(description_or_error.release_value(), mount_specific_data.span()).release_value(); if (auto result = file_system->initialize(); result.is_error()) { dump_storage_devices_and_partitions(); diff --git 
a/Kernel/FileSystem/DevPtsFS/FileSystem.cpp b/Kernel/FileSystem/DevPtsFS/FileSystem.cpp index 304f2ae7c3c..b33f969500f 100644 --- a/Kernel/FileSystem/DevPtsFS/FileSystem.cpp +++ b/Kernel/FileSystem/DevPtsFS/FileSystem.cpp @@ -13,7 +13,7 @@ namespace Kernel { -ErrorOr> DevPtsFS::try_create() +ErrorOr> DevPtsFS::try_create(ReadonlyBytes) { return TRY(adopt_nonnull_ref_or_enomem(new (nothrow) DevPtsFS)); } diff --git a/Kernel/FileSystem/DevPtsFS/FileSystem.h b/Kernel/FileSystem/DevPtsFS/FileSystem.h index 0ee1dcf801f..56dd51f7e5e 100644 --- a/Kernel/FileSystem/DevPtsFS/FileSystem.h +++ b/Kernel/FileSystem/DevPtsFS/FileSystem.h @@ -20,7 +20,7 @@ class DevPtsFS final : public FileSystem { public: virtual ~DevPtsFS() override; - static ErrorOr> try_create(); + static ErrorOr> try_create(ReadonlyBytes); virtual ErrorOr initialize() override; virtual StringView class_name() const override { return "DevPtsFS"sv; } diff --git a/Kernel/FileSystem/Ext2FS/FileSystem.cpp b/Kernel/FileSystem/Ext2FS/FileSystem.cpp index 388f44d2bcc..d0f3e75038a 100644 --- a/Kernel/FileSystem/Ext2FS/FileSystem.cpp +++ b/Kernel/FileSystem/Ext2FS/FileSystem.cpp @@ -13,7 +13,7 @@ namespace Kernel { -ErrorOr> Ext2FS::try_create(OpenFileDescription& file_description) +ErrorOr> Ext2FS::try_create(OpenFileDescription& file_description, ReadonlyBytes) { return TRY(adopt_nonnull_ref_or_enomem(new (nothrow) Ext2FS(file_description))); } diff --git a/Kernel/FileSystem/Ext2FS/FileSystem.h b/Kernel/FileSystem/Ext2FS/FileSystem.h index 08c75268a77..40096e17238 100644 --- a/Kernel/FileSystem/Ext2FS/FileSystem.h +++ b/Kernel/FileSystem/Ext2FS/FileSystem.h @@ -27,7 +27,7 @@ public: FileSize64bits = 1 << 1, }; - static ErrorOr> try_create(OpenFileDescription&); + static ErrorOr> try_create(OpenFileDescription&, ReadonlyBytes); virtual ~Ext2FS() override; diff --git a/Kernel/FileSystem/FATFS/FileSystem.cpp b/Kernel/FileSystem/FATFS/FileSystem.cpp index 728738866ff..99d3465dbe9 100644 --- 
a/Kernel/FileSystem/FATFS/FileSystem.cpp +++ b/Kernel/FileSystem/FATFS/FileSystem.cpp @@ -10,7 +10,7 @@ namespace Kernel { -ErrorOr> FATFS::try_create(OpenFileDescription& file_description) +ErrorOr> FATFS::try_create(OpenFileDescription& file_description, ReadonlyBytes) { return TRY(adopt_nonnull_ref_or_enomem(new (nothrow) FATFS(file_description))); } diff --git a/Kernel/FileSystem/FATFS/FileSystem.h b/Kernel/FileSystem/FATFS/FileSystem.h index 7b0949f5c0f..d02f2c96ca4 100644 --- a/Kernel/FileSystem/FATFS/FileSystem.h +++ b/Kernel/FileSystem/FATFS/FileSystem.h @@ -21,7 +21,7 @@ class FATFS final : public BlockBasedFileSystem { friend FATInode; public: - static ErrorOr> try_create(OpenFileDescription&); + static ErrorOr> try_create(OpenFileDescription&, ReadonlyBytes); virtual ~FATFS() override = default; virtual StringView class_name() const override { return "FATFS"sv; } diff --git a/Kernel/FileSystem/File.h b/Kernel/FileSystem/File.h index f1601e8f3f9..63e8c6d7a31 100644 --- a/Kernel/FileSystem/File.h +++ b/Kernel/FileSystem/File.h @@ -114,6 +114,7 @@ public: virtual bool is_character_device() const { return false; } virtual bool is_socket() const { return false; } virtual bool is_inode_watcher() const { return false; } + virtual bool is_mount_file() const { return false; } virtual bool is_regular_file() const { return false; } diff --git a/Kernel/FileSystem/ISO9660FS/FileSystem.cpp b/Kernel/FileSystem/ISO9660FS/FileSystem.cpp index c95d35f64fb..e7b17617f2a 100644 --- a/Kernel/FileSystem/ISO9660FS/FileSystem.cpp +++ b/Kernel/FileSystem/ISO9660FS/FileSystem.cpp @@ -16,7 +16,7 @@ constexpr u32 first_data_area_block = 16; constexpr u32 logical_sector_size = 2048; constexpr u32 max_cached_directory_entries = 128; -ErrorOr> ISO9660FS::try_create(OpenFileDescription& description) +ErrorOr> ISO9660FS::try_create(OpenFileDescription& description, ReadonlyBytes) { return TRY(adopt_nonnull_ref_or_enomem(new (nothrow) ISO9660FS(description))); } diff --git 
a/Kernel/FileSystem/ISO9660FS/FileSystem.h b/Kernel/FileSystem/ISO9660FS/FileSystem.h index ce99465ccb0..3b3842573b0 100644 --- a/Kernel/FileSystem/ISO9660FS/FileSystem.h +++ b/Kernel/FileSystem/ISO9660FS/FileSystem.h @@ -29,7 +29,7 @@ class ISO9660FS final : public BlockBasedFileSystem { friend ISO9660DirectoryIterator; public: - static ErrorOr> try_create(OpenFileDescription&); + static ErrorOr> try_create(OpenFileDescription&, ReadonlyBytes); virtual ~ISO9660FS() override; virtual StringView class_name() const override { return "ISO9660FS"sv; } diff --git a/Kernel/FileSystem/Initializer.h b/Kernel/FileSystem/Initializer.h new file mode 100644 index 00000000000..e1eb094be5c --- /dev/null +++ b/Kernel/FileSystem/Initializer.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2022, Liav A. + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace Kernel { + +struct FileSystemInitializer { + StringView short_name; + StringView name; + bool requires_open_file_description { false }; + bool requires_block_device { false }; + bool requires_seekable_file { false }; + ErrorOr> (*create_with_fd)(OpenFileDescription&, ReadonlyBytes) = nullptr; + ErrorOr> (*create)(ReadonlyBytes) = nullptr; + ErrorOr (*handle_mount_boolean_flag)(Span, StringView key, bool) = nullptr; + ErrorOr (*handle_mount_unsigned_integer_flag)(Span, StringView key, u64) = nullptr; + ErrorOr (*handle_mount_signed_integer_flag)(Span, StringView key, i64) = nullptr; + ErrorOr (*handle_mount_ascii_string_flag)(Span, StringView key, StringView value) = nullptr; +}; + +} diff --git a/Kernel/FileSystem/MountFile.cpp b/Kernel/FileSystem/MountFile.cpp new file mode 100644 index 00000000000..b2b6d81e6f4 --- /dev/null +++ b/Kernel/FileSystem/MountFile.cpp @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2022-2023, Liav A. 
+ * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Kernel { + +ErrorOr> MountFile::create(FileSystemInitializer const& file_system_initializer, int flags) +{ + // NOTE: We should not open a MountFile if someone wants to either remount or bindmount. + // There's a check for this in the fsopen syscall entry handler, but here we just assert + // to ensure this never happens. + VERIFY(!(flags & MS_BIND)); + VERIFY(!(flags & MS_REMOUNT)); + auto mount_specific_data_buffer = TRY(KBuffer::try_create_with_size("Mount Specific Data"sv, PAGE_SIZE, Memory::Region::Access::ReadWrite, AllocationStrategy::AllocateNow)); + return TRY(adopt_nonnull_lock_ref_or_enomem(new (nothrow) MountFile(file_system_initializer, flags, move(mount_specific_data_buffer)))); +} + +MountFile::MountFile(FileSystemInitializer const& file_system_initializer, int flags, NonnullOwnPtr mount_specific_data) + : m_flags(flags) + , m_file_system_initializer(file_system_initializer) +{ + m_mount_specific_data.with_exclusive([&](auto& our_mount_specific_data) { + our_mount_specific_data = move(mount_specific_data); + memset(our_mount_specific_data->data(), 0, our_mount_specific_data->size()); + }); +} + +MountFile::~MountFile() = default; + +ErrorOr MountFile::ioctl(OpenFileDescription&, unsigned request, Userspace arg) +{ + return m_mount_specific_data.with_exclusive([&](auto& our_mount_specific_data) -> ErrorOr { + switch (request) { + case MOUNT_IOCTL_SET_MOUNT_SPECIFIC_FLAG: { + auto user_mount_specific_data = static_ptr_cast(arg); + auto mount_specific_data = TRY(copy_typed_from_user(user_mount_specific_data)); + if ((mount_specific_data.value_type == MountSpecificFlag::ValueType::SignedInteger || mount_specific_data.value_type == MountSpecificFlag::ValueType::UnsignedInteger) && mount_specific_data.value_length != 8) + return EDOM; + if 
(mount_specific_data.key_string_length > MOUNT_SPECIFIC_FLAG_KEY_STRING_MAX_LENGTH) + return ENAMETOOLONG; + if (mount_specific_data.value_type != MountSpecificFlag::ValueType::Boolean && mount_specific_data.value_length == 0) + return EINVAL; + if (mount_specific_data.value_type != MountSpecificFlag::ValueType::Boolean && mount_specific_data.value_addr == nullptr) + return EFAULT; + + // NOTE: We put these limits in place because we assume that don't need to handle huge + // amounts of bytes when trying to handle a mount fs-specific flag. + // Anything larger than these constants (which could be changed if needed) is deemed to + // potentially cause OOM condition, and cannot represent any reasonable and "honest" data + // from userspace. + if (mount_specific_data.value_type != MountSpecificFlag::ValueType::ASCIIString && mount_specific_data.value_length > MOUNT_SPECIFIC_FLAG_NON_ASCII_STRING_TYPE_MAX_LENGTH) + return E2BIG; + if (mount_specific_data.value_type == MountSpecificFlag::ValueType::ASCIIString && mount_specific_data.value_length > MOUNT_SPECIFIC_FLAG_ASCII_STRING_TYPE_MAX_LENGTH) + return E2BIG; + + // NOTE: We enforce that the passed argument will be either i64 or u64, so it will always be + // exactly 8 bytes. We do that to simplify handling of integers as well as to ensure ABI correctness + // in all possible cases. + auto key_string = TRY(try_copy_kstring_from_user(reinterpret_cast(mount_specific_data.key_string_addr), static_cast(mount_specific_data.key_string_length))); + switch (mount_specific_data.value_type) { + // NOTE: This is actually considered as simply boolean flag. + case MountSpecificFlag::ValueType::Boolean: { + VERIFY(m_file_system_initializer.handle_mount_boolean_flag); + Userspace user_value_addr(reinterpret_cast(mount_specific_data.value_addr)); + auto value_integer = TRY(copy_typed_from_user(user_value_addr)); + if (value_integer != 0 && value_integer != 1) + return EDOM; + bool value = (value_integer == 1) ? 
true : false; + TRY(m_file_system_initializer.handle_mount_boolean_flag(our_mount_specific_data->bytes(), key_string->view(), value)); + return {}; + } + case MountSpecificFlag::ValueType::UnsignedInteger: { + VERIFY(m_file_system_initializer.handle_mount_unsigned_integer_flag); + Userspace user_value_addr(reinterpret_cast(mount_specific_data.value_addr)); + auto value_integer = TRY(copy_typed_from_user(user_value_addr)); + TRY(m_file_system_initializer.handle_mount_unsigned_integer_flag(our_mount_specific_data->bytes(), key_string->view(), value_integer)); + return {}; + } + case MountSpecificFlag::ValueType::SignedInteger: { + VERIFY(m_file_system_initializer.handle_mount_signed_integer_flag); + Userspace user_value_addr(reinterpret_cast(mount_specific_data.value_addr)); + auto value_integer = TRY(copy_typed_from_user(user_value_addr)); + TRY(m_file_system_initializer.handle_mount_signed_integer_flag(our_mount_specific_data->bytes(), key_string->view(), value_integer)); + return {}; + } + case MountSpecificFlag::ValueType::ASCIIString: { + VERIFY(m_file_system_initializer.handle_mount_ascii_string_flag); + auto value_string = TRY(try_copy_kstring_from_user(reinterpret_cast(mount_specific_data.value_addr), static_cast(mount_specific_data.value_length))); + TRY(m_file_system_initializer.handle_mount_ascii_string_flag(our_mount_specific_data->bytes(), key_string->view(), value_string->view())); + return {}; + } + default: + return EINVAL; + } + } + default: + return EINVAL; + } + }); +} + +ErrorOr> MountFile::pseudo_path(OpenFileDescription const&) const +{ + return KString::try_create(":mount-file:"sv); +} + +} diff --git a/Kernel/FileSystem/MountFile.h b/Kernel/FileSystem/MountFile.h new file mode 100644 index 00000000000..bc800b80bb2 --- /dev/null +++ b/Kernel/FileSystem/MountFile.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2022, Liav A. 
+ * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include + +namespace Kernel { + +class MountFile final : public File { +public: + static ErrorOr> create(FileSystemInitializer const&, int flags); + virtual ~MountFile() override; + + virtual bool can_read(OpenFileDescription const&, u64) const override { return true; } + virtual bool can_write(OpenFileDescription const&, u64) const override { return true; } + virtual ErrorOr read(OpenFileDescription&, u64, UserOrKernelBuffer&, size_t) override { return ENOTSUP; } + virtual ErrorOr write(OpenFileDescription&, u64, UserOrKernelBuffer const&, size_t) override { return ENOTSUP; } + virtual ErrorOr ioctl(OpenFileDescription&, unsigned request, Userspace arg) override; + virtual ErrorOr> pseudo_path(OpenFileDescription const&) const override; + virtual StringView class_name() const override { return "MountFile"sv; } + + int mount_flags() const { return m_flags; } + + MutexProtected>& mount_file_system_specific_data() { return m_mount_specific_data; } + FileSystemInitializer const& file_system_initializer() const { return m_file_system_initializer; } + +private: + virtual bool is_mount_file() const override { return true; } + + MountFile(FileSystemInitializer const&, int flags, NonnullOwnPtr); + + int const m_flags; + FileSystemInitializer const& m_file_system_initializer; + MutexProtected> m_mount_specific_data; +}; + +} diff --git a/Kernel/FileSystem/OpenFileDescription.cpp b/Kernel/FileSystem/OpenFileDescription.cpp index f69cfeb7bc9..169ba8e309c 100644 --- a/Kernel/FileSystem/OpenFileDescription.cpp +++ b/Kernel/FileSystem/OpenFileDescription.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -313,6 +314,25 @@ InodeWatcher* OpenFileDescription::inode_watcher() return static_cast(m_file.ptr()); } +bool OpenFileDescription::is_mount_file() const +{ + return m_file->is_mount_file(); +} + +MountFile const* OpenFileDescription::mount_file() 
const +{ + if (!is_mount_file()) + return nullptr; + return static_cast(m_file.ptr()); +} + +MountFile* OpenFileDescription::mount_file() +{ + if (!is_mount_file()) + return nullptr; + return static_cast(m_file.ptr()); +} + bool OpenFileDescription::is_master_pty() const { return m_file->is_master_pty(); diff --git a/Kernel/FileSystem/OpenFileDescription.h b/Kernel/FileSystem/OpenFileDescription.h index 1b02473c4d5..7f0f7fab996 100644 --- a/Kernel/FileSystem/OpenFileDescription.h +++ b/Kernel/FileSystem/OpenFileDescription.h @@ -80,6 +80,10 @@ public: InodeWatcher const* inode_watcher() const; InodeWatcher* inode_watcher(); + bool is_mount_file() const; + MountFile const* mount_file() const; + MountFile* mount_file(); + bool is_master_pty() const; MasterPTY const* master_pty() const; MasterPTY* master_pty(); diff --git a/Kernel/FileSystem/Plan9FS/FileSystem.cpp b/Kernel/FileSystem/Plan9FS/FileSystem.cpp index feadf6d8648..47be6a64ac0 100644 --- a/Kernel/FileSystem/Plan9FS/FileSystem.cpp +++ b/Kernel/FileSystem/Plan9FS/FileSystem.cpp @@ -10,7 +10,7 @@ namespace Kernel { -ErrorOr> Plan9FS::try_create(OpenFileDescription& file_description) +ErrorOr> Plan9FS::try_create(OpenFileDescription& file_description, ReadonlyBytes) { return TRY(adopt_nonnull_ref_or_enomem(new (nothrow) Plan9FS(file_description))); } diff --git a/Kernel/FileSystem/Plan9FS/FileSystem.h b/Kernel/FileSystem/Plan9FS/FileSystem.h index 3be74cd6983..f6bb5a8bb7b 100644 --- a/Kernel/FileSystem/Plan9FS/FileSystem.h +++ b/Kernel/FileSystem/Plan9FS/FileSystem.h @@ -22,7 +22,7 @@ class Plan9FS final : public FileBackedFileSystem { public: virtual ~Plan9FS() override; - static ErrorOr> try_create(OpenFileDescription&); + static ErrorOr> try_create(OpenFileDescription&, ReadonlyBytes); virtual bool supports_watchers() const override { return false; } diff --git a/Kernel/FileSystem/ProcFS/FileSystem.cpp b/Kernel/FileSystem/ProcFS/FileSystem.cpp index 59ecafe179b..d4cec65c1e2 100644 --- 
a/Kernel/FileSystem/ProcFS/FileSystem.cpp +++ b/Kernel/FileSystem/ProcFS/FileSystem.cpp @@ -11,7 +11,7 @@ namespace Kernel { -ErrorOr> ProcFS::try_create() +ErrorOr> ProcFS::try_create(ReadonlyBytes) { return TRY(adopt_nonnull_ref_or_enomem(new (nothrow) ProcFS)); } diff --git a/Kernel/FileSystem/ProcFS/FileSystem.h b/Kernel/FileSystem/ProcFS/FileSystem.h index 38354d07379..143c2835655 100644 --- a/Kernel/FileSystem/ProcFS/FileSystem.h +++ b/Kernel/FileSystem/ProcFS/FileSystem.h @@ -20,7 +20,7 @@ class ProcFS final : public FileSystem { public: virtual ~ProcFS() override; - static ErrorOr> try_create(); + static ErrorOr> try_create(ReadonlyBytes); virtual ErrorOr initialize() override; virtual StringView class_name() const override { return "ProcFS"sv; } diff --git a/Kernel/FileSystem/RAMFS/FileSystem.cpp b/Kernel/FileSystem/RAMFS/FileSystem.cpp index 53ae2eab944..a733da81b1b 100644 --- a/Kernel/FileSystem/RAMFS/FileSystem.cpp +++ b/Kernel/FileSystem/RAMFS/FileSystem.cpp @@ -10,7 +10,7 @@ namespace Kernel { -ErrorOr> RAMFS::try_create() +ErrorOr> RAMFS::try_create(ReadonlyBytes) { return TRY(adopt_nonnull_ref_or_enomem(new (nothrow) RAMFS)); } diff --git a/Kernel/FileSystem/RAMFS/FileSystem.h b/Kernel/FileSystem/RAMFS/FileSystem.h index a3ba20b1cff..0c141bd1316 100644 --- a/Kernel/FileSystem/RAMFS/FileSystem.h +++ b/Kernel/FileSystem/RAMFS/FileSystem.h @@ -18,7 +18,7 @@ class RAMFS final : public FileSystem { public: virtual ~RAMFS() override; - static ErrorOr> try_create(); + static ErrorOr> try_create(ReadonlyBytes); virtual ErrorOr initialize() override; virtual StringView class_name() const override { return "RAMFS"sv; } diff --git a/Kernel/FileSystem/SysFS/FileSystem.cpp b/Kernel/FileSystem/SysFS/FileSystem.cpp index 0e054bddc6a..de43a107121 100644 --- a/Kernel/FileSystem/SysFS/FileSystem.cpp +++ b/Kernel/FileSystem/SysFS/FileSystem.cpp @@ -11,7 +11,7 @@ namespace Kernel { -ErrorOr> SysFS::try_create() +ErrorOr> SysFS::try_create(ReadonlyBytes) { return 
TRY(adopt_nonnull_ref_or_enomem(new (nothrow) SysFS)); } diff --git a/Kernel/FileSystem/SysFS/FileSystem.h b/Kernel/FileSystem/SysFS/FileSystem.h index 793c1440521..fcaf6a8d6b8 100644 --- a/Kernel/FileSystem/SysFS/FileSystem.h +++ b/Kernel/FileSystem/SysFS/FileSystem.h @@ -20,7 +20,7 @@ class SysFS final : public FileSystem { public: virtual ~SysFS() override; - static ErrorOr> try_create(); + static ErrorOr> try_create(ReadonlyBytes); virtual ErrorOr initialize() override; virtual StringView class_name() const override { return "SysFS"sv; } diff --git a/Kernel/FileSystem/VirtualFileSystem.cpp b/Kernel/FileSystem/VirtualFileSystem.cpp index 7e8e72c1365..619bfe89e8a 100644 --- a/Kernel/FileSystem/VirtualFileSystem.cpp +++ b/Kernel/FileSystem/VirtualFileSystem.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2018-2020, Andreas Kling + * Copyright (c) 2022-2023, Liav A. * * SPDX-License-Identifier: BSD-2-Clause */ @@ -23,11 +24,60 @@ #include #include +#include +#include +#include +#include +#include +#include +#include +#include + namespace Kernel { static Singleton s_the; static constexpr int root_mount_flags = 0; +static ErrorOr handle_mount_boolean_flag_as_invalid(Span, StringView, bool) +{ + return EINVAL; +} + +static ErrorOr handle_mount_unsigned_integer_flag_as_invalid(Span, StringView, u64) +{ + return EINVAL; +} + +static ErrorOr handle_mount_signed_integer_flag_as_invalid(Span, StringView, i64) +{ + return EINVAL; +} + +static ErrorOr handle_mount_ascii_string_flag_as_invalid(Span, StringView, StringView) +{ + return EINVAL; +} + +static constexpr FileSystemInitializer s_initializers[] = { + { "proc"sv, "ProcFS"sv, false, false, false, {}, ProcFS::try_create, handle_mount_boolean_flag_as_invalid, handle_mount_unsigned_integer_flag_as_invalid, handle_mount_signed_integer_flag_as_invalid, handle_mount_ascii_string_flag_as_invalid }, + { "devpts"sv, "DevPtsFS"sv, false, false, false, {}, DevPtsFS::try_create, handle_mount_boolean_flag_as_invalid, 
handle_mount_unsigned_integer_flag_as_invalid, handle_mount_signed_integer_flag_as_invalid, handle_mount_ascii_string_flag_as_invalid }, + { "sys"sv, "SysFS"sv, false, false, false, {}, SysFS::try_create, handle_mount_boolean_flag_as_invalid, handle_mount_unsigned_integer_flag_as_invalid, handle_mount_signed_integer_flag_as_invalid, handle_mount_ascii_string_flag_as_invalid }, + { "ram"sv, "RAMFS"sv, false, false, false, {}, RAMFS::try_create, handle_mount_boolean_flag_as_invalid, handle_mount_unsigned_integer_flag_as_invalid, handle_mount_signed_integer_flag_as_invalid, handle_mount_ascii_string_flag_as_invalid }, + { "ext2"sv, "Ext2FS"sv, true, true, true, Ext2FS::try_create, {}, handle_mount_boolean_flag_as_invalid, handle_mount_unsigned_integer_flag_as_invalid, handle_mount_signed_integer_flag_as_invalid, handle_mount_ascii_string_flag_as_invalid }, + { "9p"sv, "Plan9FS"sv, true, true, true, Plan9FS::try_create, {}, handle_mount_boolean_flag_as_invalid, handle_mount_unsigned_integer_flag_as_invalid, handle_mount_signed_integer_flag_as_invalid, handle_mount_ascii_string_flag_as_invalid }, + { "iso9660"sv, "ISO9660FS"sv, true, true, true, ISO9660FS::try_create, {}, handle_mount_boolean_flag_as_invalid, handle_mount_unsigned_integer_flag_as_invalid, handle_mount_signed_integer_flag_as_invalid, handle_mount_ascii_string_flag_as_invalid }, + { "fat"sv, "FATFS"sv, true, true, true, FATFS::try_create, {}, handle_mount_boolean_flag_as_invalid, handle_mount_unsigned_integer_flag_as_invalid, handle_mount_signed_integer_flag_as_invalid, handle_mount_ascii_string_flag_as_invalid }, +}; + +ErrorOr VirtualFileSystem::find_filesystem_type_initializer(StringView fs_type) +{ + for (auto& initializer_entry : s_initializers) { + if (fs_type == initializer_entry.short_name || fs_type == initializer_entry.name) + return &initializer_entry; + } + return ENODEV; +} + UNMAP_AFTER_INIT void VirtualFileSystem::initialize() { s_the.ensure_instance(); @@ -59,14 +109,14 @@ bool 
VirtualFileSystem::mount_point_exists_at_inode(InodeIdentifier inode_identi }); } -ErrorOr VirtualFileSystem::mount(FileSystem& fs, Custody& mount_point, int flags) +ErrorOr VirtualFileSystem::add_file_system_to_mount_table(FileSystem& file_system, Custody& mount_point, int flags) { - auto new_mount = TRY(adopt_nonnull_own_or_enomem(new (nothrow) Mount(fs, &mount_point, flags))); + auto new_mount = TRY(adopt_nonnull_own_or_enomem(new (nothrow) Mount(file_system, &mount_point, flags))); return m_mounts.with([&](auto& mounts) -> ErrorOr { auto& inode = mount_point.inode(); - dbgln("VirtualFileSystem: FileSystemID {}, Mounting {} at inode {} with flags {}", - fs.fsid(), - fs.class_name(), + dbgln("VirtualFileSystem: FileSystemID {} (non file-backed), Mounting {} at inode {} with flags {}", + file_system.fsid(), + file_system.class_name(), inode.identifier(), flags); if (mount_point_exists_at_inode(inode.identifier())) { @@ -84,14 +134,8 @@ ErrorOr VirtualFileSystem::mount(FileSystem& fs, Custody& mount_point, int // the FileSystem once it is no longer mounted). 
if (mounted_count == 1) { m_file_systems_list.with([&](auto& fs_list) { - fs_list.append(fs); + fs_list.append(file_system); }); - if (fs.is_file_backed()) { - auto& file_backed_fs = static_cast(fs); - m_file_backed_file_systems_list.with([&](auto& fs_list) { - fs_list.append(file_backed_fs); - }); - } } }); @@ -102,6 +146,65 @@ ErrorOr VirtualFileSystem::mount(FileSystem& fs, Custody& mount_point, int }); } +ErrorOr VirtualFileSystem::mount(MountFile& mount_file, OpenFileDescription* source_description, Custody& mount_point, int flags) +{ + auto const& file_system_initializer = mount_file.file_system_initializer(); + if (!source_description) { + if (file_system_initializer.requires_open_file_description) + return ENOTSUP; + if (!file_system_initializer.create) + return ENOTSUP; + RefPtr fs; + TRY(mount_file.mount_file_system_specific_data().with_exclusive([&](auto& mount_specific_data) -> ErrorOr { + fs = TRY(file_system_initializer.create(mount_specific_data->bytes())); + return {}; + })); + VERIFY(fs); + TRY(fs->initialize()); + TRY(add_file_system_to_mount_table(*fs, mount_point, flags)); + return {}; + } + + // NOTE: Although it might be OK to support creating filesystems + // without providing an actual file descriptor to their create() method + // because the caller of this function actually supplied a valid file descriptor, + // this will only make things complicated in the future, so we should block + // this kind of behavior. 
+ if (!file_system_initializer.requires_open_file_description) + return ENOTSUP; + + if (file_system_initializer.requires_block_device && !source_description->file().is_block_device()) + return ENOTBLK; + if (file_system_initializer.requires_seekable_file && !source_description->file().is_seekable()) { + dbgln("mount: this is not a seekable file"); + return ENODEV; + } + + // NOTE: If there's an associated file description with the filesystem, we could + // try to first find it from the VirtualFileSystem filesystem list and if it was not found, + // then create it and add it. + VERIFY(file_system_initializer.create_with_fd); + return m_file_backed_file_systems_list.with_exclusive([&](auto& list) -> ErrorOr { + RefPtr fs; + for (auto& node : list) { + if ((&node.file_description() == source_description) || (&node.file() == &source_description->file())) { + fs = node; + break; + } + } + if (!fs) { + TRY(mount_file.mount_file_system_specific_data().with_exclusive([&](auto& mount_specific_data) -> ErrorOr { + fs = TRY(file_system_initializer.create_with_fd(*source_description, mount_specific_data->bytes())); + return {}; + })); + TRY(fs->initialize()); + } + TRY(add_file_system_to_mount_table(*fs, mount_point, flags)); + list.append(static_cast(*fs)); + return {}; + }); +} + ErrorOr VirtualFileSystem::bind_mount(Custody& source, Custody& mount_point, int flags) { auto new_mount = TRY(adopt_nonnull_own_or_enomem(new (nothrow) Mount(source.inode(), mount_point, flags))); @@ -163,41 +266,42 @@ ErrorOr VirtualFileSystem::unmount(Custody& mountpoint_custody) auto custody_path = TRY(mountpoint_custody.try_serialize_absolute_path()); dbgln("VirtualFileSystem: unmount called with inode {} on mountpoint {}", guest_inode.identifier(), custody_path->view()); - return m_mounts.with([&](auto& mounts) -> ErrorOr { - for (auto& mount : mounts) { - if (&mount.guest() != &guest_inode) - continue; - auto mountpoint_path = TRY(mount.absolute_path()); - if (custody_path->view() != 
mountpoint_path->view()) - continue; - NonnullRefPtr fs = mount.guest_fs(); - TRY(fs->prepare_to_unmount()); - fs->mounted_count({}).with([&](auto& mounted_count) { - VERIFY(mounted_count > 0); - if (mounted_count == 1) { - dbgln("VirtualFileSystem: Unmounting file system {} for the last time...", fs->fsid()); - m_file_systems_list.with([&](auto& list) { - list.remove(*fs); - }); - if (fs->is_file_backed()) { - dbgln("VirtualFileSystem: Unmounting file backed file system {} for the last time...", fs->fsid()); - auto& file_backed_fs = static_cast(*fs); - m_file_backed_file_systems_list.with([&](auto& list) { - list.remove(file_backed_fs); + return m_file_backed_file_systems_list.with_exclusive([&](auto& file_backed_fs_list) -> ErrorOr { + TRY(m_mounts.with([&](auto& mounts) -> ErrorOr { + for (auto& mount : mounts) { + if (&mount.guest() != &guest_inode) + continue; + auto mountpoint_path = TRY(mount.absolute_path()); + if (custody_path->view() != mountpoint_path->view()) + continue; + NonnullRefPtr fs = mount.guest_fs(); + TRY(fs->prepare_to_unmount()); + fs->mounted_count({}).with([&](auto& mounted_count) { + VERIFY(mounted_count > 0); + if (mounted_count == 1) { + dbgln("VirtualFileSystem: Unmounting file system {} for the last time...", fs->fsid()); + m_file_systems_list.with([&](auto& list) { + list.remove(*fs); }); + if (fs->is_file_backed()) { + dbgln("VirtualFileSystem: Unmounting file backed file system {} for the last time...", fs->fsid()); + auto& file_backed_fs = static_cast(*fs); + file_backed_fs_list.remove(file_backed_fs); + } + } else { + mounted_count--; } - } else { - mounted_count--; - } - }); - dbgln("VirtualFileSystem: Unmounting file system {}...", fs->fsid()); - mount.m_vfs_list_node.remove(); - // Note: This is balanced by a `new` statement that is happening in various places before inserting the Mount object to the list. 
- delete &mount; - return {}; - } - dbgln("VirtualFileSystem: Nothing mounted on inode {}", guest_inode.identifier()); - return ENODEV; + }); + dbgln("VirtualFileSystem: Unmounting file system {}...", fs->fsid()); + mount.m_vfs_list_node.remove(); + // NOTE: This is balanced by a `new` statement that is happening in various places before inserting the Mount object to the list. + delete &mount; + return {}; + } + dbgln("VirtualFileSystem: Nothing mounted on inode {}", guest_inode.identifier()); + return ENODEV; + })); + return {}; }); } @@ -219,7 +323,7 @@ ErrorOr VirtualFileSystem::mount_root(FileSystem& fs) if (fs.is_file_backed()) { auto pseudo_path = TRY(static_cast(fs).file_description().pseudo_path()); dmesgln("VirtualFileSystem: mounted root({}) from {} ({})", fs.fsid(), fs.class_name(), pseudo_path); - m_file_backed_file_systems_list.with([&](auto& list) { + m_file_backed_file_systems_list.with_exclusive([&](auto& list) { list.append(static_cast(fs)); }); } else { @@ -339,28 +443,6 @@ ErrorOr VirtualFileSystem::lookup_metadata(Credentials const& cre return custody->inode().metadata(); } -ErrorOr> VirtualFileSystem::find_already_existing_or_create_file_backed_file_system(OpenFileDescription& description, Function>(OpenFileDescription&)> callback) -{ - return TRY(m_file_backed_file_systems_list.with([&](auto& list) -> ErrorOr> { - for (auto& node : list) { - if (&node.file_description() == &description) { - return node; - } - if (&node.file() == &description.file()) { - return node; - } - } - auto fs = TRY(callback(description)); - - // The created FileSystem is only added to the file_systems_lists - // when the FS has been successfully initialized and mounted - // (in VirtualFileSystem::mount()). This prevents file systems which - // fail to initialize or mount from existing in the list when the - // FileSystem is destroyed after failure. 
- return static_ptr_cast(fs); - })); -} - ErrorOr> VirtualFileSystem::open(Credentials const& credentials, StringView path, int options, mode_t mode, Custody& base, Optional owner) { return open(Process::current(), credentials, path, options, mode, base, owner); diff --git a/Kernel/FileSystem/VirtualFileSystem.h b/Kernel/FileSystem/VirtualFileSystem.h index f23d5088bf2..7491c8a82d0 100644 --- a/Kernel/FileSystem/VirtualFileSystem.h +++ b/Kernel/FileSystem/VirtualFileSystem.h @@ -14,11 +14,14 @@ #include #include #include +#include #include #include #include +#include #include #include +#include #include namespace Kernel { @@ -49,11 +52,13 @@ public: static void initialize(); static VirtualFileSystem& the(); + static ErrorOr find_filesystem_type_initializer(StringView fs_type); + VirtualFileSystem(); ~VirtualFileSystem(); ErrorOr mount_root(FileSystem&); - ErrorOr mount(FileSystem&, Custody& mount_point, int flags); + ErrorOr mount(MountFile&, OpenFileDescription*, Custody& mount_point, int flags); ErrorOr bind_mount(Custody& source, Custody& mount_point, int flags); ErrorOr remount(Custody& mount_point, int new_flags); ErrorOr unmount(Custody& mount_point); @@ -82,8 +87,6 @@ public: ErrorOr for_each_mount(Function(Mount const&)>) const; - ErrorOr> find_already_existing_or_create_file_backed_file_system(OpenFileDescription& description, Function>(OpenFileDescription&)> callback); - InodeIdentifier root_inode_id() const; void sync_filesystems(); @@ -105,6 +108,8 @@ private: ErrorOr validate_path_against_process_veil(Custody const& path, int options); ErrorOr validate_path_against_process_veil(StringView path, int options); + ErrorOr add_file_system_to_mount_table(FileSystem& file_system, Custody& mount_point, int flags); + bool is_vfs_root(InodeIdentifier) const; ErrorOr traverse_directory_inode(Inode&, Function(FileSystem::DirectoryEntryView const&)>); @@ -120,7 +125,14 @@ private: SpinlockProtected, LockRank::None> m_root_custody {}; SpinlockProtected, 
LockRank::None> m_mounts {}; - SpinlockProtected, LockRank::None> m_file_backed_file_systems_list {}; + + // NOTE: The FileBackedFileSystem list is protected by a mutex because we need to scan it + // to search for existing filesystems for already used block devices and therefore when doing + // that we could fail to find a filesystem so we need to create a new filesystem which might + // need to do disk access (i.e. taking Mutexes in other places) and then register that new filesystem + // in this list, to avoid TOCTOU bugs. + MutexProtected> m_file_backed_file_systems_list {}; + SpinlockProtected, LockRank::FileSystem> m_file_systems_list {}; }; diff --git a/Kernel/Forward.h b/Kernel/Forward.h index e12681f812f..8d3f7f6c9d6 100644 --- a/Kernel/Forward.h +++ b/Kernel/Forward.h @@ -31,6 +31,7 @@ class IPv4Socket; class Inode; class InodeIdentifier; class InodeWatcher; +class MountFile; class Jail; class KBuffer; class KString; diff --git a/Kernel/Syscalls/mount.cpp b/Kernel/Syscalls/mount.cpp index ae4fedc9799..80fbd323677 100644 --- a/Kernel/Syscalls/mount.cpp +++ b/Kernel/Syscalls/mount.cpp @@ -1,120 +1,68 @@ /* * Copyright (c) 2018-2020, Andreas Kling + * Copyright (c) 2022, Liav A. 
* * SPDX-License-Identifier: BSD-2-Clause */ #include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include #include namespace Kernel { -struct FileSystemInitializer { - StringView short_name; - StringView name; - bool requires_open_file_description { false }; - bool requires_block_device { false }; - bool requires_seekable_file { false }; - ErrorOr> (*create_with_fd)(OpenFileDescription&) = nullptr; - ErrorOr> (*create)(void) = nullptr; -}; - -static constexpr FileSystemInitializer s_initializers[] = { - { "proc"sv, "ProcFS"sv, false, false, false, {}, ProcFS::try_create }, - { "devpts"sv, "DevPtsFS"sv, false, false, false, {}, DevPtsFS::try_create }, - { "sys"sv, "SysFS"sv, false, false, false, {}, SysFS::try_create }, - { "ram"sv, "RAMFS"sv, false, false, false, {}, RAMFS::try_create }, - { "ext2"sv, "Ext2FS"sv, true, true, true, Ext2FS::try_create, {} }, - { "9p"sv, "Plan9FS"sv, true, true, true, Plan9FS::try_create, {} }, - { "iso9660"sv, "ISO9660FS"sv, true, true, true, ISO9660FS::try_create, {} }, - { "fat"sv, "FATFS"sv, true, true, true, FATFS::try_create, {} }, -}; - -static ErrorOr> find_or_create_filesystem_instance(StringView fs_type, OpenFileDescription* possible_description) +ErrorOr Process::sys$fsopen(Userspace user_params) { - for (auto& initializer_entry : s_initializers) { - if (fs_type != initializer_entry.short_name && fs_type != initializer_entry.name) - continue; - if (!initializer_entry.requires_open_file_description) { - VERIFY(initializer_entry.create); - NonnullRefPtr fs = TRY(initializer_entry.create()); - return fs; - } - // Note: If there's an associated file description with the filesystem, we could - // try to first find it from the VirtualFileSystem filesystem list and if it was not found, - // then create it and add it. 
- VERIFY(initializer_entry.create_with_fd); - if (!possible_description) - return EBADF; - OpenFileDescription& description = *possible_description; - - if (initializer_entry.requires_block_device && !description.file().is_block_device()) - return ENOTBLK; - if (initializer_entry.requires_seekable_file && !description.file().is_seekable()) { - dbgln("mount: this is not a seekable file"); - return ENODEV; - } - return TRY(VirtualFileSystem::the().find_already_existing_or_create_file_backed_file_system(description, initializer_entry.create_with_fd)); - } - return ENODEV; -} - -ErrorOr Process::sys$mount(Userspace user_params) -{ - VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this); - TRY(require_no_promises()); + VERIFY_NO_PROCESS_BIG_LOCK(this); + TRY(require_promise(Pledge::mount)); auto credentials = this->credentials(); if (!credentials->is_superuser()) - return EPERM; + return Error::from_errno(EPERM); + auto params = TRY(copy_typed_from_user(user_params)); + auto fs_type_string = TRY(try_copy_kstring_from_user(params.fs_type)); + + // NOTE: If some userspace program passes MS_REMOUNT or MS_BIND, return EINVAL to indicate that we never want these + // flags to appear in the mount table (remounts and bind mounts go through sys$remount and sys$bindmount).
+ if (params.flags & MS_REMOUNT || params.flags & MS_BIND) + return Error::from_errno(EINVAL); + + auto const* fs_type_initializer = TRY(VirtualFileSystem::find_filesystem_type_initializer(fs_type_string->view())); + VERIFY(fs_type_initializer); + auto mount_file = TRY(MountFile::create(*fs_type_initializer, params.flags)); + auto description = TRY(OpenFileDescription::try_create(move(mount_file))); + return m_fds.with_exclusive([&](auto& fds) -> ErrorOr { + auto new_fd = TRY(fds.allocate()); + fds[new_fd.fd].set(move(description), FD_CLOEXEC); + return new_fd.fd; + }); +} + +ErrorOr Process::sys$fsmount(Userspace user_params) +{ + VERIFY_NO_PROCESS_BIG_LOCK(this); + TRY(require_promise(Pledge::mount)); + auto credentials = this->credentials(); + if (!credentials->is_superuser()) + return Error::from_errno(EPERM); auto params = TRY(copy_typed_from_user(user_params)); - if (params.flags & MS_REMOUNT) - return EINVAL; - if (params.flags & MS_BIND) - return EINVAL; + auto mount_description = TRY(open_file_description(params.mount_fd)); + if (!mount_description->is_mount_file()) + return Error::from_errno(EINVAL); - auto source_fd = params.source_fd; + RefPtr source_description = TRY(open_file_description_ignoring_negative(params.source_fd)); auto target = TRY(try_copy_kstring_from_user(params.target)); - auto fs_type_string = TRY(try_copy_kstring_from_user(params.fs_type)); - auto fs_type = fs_type_string->view(); - - auto description_or_error = open_file_description(source_fd); - if (!description_or_error.is_error()) - dbgln("mount {}: source fd {} @ {}", fs_type, source_fd, target); - else - dbgln("mount {} @ {}", fs_type, target); - auto target_custody = TRY(VirtualFileSystem::the().resolve_path(credentials, target->view(), current_directory())); - - RefPtr fs; - - if (!description_or_error.is_error()) { - auto description = description_or_error.release_value(); - fs = TRY(find_or_create_filesystem_instance(fs_type, description.ptr())); - auto source_pseudo_path = 
TRY(description->pseudo_path()); - dbgln("mount: attempting to mount {} on {}", source_pseudo_path, target); - } else { - fs = TRY(find_or_create_filesystem_instance(fs_type, {})); - } - - TRY(fs->initialize()); - TRY(VirtualFileSystem::the().mount(*fs, target_custody, params.flags)); + auto flags = mount_description->mount_file()->mount_flags(); + TRY(VirtualFileSystem::the().mount(*mount_description->mount_file(), source_description.ptr(), target_custody, flags)); return 0; } ErrorOr Process::sys$remount(Userspace user_params) { VERIFY_NO_PROCESS_BIG_LOCK(this); - TRY(require_no_promises()); + TRY(require_promise(Pledge::mount)); auto credentials = this->credentials(); if (!credentials->is_superuser()) return EPERM; @@ -134,7 +82,7 @@ ErrorOr Process::sys$remount(Userspace Process::sys$bindmount(Userspace user_params) { VERIFY_NO_PROCESS_BIG_LOCK(this); - TRY(require_no_promises()); + TRY(require_promise(Pledge::mount)); auto credentials = this->credentials(); if (!credentials->is_superuser()) return EPERM; @@ -166,7 +114,7 @@ ErrorOr Process::sys$umount(Userspace user_mountpoint, siz if (!credentials->is_superuser()) return EPERM; - TRY(require_no_promises()); + TRY(require_promise(Pledge::mount)); auto mountpoint = TRY(get_syscall_path_argument(user_mountpoint, mountpoint_length)); auto custody = TRY(VirtualFileSystem::the().resolve_path(credentials, mountpoint->view(), current_directory())); diff --git a/Kernel/Tasks/Process.h b/Kernel/Tasks/Process.h index a7b86535c8e..97b43bb3ce3 100644 --- a/Kernel/Tasks/Process.h +++ b/Kernel/Tasks/Process.h @@ -70,6 +70,7 @@ UnixDateTime kgettimeofday(); __ENUMERATE_PLEDGE_PROMISE(map_fixed) \ __ENUMERATE_PLEDGE_PROMISE(getkeymap) \ __ENUMERATE_PLEDGE_PROMISE(jail) \ + __ENUMERATE_PLEDGE_PROMISE(mount) \ __ENUMERATE_PLEDGE_PROMISE(no_error) enum class Pledge : u32 { @@ -395,7 +396,8 @@ public: ErrorOr sys$unlink(int dirfd, Userspace pathname, size_t path_length, int flags); ErrorOr sys$symlink(Userspace); ErrorOr 
sys$rmdir(Userspace pathname, size_t path_length); - ErrorOr sys$mount(Userspace); + ErrorOr sys$fsmount(Userspace); + ErrorOr sys$fsopen(Userspace); ErrorOr sys$umount(Userspace mountpoint, size_t mountpoint_length); ErrorOr sys$chmod(Userspace); ErrorOr sys$fchmod(int fd, mode_t); @@ -826,11 +828,25 @@ public: return m_fds.with_shared([fd](auto& fds) { return fds.open_file_description(fd); }); } + ErrorOr> open_file_description_ignoring_negative(int fd) + { + if (fd < 0) + return nullptr; + return open_file_description(fd); + } + ErrorOr> open_file_description(int fd) const { return m_fds.with_shared([fd](auto& fds) { return fds.open_file_description(fd); }); } + ErrorOr> open_file_description_ignoring_negative(int fd) const + { + if (fd < 0) + return nullptr; + return open_file_description(fd); + } + ErrorOr allocate_fd() { return m_fds.with_exclusive([](auto& fds) { return fds.allocate(); }); diff --git a/Userland/DevTools/UserspaceEmulator/Emulator.h b/Userland/DevTools/UserspaceEmulator/Emulator.h index 404468cff8d..adb1fc7e447 100644 --- a/Userland/DevTools/UserspaceEmulator/Emulator.h +++ b/Userland/DevTools/UserspaceEmulator/Emulator.h @@ -147,6 +147,7 @@ private: int virt$anon_create(size_t, int); int virt$beep(); int virt$bind(int sockfd, FlatPtr address, socklen_t address_length); + u32 virt$bindmount(u32 params_addr); int virt$chdir(FlatPtr, size_t); int virt$chmod(FlatPtr); int virt$chown(FlatPtr); @@ -167,6 +168,8 @@ private: int virt$fchown(int, uid_t, gid_t); u32 virt$fcntl(int fd, int, u32); int virt$fork(); + u32 virt$fsopen(u32); + u32 virt$fsmount(u32); int virt$fstat(int, FlatPtr); int virt$ftruncate(int fd, FlatPtr length_addr); int virt$futex(FlatPtr); @@ -199,7 +202,6 @@ private: u32 virt$madvise(FlatPtr, size_t, int); int virt$mkdir(FlatPtr path, size_t path_length, mode_t mode); u32 virt$mmap(u32); - u32 virt$mount(u32); u32 virt$mprotect(FlatPtr, size_t, int); FlatPtr virt$mremap(FlatPtr); int virt$annotate_mapping(FlatPtr); @@ -219,6 
+221,7 @@ private: int virt$recvfd(int, int); int virt$recvmsg(int sockfd, FlatPtr msg_addr, int flags); int virt$rename(FlatPtr address); + u32 virt$remount(u32); int virt$rmdir(FlatPtr path, size_t path_length); int virt$scheduler_get_parameters(FlatPtr); int virt$scheduler_set_parameters(FlatPtr); diff --git a/Userland/DevTools/UserspaceEmulator/Emulator_syscalls.cpp b/Userland/DevTools/UserspaceEmulator/Emulator_syscalls.cpp index fba7caef925..f92f2401677 100644 --- a/Userland/DevTools/UserspaceEmulator/Emulator_syscalls.cpp +++ b/Userland/DevTools/UserspaceEmulator/Emulator_syscalls.cpp @@ -51,6 +51,8 @@ u32 Emulator::virt_syscall(u32 function, u32 arg1, u32 arg2, u32 arg3) return virt$beep(); case SC_bind: return virt$bind(arg1, arg2, arg3); + case SC_bindmount: + return virt$bindmount(arg1); case SC_chdir: return virt$chdir(arg1, arg2); case SC_chmod: @@ -94,6 +96,10 @@ u32 Emulator::virt_syscall(u32 function, u32 arg1, u32 arg2, u32 arg3) return virt$fork(); case SC_fstat: return virt$fstat(arg1, arg2); + case SC_fsmount: + return virt$fsmount(arg1); + case SC_fsopen: + return virt$fsopen(arg1); case SC_ftruncate: return virt$ftruncate(arg1, arg2); case SC_futex: @@ -158,8 +164,6 @@ u32 Emulator::virt_syscall(u32 function, u32 arg1, u32 arg2, u32 arg3) return virt$mkdir(arg1, arg2, arg3); case SC_mmap: return virt$mmap(arg1); - case SC_mount: - return virt$mount(arg1); case SC_mprotect: return virt$mprotect(arg1, arg2, arg3); case SC_mremap: @@ -196,6 +200,8 @@ u32 Emulator::virt_syscall(u32 function, u32 arg1, u32 arg2, u32 arg3) return virt$recvmsg(arg1, arg2, arg3); case SC_rename: return virt$rename(arg1); + case SC_remount: + return virt$remount(arg1); case SC_rmdir: return virt$rmdir(arg1, arg2); case SC_scheduler_get_parameters: @@ -906,18 +912,48 @@ FlatPtr Emulator::virt$mremap(FlatPtr params_addr) return -EINVAL; } -u32 Emulator::virt$mount(u32 params_addr) +u32 Emulator::virt$bindmount(u32 params_addr) { - Syscall::SC_mount_params params; +
Syscall::SC_bindmount_params params; mmu().copy_from_vm(&params, params_addr, sizeof(params)); auto target = mmu().copy_buffer_from_vm((FlatPtr)params.target.characters, params.target.length); - auto fs_path = mmu().copy_buffer_from_vm((FlatPtr)params.fs_type.characters, params.fs_type.length); - params.fs_type.characters = (char*)fs_path.data(); - params.fs_type.length = fs_path.size(); params.target.characters = (char*)target.data(); params.target.length = target.size(); - return syscall(SC_mount, &params); + return syscall(SC_bindmount, &params); +} + +u32 Emulator::virt$remount(u32 params_addr) +{ + Syscall::SC_remount_params params; + mmu().copy_from_vm(&params, params_addr, sizeof(params)); + auto target = mmu().copy_buffer_from_vm((FlatPtr)params.target.characters, params.target.length); + params.target.characters = (char*)target.data(); + params.target.length = target.size(); + + return syscall(SC_remount, &params); +} + +u32 Emulator::virt$fsopen(u32 params_addr) +{ + Syscall::SC_fsopen_params params; + mmu().copy_from_vm(&params, params_addr, sizeof(params)); + auto fs_type = mmu().copy_buffer_from_vm((FlatPtr)params.fs_type.characters, params.fs_type.length); + params.fs_type.characters = (char*)fs_type.data(); + params.fs_type.length = fs_type.size(); + + return syscall(SC_fsopen, &params); +} + +u32 Emulator::virt$fsmount(u32 params_addr) +{ + Syscall::SC_fsmount_params params; + mmu().copy_from_vm(&params, params_addr, sizeof(params)); + auto target = mmu().copy_buffer_from_vm((FlatPtr)params.target.characters, params.target.length); + params.target.characters = (char*)target.data(); + params.target.length = target.size(); + + return syscall(SC_fsmount, &params); } u32 Emulator::virt$gettid() diff --git a/Userland/Libraries/LibC/unistd.cpp b/Userland/Libraries/LibC/unistd.cpp index 12864f6a571..beff6b4127d 100644 --- a/Userland/Libraries/LibC/unistd.cpp +++ b/Userland/Libraries/LibC/unistd.cpp @@ -940,23 +940,71 @@ int fsync(int fd) __RETURN_WITH_ERRNO(rc, rc, -1); } -int mount(int
source_fd, char const* target, char const* fs_type, int flags) +int fsopen(char const* fs_type, int flags) { - if (!target || !fs_type) { + if (!fs_type) { errno = EFAULT; return -1; } - Syscall::SC_mount_params params { - { target, strlen(target) }, + Syscall::SC_fsopen_params params { { fs_type, strlen(fs_type) }, - source_fd, - flags + flags, }; - int rc = syscall(SC_mount, &params); + int rc = syscall(SC_fsopen, &params); __RETURN_WITH_ERRNO(rc, rc, -1); } +int fsmount(int mount_fd, int source_fd, char const* target) +{ + if (!target) { + errno = EFAULT; + return -1; + } + + Syscall::SC_fsmount_params params { + mount_fd, + { target, strlen(target) }, + source_fd, + }; + int rc = syscall(SC_fsmount, &params); + __RETURN_WITH_ERRNO(rc, rc, -1); +} + +int bindmount(int source_fd, char const* target, int flags) +{ + if (!target) { + errno = EFAULT; + return -1; + } + + Syscall::SC_bindmount_params params { + { target, strlen(target) }, + source_fd, + flags, + }; + int rc = syscall(SC_bindmount, &params); + __RETURN_WITH_ERRNO(rc, rc, -1); +} + +int mount(int source_fd, char const* target, char const* fs_type, int flags) +{ + if (flags & MS_BIND) + return bindmount(source_fd, target, flags); + + int mount_fd = fsopen(fs_type, flags); + if (mount_fd < 0) + return -1; + + int rc = fsmount(mount_fd, source_fd, target); + // NOTE: The mount fd only carries state from fsopen() to fsmount(), so close it on both + // success and failure, keeping the errno that fsmount() reported for the caller. + int saved_errno = errno; + close(mount_fd); + errno = saved_errno; + return rc; +} + int umount(char const* mountpoint) { int rc = syscall(SC_umount, mountpoint, strlen(mountpoint)); diff --git a/Userland/Libraries/LibC/unistd.h b/Userland/Libraries/LibC/unistd.h index 47e5a2fcdb6..ef29f6c969b 100644 --- a/Userland/Libraries/LibC/unistd.h +++ b/Userland/Libraries/LibC/unistd.h @@ -117,6 +117,9 @@ int fchown(int fd, uid_t, gid_t); int fchownat(int fd, char const* pathname, uid_t uid, gid_t gid, int flags); int ftruncate(int fd, off_t length); int truncate(char const* path, off_t length); +int fsopen(char const* fs_type, int flags); +int fsmount(int mount_fd, int source_fd, char const* target); +int bindmount(int source_fd, char const* target, int
flags); int mount(int source_fd, char const* target, char const* fs_type, int flags); int umount(char const* mountpoint); int pledge(char const* promises, char const* execpromises); diff --git a/Userland/Libraries/LibCore/System.cpp b/Userland/Libraries/LibCore/System.cpp index 8feb4ec4c1e..7cacceda788 100644 --- a/Userland/Libraries/LibCore/System.cpp +++ b/Userland/Libraries/LibCore/System.cpp @@ -263,14 +263,46 @@ ErrorOr mount(int source_fd, StringView target, StringView fs_type, int fl if (target.is_null() || fs_type.is_null()) return Error::from_errno(EFAULT); - Syscall::SC_mount_params params { - { target.characters_without_null_termination(), target.length() }, + if (flags & MS_REMOUNT) { + TRY(remount(target, flags)); + return {}; + } + if (flags & MS_BIND) { + TRY(bindmount(source_fd, target, flags)); + return {}; + } + int mount_fd = TRY(fsopen(fs_type, flags)); + auto result = fsmount(mount_fd, source_fd, target); + // NOTE: The mount fd only carries state from fsopen() to fsmount(); close it either way so it is not leaked. + (void)::close(mount_fd); + return result; +} + +ErrorOr fsopen(StringView fs_type, int flags) +{ + if (fs_type.is_null()) + return Error::from_errno(EFAULT); + + Syscall::SC_fsopen_params params { { fs_type.characters_without_null_termination(), fs_type.length() }, - source_fd, - flags + flags, }; - int rc = syscall(SC_mount, &params); - HANDLE_SYSCALL_RETURN_VALUE("mount", rc, {}); + int rc = syscall(SC_fsopen, &params); + HANDLE_SYSCALL_RETURN_VALUE("fsopen", rc, rc); +} + +ErrorOr fsmount(int mount_fd, int source_fd, StringView target) +{ + if (target.is_null()) + return Error::from_errno(EFAULT); + + Syscall::SC_fsmount_params params { + mount_fd, + { target.characters_without_null_termination(), target.length() }, + source_fd, + }; + int rc = syscall(SC_fsmount, &params); + HANDLE_SYSCALL_RETURN_VALUE("fsmount", rc, {}); } ErrorOr umount(StringView mount_point) diff --git a/Userland/Libraries/LibCore/System.h b/Userland/Libraries/LibCore/System.h index 912f2f49419..9527e50af70 100644 --- a/Userland/Libraries/LibCore/System.h +++ b/Userland/Libraries/LibCore/System.h @@ -60,6 +60,8 @@ ErrorOr
recvfd(int sockfd, int options); ErrorOr ptrace_peekbuf(pid_t tid, void const* tracee_addr, Bytes destination_buf); ErrorOr mount(int source_fd, StringView target, StringView fs_type, int flags); ErrorOr bindmount(int source_fd, StringView target, int flags); +ErrorOr fsopen(StringView fs_type, int flags); +ErrorOr fsmount(int mount_fd, int source_fd, StringView target_path); ErrorOr remount(StringView target, int flags); ErrorOr umount(StringView mount_point); ErrorOr ptrace(int request, pid_t tid, void* address, void* data);