Kernel: Implement O_DIRECT open() flag to bypass disk caches

Files opened with O_DIRECT will now bypass the disk cache in read/write
operations (though metadata operations will still hit the disk cache.)

This will allow us to test actual disk performance instead of testing
disk *cache* performance, if that's what we want. :^)

There's room for improvement here: we're very aggressively flushing any
dirty cache entries for the specific block before reading/writing that
block. This is done by walking the entire cache, which may be slow.
This commit is contained in:
Andreas Kling 2019-11-05 19:35:12 +01:00
parent 3de3daf765
commit 59ed235c85
Notes: sideshowbarker 2024-07-19 11:22:09 +09:00
7 changed files with 59 additions and 18 deletions

View file

@ -1,5 +1,6 @@
#include <Kernel/Arch/i386/CPU.h>
#include <Kernel/FileSystem/DiskBackedFileSystem.h>
#include <Kernel/FileSystem/FileDescription.h>
#include <Kernel/KBuffer.h>
#include <Kernel/Process.h>
@ -92,11 +93,21 @@ DiskBackedFS::~DiskBackedFS()
{
}
bool DiskBackedFS::write_block(unsigned index, const u8* data)
bool DiskBackedFS::write_block(unsigned index, const u8* data, FileDescription* description)
{
#ifdef DBFS_DEBUG
kprintf("DiskBackedFileSystem::write_block %u, size=%u\n", index, data.size());
#endif
bool allow_cache = !description || !description->is_direct();
if (!allow_cache) {
flush_specific_block_if_needed(index);
DiskOffset base_offset = static_cast<DiskOffset>(index) * static_cast<DiskOffset>(block_size());
device().write(base_offset, block_size(), data);
return true;
}
auto& entry = cache().get(index);
memcpy(entry.data, data, block_size());
entry.is_dirty = true;
@ -106,22 +117,32 @@ bool DiskBackedFS::write_block(unsigned index, const u8* data)
return true;
}
bool DiskBackedFS::write_blocks(unsigned index, unsigned count, const u8* data)
bool DiskBackedFS::write_blocks(unsigned index, unsigned count, const u8* data, FileDescription* description)
{
#ifdef DBFS_DEBUG
kprintf("DiskBackedFileSystem::write_blocks %u x%u\n", index, count);
#endif
for (unsigned i = 0; i < count; ++i)
write_block(index + i, data + i * block_size());
write_block(index + i, data + i * block_size(), description);
return true;
}
bool DiskBackedFS::read_block(unsigned index, u8* buffer) const
bool DiskBackedFS::read_block(unsigned index, u8* buffer, FileDescription* description) const
{
#ifdef DBFS_DEBUG
kprintf("DiskBackedFileSystem::read_block %u\n", index);
#endif
bool allow_cache = !description || !description->is_direct();
if (!allow_cache) {
const_cast<DiskBackedFS*>(this)->flush_specific_block_if_needed(index);
DiskOffset base_offset = static_cast<DiskOffset>(index) * static_cast<DiskOffset>(block_size());
bool success = device().read(base_offset, block_size(), buffer);
ASSERT(success);
return true;
}
auto& entry = cache().get(index);
if (!entry.has_data) {
DiskOffset base_offset = static_cast<DiskOffset>(index) * static_cast<DiskOffset>(block_size());
@ -133,16 +154,16 @@ bool DiskBackedFS::read_block(unsigned index, u8* buffer) const
return true;
}
bool DiskBackedFS::read_blocks(unsigned index, unsigned count, u8* buffer) const
bool DiskBackedFS::read_blocks(unsigned index, unsigned count, u8* buffer, FileDescription* description) const
{
if (!count)
return false;
if (count == 1)
return read_block(index, buffer);
return read_block(index, buffer, description);
u8* out = buffer;
for (unsigned i = 0; i < count; ++i) {
if (!read_block(index + i, out))
if (!read_block(index + i, out, description))
return false;
out += block_size();
}
@ -150,6 +171,20 @@ bool DiskBackedFS::read_blocks(unsigned index, unsigned count, u8* buffer) const
return true;
}
void DiskBackedFS::flush_specific_block_if_needed(unsigned index)
{
LOCKER(m_lock);
if (!cache().is_dirty())
return;
cache().for_each_entry([&](CacheEntry& entry) {
if (entry.is_dirty && entry.block_index == index) {
DiskOffset base_offset = static_cast<DiskOffset>(entry.block_index) * static_cast<DiskOffset>(block_size());
device().write(base_offset, block_size(), entry.data);
entry.is_dirty = false;
}
});
}
void DiskBackedFS::flush_writes_impl()
{
LOCKER(m_lock);

View file

@ -21,14 +21,15 @@ public:
protected:
explicit DiskBackedFS(NonnullRefPtr<DiskDevice>&&);
bool read_block(unsigned index, u8* buffer) const;
bool read_blocks(unsigned index, unsigned count, u8* buffer) const;
bool read_block(unsigned index, u8* buffer, FileDescription* = nullptr) const;
bool read_blocks(unsigned index, unsigned count, u8* buffer, FileDescription* = nullptr) const;
bool write_block(unsigned index, const u8*);
bool write_blocks(unsigned index, unsigned count, const u8*);
bool write_block(unsigned index, const u8*, FileDescription* = nullptr);
bool write_blocks(unsigned index, unsigned count, const u8*, FileDescription* = nullptr);
private:
DiskCache& cache() const;
void flush_specific_block_if_needed(unsigned index);
NonnullRefPtr<DiskDevice> m_device;
mutable OwnPtr<DiskCache> m_cache;

View file

@ -2,6 +2,7 @@
#include <AK/BufferStream.h>
#include <AK/StdLibExtras.h>
#include <Kernel/FileSystem/Ext2FileSystem.h>
#include <Kernel/FileSystem/FileDescription.h>
#include <Kernel/FileSystem/ext2_fs.h>
#include <Kernel/Process.h>
#include <Kernel/RTC.h>
@ -90,7 +91,6 @@ bool Ext2FS::initialize()
kprintf("ext2fs: desc size = %u\n", EXT2_DESC_SIZE(&super_block));
#endif
set_block_size(EXT2_BLOCK_SIZE(&super_block));
ASSERT(block_size() <= (int)max_block_size);
@ -610,7 +610,7 @@ RefPtr<Inode> Ext2FS::get_inode(InodeIdentifier inode) const
return new_inode;
}
ssize_t Ext2FSInode::read_bytes(off_t offset, ssize_t count, u8* buffer, FileDescription*) const
ssize_t Ext2FSInode::read_bytes(off_t offset, ssize_t count, u8* buffer, FileDescription* description) const
{
Locker inode_locker(m_lock);
ASSERT(offset >= 0);
@ -656,7 +656,7 @@ ssize_t Ext2FSInode::read_bytes(off_t offset, ssize_t count, u8* buffer, FileDes
u8 block[max_block_size];
for (int bi = first_block_logical_index; remaining_count && bi <= last_block_logical_index; ++bi) {
bool success = fs().read_block(m_block_list[bi], block);
bool success = fs().read_block(m_block_list[bi], block, description);
if (!success) {
kprintf("ext2fs: read_bytes: read_block(%u) failed (lbi: %u)\n", m_block_list[bi], bi);
return -EIO;
@ -694,7 +694,6 @@ KResult Ext2FSInode::resize(u64 new_size)
return KResult(-ENOSPC);
}
auto block_list = fs().block_list_for_inode(m_raw_inode);
if (blocks_needed_after > blocks_needed_before) {
auto new_blocks = fs().allocate_blocks(fs().group_index_from_inode(index()), blocks_needed_after - blocks_needed_before);
@ -723,7 +722,7 @@ KResult Ext2FSInode::resize(u64 new_size)
return KSuccess;
}
ssize_t Ext2FSInode::write_bytes(off_t offset, ssize_t count, const u8* data, FileDescription*)
ssize_t Ext2FSInode::write_bytes(off_t offset, ssize_t count, const u8* data, FileDescription* description)
{
ASSERT(offset >= 0);
ASSERT(count >= 0);
@ -785,7 +784,7 @@ ssize_t Ext2FSInode::write_bytes(off_t offset, ssize_t count, const u8* data, Fi
ByteBuffer block;
if (offset_into_block != 0 || num_bytes_to_copy != block_size) {
block = ByteBuffer::create_uninitialized(block_size);
bool success = fs().read_block(m_block_list[bi], block.data());
bool success = fs().read_block(m_block_list[bi], block.data(), description);
if (!success) {
kprintf("Ext2FSInode::write_bytes: read_block(%u) failed (lbi: %u)\n", m_block_list[bi], bi);
return -EIO;
@ -805,7 +804,7 @@ ssize_t Ext2FSInode::write_bytes(off_t offset, ssize_t count, const u8* data, Fi
#ifdef EXT2_DEBUG
dbgprintf("Ext2FSInode::write_bytes: writing block %u (offset_into_block: %u)\n", m_block_list[bi], offset_into_block);
#endif
bool success = fs().write_block(m_block_list[bi], block.data());
bool success = fs().write_block(m_block_list[bi], block.data(), description);
if (!success) {
kprintf("Ext2FSInode::write_bytes: write_block(%u) failed (lbi: %u)\n", m_block_list[bi], bi);
ASSERT_NOT_REACHED();

View file

@ -293,6 +293,7 @@ void FileDescription::set_file_flags(u32 flags)
{
m_is_blocking = !(flags & O_NONBLOCK);
m_should_append = flags & O_APPEND;
m_direct = flags & O_DIRECT;
m_file_flags = flags;
}

View file

@ -44,6 +44,8 @@ public:
String absolute_path() const;
bool is_direct() const { return m_direct; }
bool is_directory() const { return m_is_directory; }
File& file() { return *m_file; }
@ -117,5 +119,6 @@ private:
bool m_is_blocking { true };
bool m_is_directory { false };
bool m_should_append { false };
bool m_direct { false };
FIFO::Direction m_fifo_direction { FIFO::Direction::Neither };
};

View file

@ -24,6 +24,7 @@
#define O_DIRECTORY 00200000
#define O_NOFOLLOW 00400000
#define O_CLOEXEC 02000000
#define O_DIRECT 04000000
#define O_NOFOLLOW_NOERROR 0x4000000
class Custody;

View file

@ -25,6 +25,7 @@ __BEGIN_DECLS
#define O_DIRECTORY 00200000
#define O_NOFOLLOW 00400000
#define O_CLOEXEC 02000000
#define O_DIRECT 04000000
#define S_IFMT 0170000
#define S_IFDIR 0040000