ladybird/Userland/Libraries/LibSQL/TreeNode.cpp
Jan de Visser 224804b424 LibSQL: BTree index, Heap, and Meta objects for SQL Storage layer
Unfortunately this patch is quite large.

The main functionality included are a BTree index implementation and
the Heap class which manages persistent storage.

Also included are a Key subclass of the Tuple class, which is a
specialization for index key tuples. This "dragged in" the Meta layer,
which has classes defining SQL objects like tables and indexes.
2021-06-19 22:06:45 +02:00

404 lines
12 KiB
C++

/*
* Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Debug.h>
#include <AK/Format.h>
#include <AK/NonnullOwnPtr.h>
#include <AK/StringBuilder.h>
#include <LibSQL/BTree.h>
#include <LibSQL/Serialize.h>
namespace SQL {
// Creates a down pointer that only records the block pointer of the child.
// The in-memory node starts out null; node()/inflate() load it on demand.
DownPointer::DownPointer(TreeNode* owner, u32 pointer)
: m_owner(owner)
, m_pointer(pointer)
, m_node(nullptr)
{
}
// Creates a down pointer for a child that may already be in memory.
// `node` may be null, in which case the stored block pointer is 0.
// A non-null `node` is adopted, i.e. this DownPointer takes ownership of it.
DownPointer::DownPointer(TreeNode* owner, TreeNode* node)
: m_owner(owner)
, m_pointer((node) ? node->pointer() : 0)
, m_node(adopt_own_if_nonnull(node))
{
}
// Re-homes an existing down pointer under a new owner. The block pointer is
// copied and the in-memory node (if any) is moved out of `down`, so `down`
// no longer holds the node afterwards.
DownPointer::DownPointer(TreeNode* owner, DownPointer& down)
: m_owner(owner)
, m_pointer(down.m_pointer)
, m_node(move(down.m_node))
{
}
// "Copy" constructor that actually steals the in-memory node from `other`.
//
// FIXME This is gross. We modify the other object which we promised
//       to be const. However, this particular constructor is needed
//       when we take DownPointers from the Vector they live in when
//       we split a node. The original object is going to go away, so
//       there is no harm done. However, it's yucky. If anybody has
//       a better idea...
DownPointer::DownPointer(DownPointer const& other)
    : m_owner(other.m_owner)
    , m_pointer(other.pointer())
    // Moving from an empty pointer simply yields an empty pointer, so no
    // separate null branch is needed.
    , m_node(move(const_cast<DownPointer&>(other).m_node))
{
}
// Returns the child node, loading it from storage first when necessary.
// The result is null when there is neither a loaded node nor a block pointer.
TreeNode* DownPointer::node()
{
    // inflate() returns immediately when the node is already present.
    inflate();
    return m_node;
}
void DownPointer::inflate()
{
if (m_node || !m_pointer)
return;
auto buffer = m_owner->tree().read_block(m_pointer);
size_t offset = 0;
m_node = make<TreeNode>(m_owner->tree(), m_owner, m_pointer, buffer, offset);
}
// Creates an empty leaf node. Even an empty node carries one (null) down
// pointer, so that there is always one more down pointer than entries.
TreeNode::TreeNode(BTree& tree, TreeNode* up, u32 pointer)
    : IndexNode(pointer)
    , m_tree(tree)
    , m_up(up)
    , m_entries()
    , m_down()
{
    m_is_leaf = true;
    m_down.append(DownPointer(this, nullptr));
}
// Creates a node whose leftmost down pointer is taken over from `left`.
// If `left` holds an in-memory node, that node is reparented to this node.
// A `pointer` of 0 means "allocate a fresh record pointer from the tree".
TreeNode::TreeNode(BTree& tree, TreeNode* up, DownPointer& left, u32 pointer)
    : IndexNode(pointer)
    , m_tree(tree)
    , m_up(up)
    , m_entries()
    , m_down()
{
    // A node is a leaf exactly when its down pointers point nowhere.
    m_is_leaf = (left.pointer() == 0);

    if (left.m_node != nullptr) {
        left.m_node->m_up = this;
    }
    m_down.append(DownPointer(this, left));

    if (pointer == 0) {
        set_pointer(m_tree.new_record_pointer());
    }
}
// Creates a node whose leftmost child is `left`; the new down pointer adopts
// (takes ownership of) `left`.
// NOTE(review): `left` is dereferenced unconditionally, so callers must pass
// a non-null node.
TreeNode::TreeNode(BTree& tree, TreeNode* up, TreeNode* left, u32 pointer)
    : IndexNode(pointer)
    , m_tree(tree)
    , m_up(up)
    , m_entries()
    , m_down()
{
    // A child without a block pointer means this node is a leaf.
    m_is_leaf = (left->pointer() == 0);
    m_down.append(DownPointer(this, left));
}
// Deserializes a node from `buffer`, starting at `at_offset` (which is
// advanced as data is consumed). The layout mirrors serialize(): an entry
// count, then for every entry its left down pointer followed by the key, and
// finally the rightmost down pointer. A count of zero leaves the node without
// any entries or down pointers.
TreeNode::TreeNode(BTree& tree, TreeNode* up, u32 pointer, ByteBuffer& buffer, size_t& at_offset)
    : IndexNode(pointer)
    , m_tree(tree)
    , m_up(up)
    , m_entries()
    , m_down()
{
    u32 entry_count = 0;
    deserialize_from<u32>(buffer, at_offset, entry_count);
    dbgln_if(SQL_DEBUG, "Deserializing node. Size {}", entry_count);
    if (entry_count == 0)
        return;

    for (u32 index = 0; index < entry_count; ++index) {
        u32 left_pointer = 0;
        deserialize_from<u32>(buffer, at_offset, left_pointer);
        dbgln_if(SQL_DEBUG, "Down[{}] {}", index, left_pointer);

        // The first down pointer decides whether this is a leaf; every
        // subsequent one has to agree with that decision.
        bool const points_nowhere = (left_pointer == 0);
        if (m_down.is_empty()) {
            m_is_leaf = points_nowhere;
        } else {
            VERIFY(points_nowhere == m_is_leaf);
        }

        m_entries.append(Key(m_tree.descriptor(), buffer, at_offset));
        m_down.empend(this, left_pointer);
    }

    u32 right_pointer = 0;
    deserialize_from<u32>(buffer, at_offset, right_pointer);
    dbgln_if(SQL_DEBUG, "Right {}", right_pointer);
    VERIFY((right_pointer == 0) == m_is_leaf);
    m_down.empend(this, right_pointer);
}
// Inserts `key` into the subtree rooted at this node.
//
// Returns true when the key was inserted, false when the tree does not allow
// duplicates and an equal key already exists.
//
// When a node is split, its median key is moved up into the parent and no
// longer lives in any leaf. Checking for duplicates in the target leaf alone
// would therefore miss such promoted keys, so the interior nodes on the
// search path are checked here while descending.
bool TreeNode::insert(Key const& key)
{
    dbgln_if(SQL_DEBUG, "[#{}] INSERT({})", pointer(), key.to_string());
    if (is_leaf())
        return insert_in_leaf(key);

    if (!m_tree.duplicates_allowed()) {
        for (auto& entry : m_entries) {
            if (key == entry) {
                dbgln_if(SQL_DEBUG, "[#{}] duplicate key {}", pointer(), key.to_string());
                return false;
            }
        }
    }

    // Descend into the child left of the first entry greater than the key,
    // or into the rightmost child when there is no such entry. This is the
    // same rule node_for() uses.
    for (size_t ix = 0; ix < size(); ix++) {
        if (key < m_entries[ix])
            return down_node(ix)->insert(key);
    }
    return down_node(size())->insert(key);
}
// Finds the entry equal to `key` in the subtree rooted at this node and makes
// its pointer equal to `key.pointer()`. The node is added to the write-ahead
// log only when the pointer actually changed.
//
// Returns true when a matching entry was found, false otherwise.
//
// The leaf on the search path is consulted first. If nothing matches there,
// the interior nodes on the way back up are checked as well: a split moves
// the median key into the parent, so a key may live in an interior node only.
bool TreeNode::update_key_pointer(Key const& key)
{
    dbgln_if(SQL_DEBUG, "[#{}] UPDATE({}, {})", pointer(), key.to_string(), key.pointer());

    if (!is_leaf()) {
        // Pick the child using the same rule as node_for(): left of the first
        // entry greater than the key, else the rightmost child.
        size_t child_index = size();
        for (size_t ix = 0; ix < size(); ix++) {
            if (key < m_entries[ix]) {
                child_index = ix;
                break;
            }
        }
        if (down_node(child_index)->update_key_pointer(key))
            return true;
    }

    for (auto ix = 0u; ix < size(); ix++) {
        if (key == m_entries[ix]) {
            dbgln_if(SQL_DEBUG, "[#{}] {} == {}",
                pointer(), key.to_string(), m_entries[ix].to_string());
            if (m_entries[ix].pointer() != key.pointer()) {
                m_entries[ix].set_pointer(key.pointer());
                dump_if(SQL_DEBUG, "To WAL");
                tree().add_to_write_ahead_log(this);
            }
            return true;
        }
    }
    return false;
}
// Inserts `key` into this node, which must be a leaf. When the tree does not
// allow duplicates and an equal key is already stored in this leaf, nothing
// is inserted and false is returned; otherwise the result is true.
bool TreeNode::insert_in_leaf(Key const& key)
{
    VERIFY(is_leaf());

    bool const must_be_unique = !m_tree.duplicates_allowed();
    if (must_be_unique) {
        for (size_t ix = 0; ix < m_entries.size(); ++ix) {
            if (!(key == m_entries[ix]))
                continue;
            dbgln_if(SQL_DEBUG, "[#{}] duplicate key {}", pointer(), key.to_string());
            return false;
        }
    }

    dbgln_if(SQL_DEBUG, "[#{}] insert_in_leaf({})", pointer(), key.to_string());
    just_insert(key, nullptr);
    return true;
}
// Computes how many keys fit in one node: the block size minus two u32s,
// divided by the size of one key (its data length plus one u32). The result
// is forced to be odd by rounding an even count down by one.
size_t TreeNode::max_keys_in_node()
{
    auto descriptor = m_tree.descriptor();
    auto const bytes_per_key = descriptor.data_length() + sizeof(u32);
    auto const capacity = (BLOCKSIZE - 2 * sizeof(u32)) / bytes_per_key;
    return ((capacity % 2) == 0) ? capacity - 1 : capacity;
}
// Returns the key stored at index `ix`; the index must be below size().
Key const& TreeNode::operator[](size_t ix) const
{
    VERIFY(ix < size());
    auto const& entry = m_entries[ix];
    return entry;
}
// Returns the block pointer of the `ix`-th child without loading the child.
u32 TreeNode::down_pointer(size_t ix) const
{
    VERIFY(ix < m_down.size());
    auto const& child_link = m_down[ix];
    return child_link.pointer();
}
// Returns the `ix`-th child node; DownPointer::node() loads it on demand.
TreeNode* TreeNode::down_node(size_t ix)
{
    VERIFY(ix < m_down.size());
    auto& child_link = m_down[ix];
    return child_link.node();
}
// Returns the leaf in which `key` belongs. A leaf returns itself; an interior
// node descends into the child left of the first entry that is greater than
// the key, or into the rightmost child when no entry is greater.
TreeNode* TreeNode::node_for(Key const& key)
{
    dump_if(SQL_DEBUG, String::formatted("node_for(Key {})", key.to_string()));
    if (is_leaf())
        return this;

    // Skip every entry that is not greater than the key.
    size_t child_index = 0;
    while (child_index < size() && !(key < m_entries[child_index]))
        ++child_index;

    if (child_index < size()) {
        dbgln_if(SQL_DEBUG, "[{}] {} < {} v{}",
            pointer(), static_cast<String>(key), static_cast<String>(m_entries[child_index]), m_down[child_index].pointer());
    } else {
        dbgln_if(SQL_DEBUG, "[#{}] {} >= {} v{}",
            pointer(), key.to_string(), static_cast<String>(m_entries[size() - 1]), m_down[size()].pointer());
    }
    return down_node(child_index)->node_for(key);
}
// Looks up `key` in the subtree rooted at this node.
//
// On a match the stored pointer is written into `key` (via set_pointer) and
// returned. An empty Optional is returned when the search ends in a leaf
// without a match. Calling this on a node without entries trips
// VERIFY_NOT_REACHED().
Optional<u32> TreeNode::get(Key& key)
{
    dump_if(SQL_DEBUG, String::formatted("get({})", key.to_string()));

    for (size_t ix = 0; ix < size(); ++ix) {
        auto& candidate = m_entries[ix];

        if (key < candidate) {
            // The key sorts before this entry: it can only be in the child to
            // the left of it, if there is one.
            if (!is_leaf()) {
                dbgln_if(SQL_DEBUG, "[{}] {} < {} ({} -> {})",
                    pointer(), key.to_string(), static_cast<String>(candidate),
                    ix, m_down[ix].pointer());
                return down_node(ix)->get(key);
            }
            dbgln_if(SQL_DEBUG, "[#{}] {} < {} -> 0",
                pointer(), key.to_string(), static_cast<String>(candidate));
            return {};
        }

        if (!(key == candidate))
            continue;

        dbgln_if(SQL_DEBUG, "[#{}] {} == {} -> {}",
            pointer(), key.to_string(), static_cast<String>(candidate),
            candidate.pointer());
        key.set_pointer(candidate.pointer());
        return candidate.pointer();
    }

    if (m_entries.is_empty()) {
        dbgln_if(SQL_DEBUG, "[#{}] {} Empty node??", pointer(), key.to_string());
        VERIFY_NOT_REACHED();
    }

    // The key sorts after every entry in this node.
    if (!is_leaf()) {
        dbgln_if(SQL_DEBUG, "[#{}] {} > {} ({} -> {})",
            pointer(), key.to_string(), static_cast<String>(m_entries[size() - 1]),
            size(), m_down[size()].pointer());
        return down_node(size())->get(key);
    }
    dbgln_if(SQL_DEBUG, "[#{}] {} > {} -> 0",
        pointer(), key.to_string(), static_cast<String>(m_entries[size() - 1]));
    return {};
}
// Appends this node to `buffer`: the entry count, then for every entry its
// left down pointer followed by the key, and finally the rightmost down
// pointer. Leaves always write 0 for their down pointers. A node without
// entries writes only the count.
void TreeNode::serialize(ByteBuffer& buffer) const
{
    u32 const entry_count = size();
    serialize_to<u32>(buffer, entry_count);
    if (entry_count == 0)
        return;

    for (u32 ix = 0; ix < entry_count; ++ix) {
        dbgln_if(SQL_DEBUG, "Serializing Left[{}] = {}", ix, m_down[ix].pointer());
        if (is_leaf()) {
            serialize_to<u32>(buffer, 0u);
        } else {
            serialize_to<u32>(buffer, m_down[ix].pointer());
        }
        m_entries[ix].serialize(buffer);
    }

    dbgln_if(SQL_DEBUG, "Serializing Right = {}", m_down[size()].pointer());
    if (is_leaf()) {
        serialize_to<u32>(buffer, 0u);
    } else {
        serialize_to<u32>(buffer, m_down[size()].pointer());
    }
}
// Inserts `key` at its sorted position in this node, with `right` becoming
// the child to the right of the new key (null for leaves). Afterwards the
// node is either split (when it holds more than max_keys_in_node() keys) or
// handed to the write-ahead log.
void TreeNode::just_insert(Key const& key, TreeNode* right)
{
    dbgln_if(SQL_DEBUG, "[#{}] just_insert({}, right = {})",
        pointer(), static_cast<String>(key), (right) ? right->pointer() : 0);
    dump_if(SQL_DEBUG, "Before");

    // Locate the first entry that is greater than the key.
    size_t slot = 0;
    while (slot < size() && !(key < m_entries[slot]))
        ++slot;

    if (slot < size()) {
        m_entries.insert(slot, key);
        VERIFY(is_leaf() == (right == nullptr));
        m_down.insert(slot + 1, DownPointer(this, right));
    } else {
        // No greater entry exists, so the key goes at the end.
        m_entries.append(key);
        m_down.empend(this, right);
    }

    if (size() > max_keys_in_node()) {
        split();
        return;
    }
    dump_if(SQL_DEBUG, "To WAL");
    tree().add_to_write_ahead_log(this);
}
// Splits an over-full node in two. The entries right of the median move to a
// newly created right sibling, and the median key itself moves one level up,
// with the new sibling as its right child. If this node has no parent yet,
// the tree is asked for a new root first. Both halves are added to the
// write-ahead log before the median is inserted into the parent.
void TreeNode::split()
{
    dump_if(SQL_DEBUG, "Splitting node");
    if (!m_up) {
        // Make new m_up. This is the new root node.
        m_up = m_tree.new_root();
    }

    // Index of the first entry / down pointer that moves to the new node.
    // It does not change while splitting, so compute it once instead of
    // calling max_keys_in_node() for every take() below.
    auto const split_index = max_keys_in_node() / 2 + 1;

    // Take the left pointer for the new node:
    DownPointer left = m_down.take(split_index);

    // Create the new right node. Ownership passes to the parent through the
    // just_insert() call at the end, which wraps it in a DownPointer.
    auto* new_node = new TreeNode(tree(), m_up, left);

    // Move the rightmost keys from this node to the new right node:
    while (m_entries.size() > split_index) {
        auto entry = m_entries.take(split_index);
        auto down = m_down.take(split_index);

        // Reparent to new right node:
        if (down.m_node != nullptr) {
            down.m_node->m_up = new_node;
        }
        new_node->m_entries.append(entry);
        new_node->m_down.append(down);
    }

    // Move the median key in the node one level up. Its right node will
    // be the new node:
    auto median = m_entries.take_last();

    dump_if(SQL_DEBUG, "Split Left To WAL");
    tree().add_to_write_ahead_log(this);
    new_node->dump_if(SQL_DEBUG, "Split Right to WAL");
    tree().add_to_write_ahead_log(new_node);

    m_up->just_insert(median, new_node);
}
void TreeNode::dump_if(int flag, String&& msg)
{
if (!flag)
return;
StringBuilder builder;
builder.appendff("[#{}] ", pointer());
if (!msg.is_empty())
builder.appendff("{}", msg);
builder.append(": ");
if (m_up)
builder.appendff("[^{}] -> ", m_up->pointer());
else
builder.append("* -> ");
for (size_t ix = 0; ix < m_entries.size(); ix++) {
if (!is_leaf())
builder.appendff("[v{}] ", m_down[ix].pointer());
else
VERIFY(m_down[ix].pointer() == 0);
builder.appendff("'{}' ", (String)m_entries[ix]);
}
if (!is_leaf()) {
builder.appendff("[v{}]", m_down[size()].pointer());
} else {
VERIFY(m_down[size()].pointer() == 0);
}
builder.appendff(" (size {}", (int)size());
if (is_leaf()) {
builder.append(", leaf");
}
builder.append(")");
dbgln(builder.build());
}
void TreeNode::list_node(int indent)
{
auto do_indent = [&]() {
for (int i = 0; i < indent; ++i) {
warn(" ");
}
};
do_indent();
warnln("--> #{}", pointer());
for (auto ix = 0u; ix < size(); ix++) {
if (!is_leaf()) {
down_node(ix)->list_node(indent + 2);
}
do_indent();
warnln("{}", m_entries[ix].to_string());
}
if (!is_leaf()) {
down_node(size())->list_node(indent + 2);
}
}
}