diff --git a/AK/ByteBuffer.h b/AK/ByteBuffer.h index 15f2aa7638b..ca078e2b286 100644 --- a/AK/ByteBuffer.h +++ b/AK/ByteBuffer.h @@ -8,6 +8,7 @@ #pragma once #include +#include #include #include #include @@ -301,6 +302,23 @@ public: operator ReadonlyBytes() const { return bytes(); } ALWAYS_INLINE size_t capacity() const { return m_inline ? inline_capacity : m_outline_capacity; } + ALWAYS_INLINE bool is_inline() const { return m_inline; } + + struct OutlineBuffer { + Bytes buffer; + size_t capacity { 0 }; + }; + Optional leak_outline_buffer(Badge) + { + if (m_inline) + return {}; + + auto buffer = bytes(); + m_inline = true; + m_size = 0; + + return OutlineBuffer { buffer, capacity() }; + } private: void move_from(ByteBuffer&& other) diff --git a/AK/Forward.h b/AK/Forward.h index e074ffe455d..b5ba818ff7c 100644 --- a/AK/Forward.h +++ b/AK/Forward.h @@ -16,6 +16,8 @@ namespace AK { namespace Detail { template class ByteBuffer; + +class StringData; } enum class TrailingCodePointTransformation : u8; diff --git a/AK/String.cpp b/AK/String.cpp index ef679ea6931..3d0626427aa 100644 --- a/AK/String.cpp +++ b/AK/String.cpp @@ -96,6 +96,23 @@ ErrorOr String::from_stream(Stream& stream, size_t byte_count) return result; } +ErrorOr String::from_string_builder(Badge, StringBuilder& builder) +{ + if (!Utf8View { builder.string_view() }.validate()) + return Error::from_string_literal("String::from_string_builder: Input was not valid UTF-8"); + + String result; + result.replace_with_string_builder(builder); + return result; +} + +String String::from_string_builder_without_validation(Badge, StringBuilder& builder) +{ + String result; + result.replace_with_string_builder(builder); + return result; +} + ErrorOr String::repeated(u32 code_point, size_t count) { VERIFY(is_unicode(code_point)); diff --git a/AK/String.h b/AK/String.h index 1294496e204..130087eaae8 100644 --- a/AK/String.h +++ b/AK/String.h @@ -57,6 +57,9 @@ public: [[nodiscard]] static String from_utf8_without_validation(ReadonlyBytes); + static ErrorOr from_string_builder(Badge, StringBuilder&); + [[nodiscard]] static String from_string_builder_without_validation(Badge, StringBuilder&); + // Creates a new String from a sequence of UTF-16 encoded code points. static ErrorOr from_utf16(Utf16View const&); diff --git a/AK/StringBase.cpp b/AK/StringBase.cpp index 62746b91a58..622266d1764 100644 --- a/AK/StringBase.cpp +++ b/AK/StringBase.cpp @@ -90,6 +90,19 @@ bool StringBase::operator==(StringBase const& other) const return bytes() == other.bytes(); } +void StringBase::replace_with_string_builder(StringBuilder& builder) +{ + if (builder.length() <= MAX_SHORT_STRING_BYTE_COUNT) { + return replace_with_new_short_string(builder.length(), [&](Bytes buffer) { + builder.string_view().bytes().copy_to(buffer); + }); + } + + destroy_string(); + + m_data = &StringData::create_from_string_builder(builder).leak_ref(); +} + ErrorOr StringBase::replace_with_uninitialized_buffer(size_t byte_count) { if (byte_count <= MAX_SHORT_STRING_BYTE_COUNT) diff --git a/AK/StringBase.h b/AK/StringBase.h index a105f5f6173..e6086be2f72 100644 --- a/AK/StringBase.h +++ b/AK/StringBase.h @@ -92,6 +92,8 @@ protected: callback(buffer); } + void replace_with_string_builder(StringBuilder&); + // This is not a trivial operation with storage, so it does not belong here. Unfortunately, it // is impossible to implement it without access to StringData. ErrorOr substring_from_byte_offset_with_shared_superstring(size_t start, size_t byte_count) const; diff --git a/AK/StringBuilder.cpp b/AK/StringBuilder.cpp index 33306fed2e4..d6746edd633 100644 --- a/AK/StringBuilder.cpp +++ b/AK/StringBuilder.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -18,16 +19,33 @@ namespace AK { +static constexpr auto STRING_BASE_PREFIX_SIZE = sizeof(Detail::StringData); + +static ErrorOr create_buffer(size_t capacity) +{ + StringBuilder::Buffer buffer; + + if (capacity > StringBuilder::inline_capacity) + TRY(buffer.try_ensure_capacity(STRING_BASE_PREFIX_SIZE + capacity)); + + TRY(buffer.try_resize(STRING_BASE_PREFIX_SIZE)); + return buffer; +} + ErrorOr StringBuilder::create(size_t initial_capacity) { - StringBuilder builder; - TRY(builder.m_buffer.try_ensure_capacity(initial_capacity)); - return builder; + auto buffer = TRY(create_buffer(initial_capacity)); + return StringBuilder { move(buffer) }; } StringBuilder::StringBuilder(size_t initial_capacity) + : m_buffer(MUST(create_buffer(initial_capacity))) +{ +} + +StringBuilder::StringBuilder(Buffer buffer) + : m_buffer(move(buffer)) { - m_buffer.ensure_capacity(initial_capacity); } inline ErrorOr StringBuilder::will_append(size_t size) @@ -47,12 +65,12 @@ inline ErrorOr StringBuilder::will_append(size_t size) size_t StringBuilder::length() const { - return m_buffer.size(); + return m_buffer.size() - STRING_BASE_PREFIX_SIZE; } bool StringBuilder::is_empty() const { - return m_buffer.is_empty(); + return length() == 0; } void StringBuilder::trim(size_t count) @@ -122,14 +140,18 @@ ByteString StringBuilder::to_byte_string() const return ByteString((char const*)data(), length()); } -ErrorOr StringBuilder::to_string() const +ErrorOr StringBuilder::to_string() { - return String::from_utf8(string_view()); + if (m_buffer.is_inline()) + return String::from_utf8(string_view()); + return String::from_string_builder({}, *this); } -String StringBuilder::to_string_without_validation() const +String StringBuilder::to_string_without_validation() { - return String::from_utf8_without_validation(string_view().bytes()); + if (m_buffer.is_inline()) + return String::from_utf8_without_validation(string_view().bytes()); + return String::from_string_builder_without_validation({}, *this); } FlyString StringBuilder::to_fly_string_without_validation() const @@ -144,22 +166,22 @@ ErrorOr StringBuilder::to_fly_string() const u8* StringBuilder::data() { - return m_buffer.data(); + return m_buffer.data() + STRING_BASE_PREFIX_SIZE; } u8 const* StringBuilder::data() const { - return m_buffer.data(); + return m_buffer.data() + STRING_BASE_PREFIX_SIZE; } StringView StringBuilder::string_view() const { - return StringView { data(), m_buffer.size() }; + return m_buffer.span().slice(STRING_BASE_PREFIX_SIZE); } void StringBuilder::clear() { - m_buffer.clear(); + m_buffer.resize(STRING_BASE_PREFIX_SIZE); } ErrorOr StringBuilder::try_append_code_point(u32 code_point) @@ -272,4 +294,14 @@ ErrorOr StringBuilder::try_append_escaped_for_json(StringView string) return {}; } +auto StringBuilder::leak_buffer_for_string_construction(Badge) -> Optional +{ + if (auto buffer = m_buffer.leak_outline_buffer({}); buffer.has_value()) { + clear(); + return buffer; + } + + return {}; +} + } diff --git a/AK/StringBuilder.h b/AK/StringBuilder.h index 2dfff324249..5b83bcf0e6c 100644 --- a/AK/StringBuilder.h +++ b/AK/StringBuilder.h @@ -18,6 +18,7 @@ class StringBuilder { public: static constexpr size_t inline_capacity = 256; + using Buffer = Detail::ByteBuffer; using OutputType = ByteString; static ErrorOr create(size_t initial_capacity = inline_capacity); @@ -61,8 +62,8 @@ public: [[nodiscard]] ByteString to_byte_string() const; - [[nodiscard]] String to_string_without_validation() const; - ErrorOr to_string() const; + [[nodiscard]] String to_string_without_validation(); + ErrorOr to_string(); [[nodiscard]] FlyString to_fly_string_without_validation() const; ErrorOr to_fly_string() const; @@ -95,12 +96,16 @@ public: return {}; } + Optional leak_buffer_for_string_construction(Badge); + private: + explicit StringBuilder(Buffer); + ErrorOr will_append(size_t); u8* data(); u8 const* data() const; - Detail::ByteBuffer m_buffer; + Buffer m_buffer; }; } diff --git a/AK/StringData.h b/AK/StringData.h index 26c0aefea07..c029afd6413 100644 --- a/AK/StringData.h +++ b/AK/StringData.h @@ -11,6 +11,7 @@ #include #include #include +#include #include namespace AK::Detail { @@ -20,25 +21,39 @@ public: static ErrorOr> create_uninitialized(size_t byte_count, u8*& buffer) { VERIFY(byte_count); - void* slot = malloc(allocation_size_for_string_data(byte_count)); - if (!slot) { + + auto capacity = allocation_size_for_string_data(byte_count); + void* slot = malloc(capacity); + if (!slot) return Error::from_errno(ENOMEM); - } - auto new_string_data = adopt_ref(*new (slot) StringData(byte_count)); + + auto new_string_data = adopt_ref(*new (slot) StringData(byte_count, capacity)); buffer = const_cast(new_string_data->bytes().data()); return new_string_data; } + static NonnullRefPtr create_from_string_builder(StringBuilder& builder) + { + auto byte_count = builder.length(); + VERIFY(byte_count > MAX_SHORT_STRING_BYTE_COUNT); + + auto buffer = builder.leak_buffer_for_string_construction({}); + VERIFY(buffer.has_value()); // We should only arrive here if the buffer is outlined. + + return adopt_ref(*new (buffer->buffer.data()) StringData(byte_count, buffer->capacity)); + } + static ErrorOr> create_substring(StringData const& superstring, size_t start, size_t byte_count) { // Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization. VERIFY(byte_count > MAX_SHORT_STRING_BYTE_COUNT); - void* slot = malloc(sizeof(StringData) + sizeof(StringData::SubstringData)); - if (!slot) { + auto capacity = sizeof(StringData) + sizeof(StringData::SubstringData); + void* slot = malloc(capacity); + if (!slot) return Error::from_errno(ENOMEM); - } - return adopt_ref(*new (slot) StringData(superstring, start, byte_count)); + + return adopt_ref(*new (slot) StringData(superstring, start, byte_count, capacity)); } struct SubstringData { @@ -48,7 +63,7 @@ public: void operator delete(void* ptr) { - kfree_sized(ptr, allocation_size_for_string_data(static_cast(ptr)->m_byte_count)); + kfree_sized(ptr, static_cast(ptr)->m_capacity); } ~StringData() @@ -99,13 +114,15 @@ private: return sizeof(StringData) + (sizeof(char) * length); } - explicit StringData(size_t byte_count) + StringData(size_t byte_count, size_t capacity) : m_byte_count(byte_count) + , m_capacity(capacity) { } - StringData(StringData const& superstring, size_t start, size_t byte_count) + StringData(StringData const& superstring, size_t start, size_t byte_count, size_t capacity) : m_byte_count(byte_count) + , m_capacity(capacity) , m_substring(true) { auto& data = const_cast(substring_data()); @@ -125,6 +142,8 @@ private: } u32 m_byte_count { 0 }; + u32 m_capacity { 0 }; + mutable unsigned m_hash { 0 }; mutable bool m_has_hash { false }; bool m_substring { false };