AK: Add a UTF-16 string with optimized short- and ASCII-string storage

This is a strictly UTF-16 string with some optimizations for ASCII.

* If created from a short UTF-8 or UTF-16 string that is also ASCII, then the string is stored in an inlined byte buffer.
* If created from a long UTF-8 or UTF-16 string that is also ASCII, then the string is stored in an outlined char buffer.
* If created from a short or long UTF-8 or UTF-16 string that is not ASCII, then the string is stored in an outlined char16 buffer.

We do not store short non-ASCII text in the inlined buffer to avoid confusion with operations such as `length_in_code_units` and `code_unit_at`. For example, "😀" would be stored as 4 UTF-8 bytes in short string form. But we still want `length_in_code_units` to be 2, and `code_unit_at(0)` to be 0xD83D.
Author: https://github.com/trflynn89 Commit: fe676585f5 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5388 Reviewed-by: https://github.com/shannonbooth ✅
2025-07-29 12:19:54 +00:00 · 2025-06-12 19:29:41 -04:00 · 2025-06-12 19:29:41 -04:00 · fe676585f5 · 2025-07-18 16:47:31 +00:00
commit fe676585f5
parent 8fbb80fffc
17 changed files with 1527 additions and 44 deletions
--- a/AK/CMakeLists.txt
+++ b/AK/CMakeLists.txt
@ -29,6 +29,8 @@ set(SOURCES
    StringUtils.cpp
    StringView.cpp
    Time.cpp
+    Utf16String.cpp
+    Utf16StringData.cpp
    Utf16View.cpp
    Utf32View.cpp
    Utf8View.cpp
--- a/AK/Forward.h
+++ b/AK/Forward.h
@ -19,6 +19,7 @@ template<size_t inline_capacity>
 class ByteBuffer;

 class StringData;
+class Utf16StringData;

 }

@ -52,6 +53,7 @@ class String;
 class StringBuilder;
 class StringView;
 class UnixDateTime;
+class Utf16String;
 class Utf16View;
 class Utf32CodePointIterator;
 class Utf32View;
@ -198,6 +200,7 @@ using AK::StringView;
 using AK::TrailingCodePointTransformation;
 using AK::Traits;
 using AK::UnixDateTime;
+using AK::Utf16String;
 using AK::Utf16View;
 using AK::Utf32CodePointIterator;
 using AK::Utf32View;
--- a/AK/StringBuilder.cpp
+++ b/AK/StringBuilder.cpp
@ -249,8 +249,10 @@ ErrorOr<void> StringBuilder::try_append(Utf16View const& utf16_view)
 {
    if (utf16_view.is_empty())
        return {};
+    if (utf16_view.has_ascii_storage())
+        return try_append(utf16_view.bytes());

-    auto remaining_view = utf16_view.span();
+    auto remaining_view = utf16_view.utf16_span();
    auto maximum_utf8_length = UnicodeUtils::maximum_utf8_length_from_utf16(remaining_view);

    // Possibly over-allocate a little to ensure we don't have to allocate later.
--- a/AK/Utf16String.cpp
+++ b/AK/Utf16String.cpp
@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2025, Tim Flynn <trflynn89@ladybird.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/Utf16String.h>
+#include <AK/Utf32View.h>
+
+#include <simdutf.h>
+
+namespace AK {
+
+static_assert(sizeof(Detail::ShortString) == sizeof(Detail::Utf16StringData*));
+
+// Builds a string from UTF-8 text without checking that the text is well-formed. ASCII input that fits in the
+// inline buffer is stored in place; everything else is handed to Utf16StringData.
+Utf16String Utf16String::from_utf8_without_validation(StringView utf8_string)
+{
+    auto const byte_count = utf8_string.length();
+    auto const fits_inline = byte_count <= Detail::MAX_SHORT_STRING_BYTE_COUNT && utf8_string.is_ascii();
+
+    if (!fits_inline)
+        return Utf16String { Detail::Utf16StringData::from_utf8(utf8_string, Detail::Utf16StringData::AllowASCIIStorage::Yes) };
+
+    Utf16String short_string;
+    short_string.m_value.short_ascii_string = Detail::ShortString::create_with_byte_count(byte_count);
+
+    // ASCII bytes are copied verbatim into the inline storage.
+    auto copied = utf8_string.bytes().copy_to(short_string.m_value.short_ascii_string.storage);
+    VERIFY(copied == byte_count);
+
+    return short_string;
+}
+
+// Builds a string from a UTF-16 view without checking that the view is well-formed. ASCII input that fits in the
+// inline buffer is stored in place as 8-bit bytes; everything else is handed to Utf16StringData.
+Utf16String Utf16String::from_utf16_without_validation(Utf16View const& utf16_string)
+{
+    auto const code_unit_count = utf16_string.length_in_code_units();
+
+    if (code_unit_count > Detail::MAX_SHORT_STRING_BYTE_COUNT || !utf16_string.is_ascii())
+        return Utf16String { Detail::Utf16StringData::from_utf16(utf16_string) };
+
+    Utf16String short_string;
+    short_string.m_value.short_ascii_string = Detail::ShortString::create_with_byte_count(code_unit_count);
+
+    size_t written = 0;
+
+    if (utf16_string.has_ascii_storage()) {
+        // The view already holds 8-bit data, so the bytes can be copied as-is.
+        written = utf16_string.bytes().copy_to(short_string.m_value.short_ascii_string.storage);
+    } else {
+        // The view holds 16-bit code units that are all ASCII; narrow them into the inline buffer.
+        written = simdutf::convert_utf16_to_utf8(utf16_string.utf16_span().data(), code_unit_count, reinterpret_cast<char*>(short_string.m_value.short_ascii_string.storage));
+    }
+
+    VERIFY(written == code_unit_count);
+    return short_string;
+}
+
+// Builds a string from a UTF-32 view. ASCII input that fits in the inline buffer is stored in place as 8-bit
+// bytes; everything else is handed to Utf16StringData.
+Utf16String Utf16String::from_utf32(Utf32View const& utf32_string)
+{
+    auto const code_point_count = utf32_string.length();
+
+    if (code_point_count > Detail::MAX_SHORT_STRING_BYTE_COUNT || !utf32_string.is_ascii())
+        return Utf16String { Detail::Utf16StringData::from_utf32(utf32_string) };
+
+    Utf16String short_string;
+    short_string.m_value.short_ascii_string = Detail::ShortString::create_with_byte_count(code_point_count);
+
+    auto const* source = reinterpret_cast<char32_t const*>(utf32_string.code_points());
+    auto* destination = reinterpret_cast<char*>(short_string.m_value.short_ascii_string.storage);
+
+    auto written = simdutf::convert_utf32_to_utf8(source, code_point_count, destination);
+    VERIFY(written == code_point_count);
+
+    return short_string;
+}
+
+// Formats a Utf16String. Strings with ASCII storage (inline or heap) go through put_string(); strings with
+// UTF-16 storage are appended to the underlying builder as a UTF-16 view.
+ErrorOr<void> Formatter<Utf16String>::format(FormatBuilder& builder, Utf16String const& utf16_string)
+{
+    if (!utf16_string.has_long_utf16_storage())
+        return builder.put_string(utf16_string.ascii_view());
+
+    return builder.builder().try_append(utf16_string.utf16_view());
+}
+
+}
--- a/AK/Utf16String.h
+++ b/AK/Utf16String.h
@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2025, Tim Flynn <trflynn89@ladybird.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Badge.h>
+#include <AK/Error.h>
+#include <AK/Format.h>
+#include <AK/NonnullRefPtr.h>
+#include <AK/String.h>
+#include <AK/StringView.h>
+#include <AK/Traits.h>
+#include <AK/UnicodeUtils.h>
+#include <AK/Utf16StringBase.h>
+#include <AK/Utf16StringData.h>
+#include <AK/Utf16View.h>
+#include <AK/Utf8View.h>
+
+namespace AK {
+
+// Utf16String is a strongly owned sequence of Unicode code points encoded as UTF-16.
+//
+// The data may or may not be heap-allocated, and may or may not be reference counted. As a memory optimization, if the
+// UTF-16 string is entirely ASCII, the string is stored as 8-bit bytes.
+class [[nodiscard]] Utf16String : public Detail::Utf16StringBase {
+public:
+    using Utf16StringBase::Utf16StringBase;
+
+    // Takes over the storage of an existing base object.
+    explicit constexpr Utf16String(Utf16StringBase&& base)
+        : Utf16StringBase(move(base))
+    {
+    }
+
+    // Creates a string from UTF-8 text. The input is VERIFY'd to be valid UTF-8 before it is converted.
+    ALWAYS_INLINE static Utf16String from_utf8(StringView utf8_string)
+    {
+        VERIFY(Utf8View { utf8_string }.validate());
+        return from_utf8_without_validation(utf8_string);
+    }
+
+    // Creates a string from a String without validating it again.
+    // NOTE(review): presumably safe because a String only ever holds valid UTF-8 - confirm against AK::String.
+    ALWAYS_INLINE static Utf16String from_utf8(String const& utf8_string)
+    {
+        return from_utf8_without_validation(utf8_string);
+    }
+
+    // Like from_utf8(StringView), but reports invalid UTF-8 as an Error instead of asserting.
+    ALWAYS_INLINE static ErrorOr<Utf16String> try_from_utf8(StringView utf8_string)
+    {
+        if (!Utf8View { utf8_string }.validate())
+            return Error::from_string_literal("Input was not valid UTF-8");
+        return from_utf8_without_validation(utf8_string);
+    }
+
+    // Creates a string from a UTF-16 view. The input is VERIFY'd to be valid UTF-16 before it is copied.
+    ALWAYS_INLINE static Utf16String from_utf16(Utf16View const& utf16_string)
+    {
+        VERIFY(utf16_string.validate());
+        return from_utf16_without_validation(utf16_string);
+    }
+
+    // Like from_utf16(Utf16View const&), but reports invalid UTF-16 as an Error instead of asserting.
+    ALWAYS_INLINE static ErrorOr<Utf16String> try_from_utf16(Utf16View const& utf16_string)
+    {
+        if (!utf16_string.validate())
+            return Error::from_string_literal("Input was not valid UTF-16");
+        return from_utf16_without_validation(utf16_string);
+    }
+
+    // Factories that skip the validity checks performed by the wrappers above; defined in Utf16String.cpp.
+    static Utf16String from_utf8_without_validation(StringView);
+    static Utf16String from_utf16_without_validation(Utf16View const&);
+    static Utf16String from_utf32(Utf32View const&);
+
+    // The overloads below are selected when the argument is itself a Utf16String, and are deleted so that such
+    // calls fail to compile.
+    // NOTE(review): presumably this stops callers from needlessly re-creating a string through the implicit
+    // Utf16String -> Utf16View conversion - confirm intent.
+    template<typename T>
+    requires(IsOneOf<RemoveCVReference<T>, Utf16String>)
+    static Utf16String from_utf16(T&&) = delete;
+
+    template<typename T>
+    requires(IsOneOf<RemoveCVReference<T>, Utf16String>)
+    static ErrorOr<Utf16String> try_from_utf16(T&&) = delete;
+
+    template<typename T>
+    requires(IsOneOf<RemoveCVReference<T>, Utf16String>)
+    static Utf16String from_utf16_without_validation(T&&) = delete;
+
+private:
+    // Wraps reference-counted string data produced by the Utf16StringData factories.
+    ALWAYS_INLINE explicit Utf16String(NonnullRefPtr<Detail::Utf16StringData const> value)
+        : Utf16StringBase(move(value))
+    {
+    }
+};
+
+template<>
+struct Formatter<Utf16String> : Formatter<FormatString> {
+    ErrorOr<void> format(FormatBuilder&, Utf16String const&);
+};
+
+template<>
+struct Traits<Utf16String> : public DefaultTraits<Utf16String> {
+    static unsigned hash(Utf16String const& s) { return s.hash(); }
+};
+
+}
+
+// User-defined literal that builds a Utf16String from a narrow (UTF-8) string literal. The literal's validity is
+// only checked via ASSERT.
+[[nodiscard]] ALWAYS_INLINE AK::Utf16String operator""_utf16(char const* string, size_t length)
+{
+    AK::StringView const literal { string, length };
+    ASSERT(AK::Utf8View { literal }.validate());
+
+    return AK::Utf16String::from_utf8_without_validation(literal);
+}
+
+// User-defined literal that builds a Utf16String from a char16_t (UTF-16) string literal. The literal's validity
+// is only checked via ASSERT.
+[[nodiscard]] ALWAYS_INLINE AK::Utf16String operator""_utf16(char16_t const* string, size_t length)
+{
+    AK::Utf16View const literal { string, length };
+    ASSERT(literal.validate());
+
+    return AK::Utf16String::from_utf16_without_validation(literal);
+}
--- a/AK/Utf16StringBase.h
+++ b/AK/Utf16StringBase.h
@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2025, Tim Flynn <trflynn89@ladybird.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/CharacterTypes.h>
+#include <AK/NonnullRefPtr.h>
+#include <AK/StringBase.h>
+#include <AK/StringView.h>
+#include <AK/Utf16StringData.h>
+#include <AK/Utf16View.h>
+
+namespace AK::Detail {
+
+class Utf16StringBase {
+public:
+    constexpr Utf16StringBase()
+        : Utf16StringBase(ShortString::create_empty())
+    {
+    }
+
+    explicit constexpr Utf16StringBase(ShortString short_string)
+        : m_value { .short_ascii_string = short_string }
+    {
+    }
+
+    ALWAYS_INLINE explicit Utf16StringBase(NonnullRefPtr<Utf16StringData const> value)
+        : m_value { .data = &value.leak_ref() }
+    {
+    }
+
+    // Copies the raw union value from `other`. When that value refers to heap-allocated string data (rather than
+    // an inline short string), an additional reference is taken on the data.
+    ALWAYS_INLINE Utf16StringBase(Utf16StringBase const& other)
+        : m_value(other.m_value)
+    {
+        if (has_long_storage())
+            data_without_union_member_assertion()->ref();
+    }
+
+    constexpr Utf16StringBase(Utf16StringBase&& other)
+        : m_value(other.m_value)
+    {
+        other.m_value = { .short_ascii_string = ShortString::create_empty() };
+    }
+
+    constexpr ~Utf16StringBase()
+    {
+        if (!is_constant_evaluated())
+            destroy_string();
+    }
+
+    ALWAYS_INLINE operator Utf16View() const& { return utf16_view(); }
+    explicit operator Utf16View() const&& = delete;
+
+    [[nodiscard]] ALWAYS_INLINE String to_utf8(AllowLonelySurrogates allow_lonely_surrogates = AllowLonelySurrogates::Yes) const
+    {
+        return MUST(utf16_view().to_utf8(allow_lonely_surrogates));
+    }
+
+    [[nodiscard]] ALWAYS_INLINE String to_utf8_but_should_be_ported_to_utf16(AllowLonelySurrogates allow_lonely_surrogates = AllowLonelySurrogates::Yes) const
+    {
+        return to_utf8(allow_lonely_surrogates);
+    }
+
+    [[nodiscard]] ALWAYS_INLINE ByteString to_byte_string(AllowLonelySurrogates allow_lonely_surrogates = AllowLonelySurrogates::Yes) const
+    {
+        return MUST(utf16_view().to_byte_string(allow_lonely_surrogates));
+    }
+
+    [[nodiscard]] ALWAYS_INLINE StringView ascii_view() const&
+    {
+        if (has_short_ascii_storage())
+            return short_ascii_string_without_union_member_assertion().bytes();
+
+        VERIFY(has_long_ascii_storage());
+        return data_without_union_member_assertion()->ascii_view();
+    }
+
+    [[nodiscard]] ALWAYS_INLINE Utf16View utf16_view() const&
+    {
+        if (has_short_ascii_storage())
+            return Utf16View { ascii_view().characters_without_null_termination(), length_in_code_units() };
+        return data_without_union_member_assertion()->utf16_view();
+    }
+
+    StringView ascii_view() const&& = delete;
+    Utf16View utf16_view() const&& = delete;
+
+    // Copy assignment: drops the reference to the current heap data (if any), copies the raw union value from
+    // `other`, and takes a reference on the newly shared heap data (if any). Self-assignment is a no-op.
+    ALWAYS_INLINE Utf16StringBase& operator=(Utf16StringBase const& other)
+    {
+        if (&other == this)
+            return *this;
+
+        // Releases our reference when we currently hold heap-allocated data.
+        destroy_string();
+
+        m_value = other.m_value;
+
+        if (has_long_storage())
+            data_without_union_member_assertion()->ref();
+
+        return *this;
+    }
+
+    // Move assignment: drops the reference to the current heap data (if any), takes over the raw union value of
+    // `other`, and leaves `other` as an empty short string.
+    ALWAYS_INLINE Utf16StringBase& operator=(Utf16StringBase&& other)
+    {
+        // Self-move must be a no-op. Without this guard we would unref (and possibly free) our own data and then
+        // store the stale pointer back into m_value.
+        if (&other == this)
+            return *this;
+
+        if (has_long_storage())
+            data_without_union_member_assertion()->unref();
+
+        m_value = exchange(other.m_value, { .short_ascii_string = ShortString::create_empty() });
+        return *this;
+    }
+
+    // Compares two strings for equality, choosing a comparison based on how each side is stored.
+    [[nodiscard]] ALWAYS_INLINE bool operator==(Utf16StringBase const& other) const
+    {
+        // Both inline: compare the raw bits of the two unions.
+        // NOTE(review): this relies on unused inline bytes being identical for equal strings - presumably
+        // ShortString zero-fills them; confirm.
+        if (has_short_ascii_storage() && other.has_short_ascii_storage())
+            return bit_cast<FlatPtr>(m_value) == bit_cast<FlatPtr>(other.m_value);
+
+        // Both heap-allocated: defer to Utf16StringData's comparison.
+        if (has_long_storage() && other.has_long_storage())
+            return *data_without_union_member_assertion() == *other.data_without_union_member_assertion();
+
+        // One inline and one heap-allocated: compare through views.
+        return utf16_view() == other.utf16_view();
+    }
+
+    [[nodiscard]] ALWAYS_INLINE bool operator==(Utf16View const& other) const { return utf16_view() == other; }
+    [[nodiscard]] ALWAYS_INLINE bool operator==(StringView other) const { return utf16_view() == other; }
+
+    [[nodiscard]] ALWAYS_INLINE bool equals_ignoring_ascii_case(Utf16View const& other) const { return utf16_view().equals_ignoring_ascii_case(other); }
+    [[nodiscard]] ALWAYS_INLINE bool equals_ignoring_ascii_case(Utf16StringBase const& other) const { return utf16_view().equals_ignoring_ascii_case(other.utf16_view()); }
+
+    template<typename... Ts>
+    [[nodiscard]] ALWAYS_INLINE bool is_one_of(Ts&&... strings) const
+    {
+        return (this->operator==(forward<Ts>(strings)) || ...);
+    }
+
+    template<typename... Ts>
+    [[nodiscard]] ALWAYS_INLINE bool is_one_of_ignoring_ascii_case(Ts&&... strings) const
+    {
+        return (this->equals_ignoring_ascii_case(forward<Ts>(strings)) || ...);
+    }
+
+    // Returns the hash of the string. Heap-allocated strings delegate to their data object; inline strings hash
+    // their bytes directly.
+    [[nodiscard]] ALWAYS_INLINE u32 hash() const
+    {
+        if (has_long_storage())
+            return data_without_union_member_assertion()->hash();
+
+        StringView const inline_bytes { short_ascii_string_without_union_member_assertion().bytes() };
+        return inline_bytes.hash();
+    }
+
+    [[nodiscard]] ALWAYS_INLINE bool is_empty() const { return length_in_code_units() == 0uz; }
+    [[nodiscard]] ALWAYS_INLINE bool is_ascii() const { return utf16_view().is_ascii(); }
+
+    [[nodiscard]] ALWAYS_INLINE size_t length_in_code_units() const
+    {
+        if (has_short_ascii_storage())
+            return short_ascii_string_without_union_member_assertion().byte_count();
+        return data_without_union_member_assertion()->length_in_code_units();
+    }
+
+    [[nodiscard]] ALWAYS_INLINE size_t length_in_code_points() const
+    {
+        if (has_short_ascii_storage())
+            return short_ascii_string_without_union_member_assertion().byte_count();
+        return data_without_union_member_assertion()->length_in_code_points();
+    }
+
+    [[nodiscard]] ALWAYS_INLINE char16_t code_unit_at(size_t code_unit_offset) const { return utf16_view().code_unit_at(code_unit_offset); }
+    [[nodiscard]] ALWAYS_INLINE u32 code_point_at(size_t code_unit_offset) const { return utf16_view().code_point_at(code_unit_offset); }
+
+    [[nodiscard]] ALWAYS_INLINE size_t code_unit_offset_of(size_t code_point_offset) const
+    {
+        if (has_ascii_storage())
+            return code_point_offset;
+        return utf16_view().code_unit_offset_of(code_point_offset);
+    }
+
+    [[nodiscard]] ALWAYS_INLINE size_t code_point_offset_of(size_t code_unit_offset) const
+    {
+        if (has_ascii_storage())
+            return code_unit_offset;
+        return utf16_view().code_point_offset_of(code_unit_offset);
+    }
+
+    [[nodiscard]] ALWAYS_INLINE Utf16CodePointIterator begin() const { return utf16_view().begin(); }
+    [[nodiscard]] ALWAYS_INLINE Utf16CodePointIterator end() const { return utf16_view().end(); }
+
+    [[nodiscard]] ALWAYS_INLINE Utf16View substring_view(size_t code_unit_offset, size_t code_unit_length) const
+    {
+        return utf16_view().substring_view(code_unit_offset, code_unit_length);
+    }
+
+    [[nodiscard]] ALWAYS_INLINE Utf16View substring_view(size_t code_unit_offset) const
+    {
+        return utf16_view().substring_view(code_unit_offset);
+    }
+
+    ALWAYS_INLINE Optional<size_t> find_code_unit_offset(char16_t needle, size_t start_offset = 0) const
+    {
+        return utf16_view().find_code_unit_offset(needle, start_offset);
+    }
+
+    ALWAYS_INLINE Optional<size_t> find_code_unit_offset(Utf16View const& needle, size_t start_offset = 0) const
+    {
+        return utf16_view().find_code_unit_offset(needle, start_offset);
+    }
+
+    ALWAYS_INLINE Optional<size_t> find_code_unit_offset_ignoring_case(Utf16View const& needle, size_t start_offset = 0) const
+    {
+        return utf16_view().find_code_unit_offset_ignoring_case(needle, start_offset);
+    }
+
+    [[nodiscard]] ALWAYS_INLINE bool starts_with(Utf16View const& needle) const { return utf16_view().starts_with(needle); }
+
+    // Returns true when the string is stored inline, i.e. when SHORT_STRING_FLAG is set in the short string's
+    // byte_count_and_short_string_flag field. This is primarily interesting to unit tests.
+    [[nodiscard]] constexpr bool has_short_ascii_storage() const
+    {
+        // During constant evaluation the union member is read directly; at runtime the read goes through the
+        // laundering accessor instead.
+        if (is_constant_evaluated())
+            return (m_value.short_ascii_string.byte_count_and_short_string_flag & StringBase::SHORT_STRING_FLAG) != 0;
+        return (short_ascii_string_without_union_member_assertion().byte_count_and_short_string_flag & StringBase::SHORT_STRING_FLAG) != 0;
+    }
+
+    // This is primarily interesting to unit tests.
+    [[nodiscard]] ALWAYS_INLINE bool has_long_ascii_storage() const
+    {
+        if (has_short_ascii_storage())
+            return false;
+        return data_without_union_member_assertion()->has_ascii_storage();
+    }
+
+    // This is primarily interesting to unit tests.
+    [[nodiscard]] ALWAYS_INLINE bool has_ascii_storage() const
+    {
+        return has_short_ascii_storage() || has_long_ascii_storage();
+    }
+
+    // This is primarily interesting to unit tests.
+    [[nodiscard]] ALWAYS_INLINE bool has_long_utf16_storage() const
+    {
+        if (has_short_ascii_storage())
+            return false;
+        return data_without_union_member_assertion()->has_utf16_storage();
+    }
+
+    // This is primarily interesting to unit tests.
+    [[nodiscard]] ALWAYS_INLINE bool has_long_storage() const
+    {
+        return !has_short_ascii_storage();
+    }
+
+protected:
+    ALWAYS_INLINE void destroy_string() const
+    {
+        if (has_long_storage())
+            data_without_union_member_assertion()->unref();
+    }
+
+    // This is technically **invalid**! See StringBase for details.
+    ALWAYS_INLINE ShortString const& short_ascii_string_without_union_member_assertion() const { return *__builtin_launder(&m_value.short_ascii_string); }
+    ALWAYS_INLINE Utf16StringData const* data_without_union_member_assertion() const { return *__builtin_launder(&m_value.data); }
+
+    union {
+        ShortString short_ascii_string;
+        Utf16StringData const* data;
+    } m_value;
+};
+
+}
--- a/AK/Utf16StringData.cpp
+++ b/AK/Utf16StringData.cpp
@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2025, Tim Flynn <trflynn89@ladybird.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <AK/TypedTransfer.h>
+#include <AK/Utf16StringData.h>
+#include <AK/Utf32View.h>
+#include <AK/Utf8View.h>
+
+#include <simdutf.h>
+
+namespace AK::Detail {
+
+// Due to internal optimizations, we have an explicit maximum string length of 2**63 - 1: the most significant bit
+// of the stored length is reserved as the UTF-16 storage flag, so a valid length must leave that bit clear.
+//
+// The argument is parenthesized so that any expression may be passed, and the terminating semicolon is left to
+// the call site so the macro behaves like an ordinary statement.
+#define VERIFY_UTF16_LENGTH(length) VERIFY(((length) >> Detail::UTF16_FLAG) == 0)
+
+// Allocates a Utf16StringData header followed by room for `code_unit_length` code units of the requested storage
+// type (1 byte each for ASCII, 2 bytes each for UTF-16). The code unit buffer is left uninitialized.
+NonnullRefPtr<Utf16StringData> Utf16StringData::create_uninitialized(StorageType storage_type, size_t code_unit_length)
+{
+    auto const code_unit_size = storage_type == StorageType::ASCII ? sizeof(char) : sizeof(char16_t);
+    auto const allocation_size = sizeof(Utf16StringData) + (code_unit_size * code_unit_length);
+
+    void* slot = malloc(allocation_size);
+    VERIFY(slot);
+
+    // Construct the header in place at the start of the allocation.
+    return adopt_ref(*new (slot) Utf16StringData(storage_type, code_unit_length));
+}
+
+// Builds UTF-16 storage from any view that can be iterated code point by code point. The view is walked twice:
+// once to size the allocation and once to encode the code points into it.
+template<typename ViewType>
+NonnullRefPtr<Utf16StringData> Utf16StringData::create_from_code_point_iterable(ViewType const& view)
+{
+    size_t code_unit_length = 0;
+    size_t code_point_length = 0;
+
+    // First pass: count the code points and the UTF-16 code units needed to encode them.
+    for (auto code_point : view) {
+        code_unit_length += UnicodeUtils::code_unit_length_for_code_point(code_point);
+        ++code_point_length;
+    }
+
+    VERIFY_UTF16_LENGTH(code_unit_length);
+
+    auto string = create_uninitialized(StorageType::UTF16, code_unit_length);
+    // The code point count is already known, so store it now rather than computing it lazily later.
+    string->m_length_in_code_points = code_point_length;
+
+    size_t code_unit_index = 0;
+
+    // Second pass: encode each code point into the freshly allocated buffer.
+    for (auto code_point : view) {
+        (void)UnicodeUtils::code_point_to_utf16(code_point, [&](auto code_unit) {
+            string->m_utf16_data[code_unit_index++] = code_unit;
+        });
+    }
+
+    return string;
+}
+
+// Creates string data from UTF-8 text. When `allow_ascii_storage` is Yes and the text is ASCII, the bytes are kept
+// as 8-bit storage; otherwise the text is converted to UTF-16 storage.
+NonnullRefPtr<Utf16StringData> Utf16StringData::from_utf8(StringView utf8_string, AllowASCIIStorage allow_ascii_storage)
+{
+    RefPtr<Utf16StringData> string;
+
+    if (allow_ascii_storage == AllowASCIIStorage::Yes && utf8_string.is_ascii()) {
+        VERIFY_UTF16_LENGTH(utf8_string.length());
+
+        // ASCII: one byte per code unit, copied verbatim.
+        string = create_uninitialized(StorageType::ASCII, utf8_string.length());
+        TypedTransfer<char>::copy(string->m_ascii_data, utf8_string.characters_without_null_termination(), utf8_string.length());
+    } else if (Utf8View view { utf8_string }; view.validate(AllowLonelySurrogates::No)) {
+        // Input passed validation with lonely surrogates disallowed: size and convert in bulk with simdutf.
+        auto code_unit_length = simdutf::utf16_length_from_utf8(utf8_string.characters_without_null_termination(), utf8_string.length());
+        VERIFY_UTF16_LENGTH(code_unit_length);
+
+        string = create_uninitialized(StorageType::UTF16, code_unit_length);
+
+        auto result = simdutf::convert_utf8_to_utf16(utf8_string.characters_without_null_termination(), utf8_string.length(), string->m_utf16_data);
+        VERIFY(result == code_unit_length);
+    } else {
+        // Validation failed: fall back to encoding one code point at a time. `view` is the Utf8View declared in
+        // the `else if` init-statement above, which is still in scope here.
+        string = create_from_code_point_iterable(view);
+    }
+
+    return string.release_nonnull();
+}
+
+// Creates string data from a UTF-16 view. Views that are ASCII (whether they already use 8-bit storage or hold
+// 16-bit code units that are all ASCII) become ASCII storage; anything else is copied as UTF-16 storage.
+NonnullRefPtr<Utf16StringData> Utf16StringData::from_utf16(Utf16View const& utf16_string)
+{
+    auto const code_unit_count = utf16_string.length_in_code_units();
+    VERIFY_UTF16_LENGTH(code_unit_count);
+
+    if (utf16_string.has_ascii_storage()) {
+        // The view already holds 8-bit data; copy it verbatim.
+        auto data = create_uninitialized(StorageType::ASCII, code_unit_count);
+        TypedTransfer<char>::copy(data->m_ascii_data, utf16_string.ascii_span().data(), code_unit_count);
+        return data;
+    }
+
+    if (utf16_string.is_ascii()) {
+        // 16-bit code units that are all ASCII; narrow them to one byte each.
+        auto data = create_uninitialized(StorageType::ASCII, code_unit_count);
+
+        auto converted = simdutf::convert_utf16_to_utf8(utf16_string.utf16_span().data(), code_unit_count, data->m_ascii_data);
+        VERIFY(converted == code_unit_count);
+
+        return data;
+    }
+
+    auto data = create_uninitialized(StorageType::UTF16, code_unit_count);
+    TypedTransfer<char16_t>::copy(data->m_utf16_data, utf16_string.utf16_span().data(), code_unit_count);
+
+    // Carry over whatever code point length the view has recorded.
+    data->m_length_in_code_points = utf16_string.m_length_in_code_points;
+    return data;
+}
+
+// Creates string data from a UTF-32 view. ASCII input becomes ASCII storage; input that simdutf accepts as valid
+// UTF-32 is converted in bulk to UTF-16 storage; anything else is encoded one code point at a time.
+NonnullRefPtr<Utf16StringData> Utf16StringData::from_utf32(Utf32View const& utf32_string)
+{
+    auto const* code_points = reinterpret_cast<char32_t const*>(utf32_string.code_points());
+    auto const code_point_count = utf32_string.length();
+
+    if (utf32_string.is_ascii()) {
+        VERIFY_UTF16_LENGTH(code_point_count);
+
+        auto data = create_uninitialized(StorageType::ASCII, code_point_count);
+
+        auto converted = simdutf::convert_utf32_to_utf8(code_points, code_point_count, data->m_ascii_data);
+        VERIFY(converted == code_point_count);
+
+        return data;
+    }
+
+    if (!simdutf::validate_utf32(code_points, code_point_count))
+        return create_from_code_point_iterable(utf32_string);
+
+    auto const code_unit_count = simdutf::utf16_length_from_utf32(code_points, code_point_count);
+    VERIFY_UTF16_LENGTH(code_unit_count);
+
+    auto data = create_uninitialized(StorageType::UTF16, code_unit_count);
+    // Every UTF-32 element is one code point, so the code point length is already known.
+    data->m_length_in_code_points = code_point_count;
+
+    auto converted = simdutf::convert_utf32_to_utf16(code_points, code_point_count, data->m_utf16_data);
+    VERIFY(converted == code_unit_count);
+
+    return data;
+}
+
+// Counts the code points in UTF-16 storage. Valid UTF-16 is counted in bulk by simdutf; otherwise the string is
+// walked with the code point iterator.
+size_t Utf16StringData::calculate_code_point_length() const
+{
+    ASSERT(!has_ascii_storage());
+
+    auto const code_unit_count = length_in_code_units();
+
+    if (simdutf::validate_utf16(m_utf16_data, code_unit_count))
+        return simdutf::count_utf16(m_utf16_data, code_unit_count);
+
+    size_t count = 0;
+    for ([[maybe_unused]] auto code_point : utf16_view())
+        ++count;
+
+    return count;
+}
+
+}
--- a/AK/Utf16StringData.h
+++ b/AK/Utf16StringData.h
@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2025, Tim Flynn <trflynn89@ladybird.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/NonnullRefPtr.h>
+#include <AK/NumericLimits.h>
+#include <AK/RefCounted.h>
+#include <AK/Span.h>
+#include <AK/StringView.h>
+#include <AK/Types.h>
+#include <AK/Utf16View.h>
+
+namespace AK::Detail {
+
+class Utf16StringData final : public RefCounted<Utf16StringData> {
+public:
+    enum class StorageType : u8 {
+        ASCII,
+        UTF16,
+    };
+
+    enum class AllowASCIIStorage : u8 {
+        No,
+        Yes,
+    };
+
+    static NonnullRefPtr<Utf16StringData> from_utf8(StringView, AllowASCIIStorage);
+    static NonnullRefPtr<Utf16StringData> from_utf16(Utf16View const&);
+    static NonnullRefPtr<Utf16StringData> from_utf32(Utf32View const&);
+
+    ~Utf16StringData() = default;
+
+    void operator delete(void* ptr)
+    {
+        free(ptr);
+    }
+
+    [[nodiscard]] ALWAYS_INLINE bool operator==(Utf16StringData const& other) const
+    {
+        return utf16_view() == other.utf16_view();
+    }
+
+    [[nodiscard]] ALWAYS_INLINE bool operator==(Utf16View const& other) const
+    {
+        return utf16_view() == other;
+    }
+
+    [[nodiscard]] ALWAYS_INLINE bool operator==(StringView const& other) const
+    {
+        if (has_ascii_storage())
+            return ascii_view() == other;
+        return utf16_view() == Utf16View { other.characters_without_null_termination(), other.length() };
+    }
+
+    [[nodiscard]] ALWAYS_INLINE bool has_ascii_storage() const { return m_length_in_code_units >> Detail::UTF16_FLAG == 0; }
+    [[nodiscard]] ALWAYS_INLINE bool has_utf16_storage() const { return m_length_in_code_units >> Detail::UTF16_FLAG != 0; }
+
+    // Returns the hash of this string's contents. The hash is computed on first use and cached in m_hash for
+    // subsequent calls.
+    ALWAYS_INLINE u32 hash() const
+    {
+        if (!m_has_hash) {
+            m_hash = calculate_hash();
+            // Mark the cache as populated; without this the hash would be recomputed on every call.
+            m_has_hash = true;
+        }
+        return m_hash;
+    }
+
+    [[nodiscard]] ALWAYS_INLINE size_t length_in_code_units() const { return m_length_in_code_units & ~(1uz << Detail::UTF16_FLAG); }
+    // Returns the number of code points in the string. For ASCII storage this equals the code unit length. For
+    // UTF-16 storage the value is computed on first use and cached; NumericLimits<size_t>::max() marks the cache
+    // as not yet computed.
+    [[nodiscard]] ALWAYS_INLINE size_t length_in_code_points() const
+    {
+        if (has_ascii_storage())
+            return length_in_code_units();
+        if (m_length_in_code_points == NumericLimits<size_t>::max())
+            m_length_in_code_points = calculate_code_point_length();
+        return m_length_in_code_points;
+    }
+
+    [[nodiscard]] ALWAYS_INLINE StringView ascii_view() const
+    {
+        ASSERT(has_ascii_storage());
+        return { m_ascii_data, length_in_code_units() };
+    }
+
+    // Returns a view over this string's data. ASCII storage yields a view constructed from the 8-bit buffer;
+    // UTF-16 storage yields a view over the 16-bit buffer.
+    [[nodiscard]] ALWAYS_INLINE Utf16View utf16_view() const
+    {
+        if (has_ascii_storage())
+            return { m_ascii_data, length_in_code_units() };
+
+        Utf16View view { m_utf16_data, length_in_code_units() };
+        // Hand our stored code point length (possibly still the "not computed" sentinel) to the view.
+        view.m_length_in_code_points = m_length_in_code_points;
+
+        return view;
+    }
+
+private:
+    // Records the code unit length and, for UTF-16 storage, sets the most significant bit of the stored length as
+    // the storage-type flag. ASCII storage leaves that bit clear.
+    ALWAYS_INLINE Utf16StringData(StorageType storage_type, size_t code_unit_length)
+        : m_length_in_code_units(code_unit_length)
+    {
+        if (storage_type == StorageType::UTF16)
+            m_length_in_code_units |= 1uz << Detail::UTF16_FLAG;
+    }
+
+    static NonnullRefPtr<Utf16StringData> create_uninitialized(StorageType storage_type, size_t code_unit_length);
+
+    template<typename ViewType>
+    static NonnullRefPtr<Utf16StringData> create_from_code_point_iterable(ViewType const&);
+
+    [[nodiscard]] size_t calculate_code_point_length() const;
+
+    [[nodiscard]] ALWAYS_INLINE u32 calculate_hash() const
+    {
+        if (has_ascii_storage())
+            return ascii_view().hash();
+        return utf16_view().hash();
+    }
+
+    // We store whether this string has ASCII or UTF-16 storage by setting the most significant bit of m_length_in_code_units
+    // to 1 for UTF-16 storage. This shrinks the size of most UTF-16 string related classes, at the cost of not being
+    // allowed to create a string larger than 2**63 - 1.
+    size_t m_length_in_code_units { 0 };
+    mutable size_t m_length_in_code_points { NumericLimits<size_t>::max() };
+
+    mutable u32 m_hash { 0 };
+    mutable bool m_has_hash { false };
+
+    union {
+        char m_ascii_data[0];
+        char16_t m_utf16_data[0];
+    };
+};
+
+}
--- a/AK/Utf16View.cpp
+++ b/AK/Utf16View.cpp
@ -99,15 +99,19 @@ ErrorOr<String> Utf16View::to_utf8(AllowLonelySurrogates allow_lonely_surrogates
 {
    if (is_empty())
        return String {};
+    if (has_ascii_storage())
+        return String::from_utf8_without_validation(bytes());
+
    if (!validate(allow_lonely_surrogates))
        return Error::from_string_literal("Input was not valid UTF-16");

    if (allow_lonely_surrogates == AllowLonelySurrogates::No) {
        String result;
-        auto utf8_length = simdutf::utf8_length_from_utf16(m_string, length_in_code_units());
+
+        auto utf8_length = simdutf::utf8_length_from_utf16(m_string.utf16, length_in_code_units());

        TRY(result.replace_with_new_string(Badge<Utf16View> {}, utf8_length, [&](Bytes buffer) -> ErrorOr<void> {
-            [[maybe_unused]] auto result = simdutf::convert_utf16_to_utf8(m_string, length_in_code_units(), reinterpret_cast<char*>(buffer.data()));
+            [[maybe_unused]] auto result = simdutf::convert_utf16_to_utf8(m_string.utf16, length_in_code_units(), reinterpret_cast<char*>(buffer.data()));
            ASSERT(result == buffer.size());
            return {};
        }));
@ -127,17 +131,25 @@ ErrorOr<ByteString> Utf16View::to_byte_string(AllowLonelySurrogates allow_lonely

 bool Utf16View::is_ascii() const
 {
+    if (has_ascii_storage())
+        return true;
+
    // FIXME: Petition simdutf to implement an ASCII validator for UTF-16.
-    return all_of(span(), AK::is_ascii);
+    return all_of(utf16_span(), AK::is_ascii);
 }

 bool Utf16View::validate(size_t& valid_code_units, AllowLonelySurrogates allow_lonely_surrogates) const
 {
+    if (has_ascii_storage()) {
+        valid_code_units = length_in_code_units();
+        return true;
+    }
+
    auto view = *this;
    valid_code_units = 0;

    while (!view.is_empty()) {
-        auto result = simdutf::validate_utf16_with_errors(view.m_string, view.length_in_code_units());
+        auto result = simdutf::validate_utf16_with_errors(view.m_string.utf16, view.length_in_code_units());
        valid_code_units += result.count;

        if (result.error == simdutf::SUCCESS)
@ -197,7 +209,9 @@ Utf16View Utf16View::unicode_substring_view(size_t code_point_offset, size_t cod
        return substring_view(code_point_offset, code_point_length);

    auto code_unit_offset_of = [&](Utf16CodePointIterator const& it) {
-        return it.m_iterator - m_string;
+        if (has_ascii_storage())
+            return it.m_iterator.ascii - m_string.ascii;
+        return it.m_iterator.utf16 - m_string.utf16;
    };

    size_t code_point_index = 0;
@ -220,9 +234,11 @@ Utf16View Utf16View::unicode_substring_view(size_t code_point_offset, size_t cod

 size_t Utf16View::calculate_length_in_code_points() const
 {
+    ASSERT(!has_ascii_storage());
+
    // simdutf's code point length method assumes valid UTF-16, whereas we allow lonely surrogates.
    if (validate(AllowLonelySurrogates::No)) [[likely]]
-        return simdutf::count_utf16(m_string, length_in_code_units());
+        return simdutf::count_utf16(m_string.utf16, length_in_code_units());

    size_t code_points = 0;
    for ([[maybe_unused]] auto code_point : *this)
--- a/AK/Utf16View.h
+++ b/AK/Utf16View.h
@ -37,6 +37,13 @@ ErrorOr<Utf16ConversionResult> utf32_to_utf16(Utf32View const&);

 size_t utf16_code_unit_length_from_utf8(StringView);

+namespace Detail {
+
+// Bit index used to tag a size_t length field as describing UTF-16 storage. Code in this header sets the tag with
+// `|= 1uz << UTF16_FLAG`, tests it with `>> UTF16_FLAG`, and masks it off with `& ~(1uz << UTF16_FLAG)`.
+// NOTE(review): presumably the most significant bit of size_t (digits() - 1) - confirm NumericLimits semantics.
+static constexpr inline auto UTF16_FLAG = NumericLimits<size_t>::digits() - 1;
+class Utf16StringBase;
+
+}
+
 class Utf16CodePointIterator {
    friend class Utf16View;

@ -46,23 +53,35 @@ public:

    constexpr Utf16CodePointIterator& operator++()
    {
-        VERIFY(m_remaining_code_units > 0);
+        // Advances to the next code point. Incrementing an exhausted iterator trips the VERIFY below.
+        // The count is read through remaining_code_units() so that the storage flag bit is masked off.
+        auto remaining_code_units = this->remaining_code_units();
+        VERIFY(remaining_code_units > 0);

-        auto length = min(length_in_code_units(), m_remaining_code_units);
-        m_iterator += length;
+        if (has_ascii_storage()) {
+            // ASCII storage: each code point occupies exactly one byte.
+            ++m_iterator.ascii;
+            --m_remaining_code_units;
+        } else {
+            // UTF-16 storage: step over the current code point, clamped so we never move past the remaining units.
+            auto length = min(length_in_code_units(), remaining_code_units);
+
+            m_iterator.utf16 += length;
+            // length <= remaining_code_units, so subtracting from the raw field leaves the flag bit untouched.
            m_remaining_code_units -= length;
+        }

        return *this;
    }

    constexpr u32 operator*() const
    {
-        VERIFY(m_remaining_code_units > 0);
-        auto code_unit = *m_iterator;
+        auto remaining_code_units = this->remaining_code_units();
+        VERIFY(remaining_code_units > 0);
+
+        if (has_ascii_storage())
+            return *m_iterator.ascii;
+
+        auto code_unit = *m_iterator.utf16;

        if (UnicodeUtils::is_utf16_high_surrogate(code_unit)) {
-            if (m_remaining_code_units > 1) {
-                auto next_code_unit = *(m_iterator + 1);
+            if (remaining_code_units > 1) {
+                auto next_code_unit = *(m_iterator.utf16 + 1);

                if (UnicodeUtils::is_utf16_low_surrogate(next_code_unit))
                    return UnicodeUtils::decode_utf16_surrogate_pair(code_unit, next_code_unit);
@ -79,22 +98,46 @@ public:

    [[nodiscard]] constexpr bool operator==(Utf16CodePointIterator const& other) const
    {
-        return (m_iterator == other.m_iterator) && (m_remaining_code_units == other.m_remaining_code_units);
+        // Note that this also protects against iterators with different underlying storage.
+        // (The raw field is compared, storage flag bit included, so differing storage kinds compare unequal.)
+        if (m_remaining_code_units != other.m_remaining_code_units)
+            return false;
+
+        // Both iterators use the same storage kind from here on, so compare the matching union member.
+        if (has_ascii_storage())
+            return m_iterator.ascii == other.m_iterator.ascii;
+        return m_iterator.utf16 == other.m_iterator.utf16;
    }

-    [[nodiscard]] constexpr size_t length_in_code_units() const
+    // Returns the number of code units occupied by the code point at the current position: always 1 for ASCII
+    // storage, otherwise whatever UnicodeUtils reports for the decoded code point. The function only reads state,
+    // so it stays const (usable on const iterators) and constexpr (it is called from the constexpr operator++).
+    [[nodiscard]] ALWAYS_INLINE constexpr size_t length_in_code_units() const
    {
+        if (has_ascii_storage())
+            return 1;
        return UnicodeUtils::code_unit_length_for_code_point(**this);
    }

 private:
-    constexpr Utf16CodePointIterator(char16_t const* ptr, size_t length)
-        : m_iterator(ptr)
+    // Creates an iterator over ASCII storage. The length is stored as-is, i.e. with the storage flag bit clear.
+    constexpr Utf16CodePointIterator(char const* iterator, size_t length)
+        : m_iterator { .ascii = iterator }
        , m_remaining_code_units(length)
    {
    }

-    char16_t const* m_iterator { nullptr };
+    // Creates an iterator over UTF-16 storage and tags the stored length with the UTF-16 flag bit.
+    constexpr Utf16CodePointIterator(char16_t const* iterator, size_t length)
+        : m_iterator { .utf16 = iterator }
+        , m_remaining_code_units(length)
+    {
+        m_remaining_code_units |= 1uz << Detail::UTF16_FLAG;
+    }
+
+    // True when the flag bit of m_remaining_code_units is clear, i.e. m_iterator.ascii is the member in use.
+    constexpr bool has_ascii_storage() const { return m_remaining_code_units >> Detail::UTF16_FLAG == 0; }
+    // The number of code units left to iterate, with the storage flag bit masked off.
+    constexpr size_t remaining_code_units() const { return m_remaining_code_units & ~(1uz << Detail::UTF16_FLAG); }
+
+    union {
+        char const* ascii;
+        char16_t const* utf16;
+    } m_iterator { .ascii = nullptr };
+
+    // Just like Utf16StringData, we store whether this string has ASCII or UTF-16 storage by setting the most
+    // significant bit of m_remaining_code_units for UTF-16 storage.
    size_t m_remaining_code_units { 0 };
 };

@ -106,38 +149,86 @@ public:
    ~Utf16View() = default;

    constexpr Utf16View(char16_t const* string, size_t length_in_code_units)
-        : m_string(string)
+        : m_string { .utf16 = string }
        , m_length_in_code_units(length_in_code_units)
    {
+        // Tag the stored length so that has_ascii_storage() reports UTF-16 storage for this view.
+        m_length_in_code_units |= 1uz << Detail::UTF16_FLAG;
    }

    constexpr Utf16View(Utf16Data const& string)
-        : m_string(string.data())
+        : m_string { .utf16 = string.data() }
        , m_length_in_code_units(string.size())
    {
+        // Tag the stored length so that has_ascii_storage() reports UTF-16 storage for this view.
+        m_length_in_code_units |= 1uz << Detail::UTF16_FLAG;
+    }
+
+    // Compile-time-only (consteval) construction of an ASCII-backed view from a StringView. The flag bit is left
+    // clear, and the VERIFY rejects any input that contains a non-ASCII byte.
+    consteval Utf16View(StringView string)
+        : m_string { .ascii = string.characters_without_null_termination() }
+        , m_length_in_code_units(string.length())
+    {
+        VERIFY(all_of(string, AK::is_ascii));
    }

    Utf16View(Utf16ConversionResult&&) = delete;
    explicit Utf16View(Utf16ConversionResult const& conversion_result)
-        : m_string(conversion_result.data.data())
+        // Views the conversion result's UTF-16 buffer and seeds the cached code point count from it.
+        // NOTE(review): the deleted rvalue overload above presumably exists to prevent dangling views - confirm.
+        : m_string { .utf16 = conversion_result.data.data() }
        , m_length_in_code_units(conversion_result.data.size())
        , m_length_in_code_points(conversion_result.code_point_count)
    {
+        // Tag the stored length so that has_ascii_storage() reports UTF-16 storage for this view.
+        m_length_in_code_units |= 1uz << Detail::UTF16_FLAG;
    }

    ErrorOr<String> to_utf8(AllowLonelySurrogates = AllowLonelySurrogates::Yes) const;
    ErrorOr<ByteString> to_byte_string(AllowLonelySurrogates = AllowLonelySurrogates::Yes) const;

-    [[nodiscard]] constexpr ReadonlySpan<char16_t> span() const
+    ALWAYS_INLINE String to_utf8_but_should_be_ported_to_utf16(AllowLonelySurrogates allow_lonely_surrogates = AllowLonelySurrogates::Yes) const
    {
-        return { m_string, length_in_code_units() };
+        return MUST(to_utf8(allow_lonely_surrogates));
+    }
+
+    // True when the flag bit of m_length_in_code_units is clear, i.e. m_string.ascii is the member in use. This is
+    // constexpr (matching Utf16CodePointIterator::has_ascii_storage) because the constexpr members of this class
+    // call it, and they could not be constant-evaluated otherwise.
+    [[nodiscard]] ALWAYS_INLINE constexpr bool has_ascii_storage() const { return m_length_in_code_units >> Detail::UTF16_FLAG == 0; }
+
+    // The ASCII storage as raw bytes. Only valid for ASCII-backed views; anything else trips the VERIFY.
+    [[nodiscard]] constexpr ReadonlyBytes bytes() const
+    {
+        VERIFY(has_ascii_storage());
+        return { m_string.ascii, length_in_code_units() };
+    }
+
+    // The ASCII storage as a span of char. Only valid for ASCII-backed views; anything else trips the VERIFY.
+    [[nodiscard]] constexpr ReadonlySpan<char> ascii_span() const
+    {
+        VERIFY(has_ascii_storage());
+        return { m_string.ascii, length_in_code_units() };
+    }
+
+    // The UTF-16 storage as a span of code units. Only valid for UTF-16-backed views; ASCII-backed views trip the
+    // VERIFY, so callers must check has_ascii_storage() first.
+    [[nodiscard]] constexpr ReadonlySpan<char16_t> utf16_span() const
+    {
+        VERIFY(!has_ascii_storage());
+        return { m_string.utf16, length_in_code_units() };
    }

    [[nodiscard]] constexpr bool operator==(Utf16View const& other) const
    {
        if (length_in_code_units() != other.length_in_code_units())
            return false;
-        return TypedTransfer<char16_t>::compare(m_string, other.m_string, length_in_code_units());
+
+        // Same storage kind on both sides: compare the underlying buffers directly.
+        if (has_ascii_storage() && other.has_ascii_storage())
+            return TypedTransfer<char>::compare(m_string.ascii, other.m_string.ascii, length_in_code_units());
+        if (!has_ascii_storage() && !other.has_ascii_storage())
+            return TypedTransfer<char16_t>::compare(m_string.utf16, other.m_string.utf16, length_in_code_units());
+
+        // Mixed storage: the element widths differ, so compare one code unit at a time.
+        for (size_t i = 0; i < length_in_code_units(); ++i) {
+            if (code_unit_at(i) != other.code_unit_at(i))
+                return false;
+        }
+
+        return true;
+    }
+
+    // Compares this view against a StringView by treating the StringView's bytes as ASCII storage.
+    // NOTE(review): nothing here checks that `other` is ASCII; non-ASCII bytes would be compared as individual
+    // code units rather than decoded as UTF-8 - confirm callers only pass ASCII.
+    [[nodiscard]] constexpr bool operator==(StringView other) const
+    {
+        if (has_ascii_storage())
+            return bytes() == other.bytes();
+        // Wrap `other` in an ASCII-backed view (private constructor) and use the mixed-storage comparison.
+        return *this == Utf16View { other.characters_without_null_termination(), other.length() };
    }

    [[nodiscard]] constexpr bool equals_ignoring_case(Utf16View const& other) const
@ -175,10 +266,18 @@ public:
    {
        if (is_empty())
            return 0;
-        return string_hash(reinterpret_cast<char const*>(m_string), length_in_code_units() * sizeof(char16_t));
+        if (has_ascii_storage())
+            return string_hash(m_string.ascii, length_in_code_units());
+        return string_hash(reinterpret_cast<char const*>(m_string.utf16), length_in_code_units() * sizeof(char16_t));
+    }
+
+    // A view is null when the pointer for its active storage kind is null.
+    [[nodiscard]] constexpr bool is_null() const
+    {
+        return has_ascii_storage()
+            ? m_string.ascii == nullptr
+            : m_string.utf16 == nullptr;
    }

-    [[nodiscard]] constexpr bool is_null() const { return m_string == nullptr; }
    [[nodiscard]] constexpr bool is_empty() const { return length_in_code_units() == 0; }
    [[nodiscard]] bool is_ascii() const;

@ -190,10 +289,13 @@ public:

    [[nodiscard]] bool validate(size_t& valid_code_units, AllowLonelySurrogates = AllowLonelySurrogates::Yes) const;

-    [[nodiscard]] constexpr size_t length_in_code_units() const { return m_length_in_code_units; }
+    // The length of the view in code units, with the storage flag bit masked off the stored field.
+    [[nodiscard]] constexpr size_t length_in_code_units() const { return m_length_in_code_units & ~(1uz << Detail::UTF16_FLAG); }

    [[nodiscard]] ALWAYS_INLINE size_t length_in_code_points() const
    {
+        if (has_ascii_storage())
+            return m_length_in_code_units;
+
        if (m_length_in_code_points == NumericLimits<size_t>::max())
            m_length_in_code_points = calculate_length_in_code_points();
        return m_length_in_code_points;
@ -201,6 +303,9 @@ public:

    constexpr Optional<size_t> length_in_code_points_if_known() const
    {
+        if (has_ascii_storage())
+            return m_length_in_code_units;
+
        if (m_length_in_code_points == NumericLimits<size_t>::max())
            return {};
        return m_length_in_code_points;
@ -211,7 +316,10 @@ public:
    [[nodiscard]] constexpr char16_t code_unit_at(size_t index) const
    {
        VERIFY(index < length_in_code_units());
-        return m_string[index];
+
+        // For ASCII storage the byte at `index` is widened to a char16_t code unit by the return conversion.
+        if (has_ascii_storage())
+            return m_string.ascii[index];
+        return m_string.utf16[index];
    }

    [[nodiscard]] constexpr u32 code_point_at(size_t index) const
@ -236,18 +344,25 @@ public:

    [[nodiscard]] constexpr Utf16CodePointIterator begin() const
    {
-        return { m_string, length_in_code_units() };
+        // The pointer type selects the iterator constructor, which in turn records the storage kind.
+        if (has_ascii_storage())
+            return { m_string.ascii, length_in_code_units() };
+        return { m_string.utf16, length_in_code_units() };
    }

    [[nodiscard]] constexpr Utf16CodePointIterator end() const
    {
-        return { m_string + length_in_code_units(), 0 };
+        // One-past-the-end pointer with zero remaining code units, built for the matching storage kind.
+        if (has_ascii_storage())
+            return { m_string.ascii + length_in_code_units(), 0 };
+        return { m_string.utf16 + length_in_code_units(), 0 };
    }

    [[nodiscard]] constexpr Utf16View substring_view(size_t code_unit_offset, size_t code_unit_length) const
    {
-        VERIFY(code_unit_offset + code_unit_length <= length_in_code_units());
-        return { m_string + code_unit_offset, code_unit_length };
+        // Returns a view of `code_unit_length` code units starting at `code_unit_offset`, keeping the storage kind.
+        //
+        // The bounds are checked without adding offset and length: that sum can wrap around for large values (for
+        // example when a caller computes the length as `length_in_code_units() - offset` with an offset past the
+        // end), which would let an out-of-range request pass the check.
+        VERIFY(code_unit_offset <= length_in_code_units());
+        VERIFY(code_unit_length <= length_in_code_units() - code_unit_offset);
+
+        if (has_ascii_storage())
+            return { m_string.ascii + code_unit_offset, code_unit_length };
+        return { m_string.utf16 + code_unit_offset, code_unit_length };
    }

    [[nodiscard]] constexpr Utf16View substring_view(size_t code_unit_offset) const { return substring_view(code_unit_offset, length_in_code_units() - code_unit_offset); }
@ -259,12 +374,42 @@ public:
    {
        if (start_offset >= length_in_code_units())
            return {};
-        return AK::memmem_optional(m_string + start_offset, (length_in_code_units() - start_offset) * sizeof(char16_t), &needle, sizeof(needle));
+
+        if (has_ascii_storage()) {
+            // A non-ASCII needle can never occur in ASCII storage. Report "not found" with an empty result, like the
+            // out-of-range case above; `return false` would instead convert to an engaged result holding offset 0.
+            if (!AK::is_ascii(needle))
+                return {};
+
+            // Search for the needle as a single byte.
+            // NOTE(review): the offset from memmem_optional is presumably relative to the pointer passed in
+            // (m_string.ascii + start_offset) - confirm this is what callers expect.
+            auto byte = static_cast<char>(needle);
+            return AK::memmem_optional(m_string.ascii + start_offset, length_in_code_units() - start_offset, &byte, sizeof(byte));
+        }
+
+        return AK::memmem_optional(m_string.utf16 + start_offset, (length_in_code_units() - start_offset) * sizeof(char16_t), &needle, sizeof(needle));
    }

    constexpr Optional<size_t> find_code_unit_offset(Utf16View const& needle, size_t start_offset = 0) const
    {
-        return span().index_of(needle.span(), start_offset);
+        // Same storage kind on both sides: delegate to the span search.
+        if (has_ascii_storage() && needle.has_ascii_storage())
+            return ascii_span().index_of(needle.ascii_span(), start_offset);
+        if (!has_ascii_storage() && !needle.has_ascii_storage())
+            return utf16_span().index_of(needle.utf16_span(), start_offset);
+
+        // Mixed storage: search manually. First reject needles that cannot fit at or after start_offset, using a
+        // checked addition so that a huge start_offset cannot wrap around.
+        Checked maximum_offset { start_offset };
+        maximum_offset += needle.length_in_code_units();
+        if (maximum_offset.has_overflow() || maximum_offset.value() > length_in_code_units())
+            return {};
+
+        if (needle.is_empty())
+            return start_offset;
+
+        // Compare a needle-sized window at each candidate position (operator== handles the mixed storage).
+        for (size_t index = start_offset; index <= length_in_code_units() - needle.length_in_code_units();) {
+            auto slice = substring_view(index, needle.length_in_code_units());
+            if (slice == needle)
+                return index;
+
+            // Advance by the code unit length of the code point at the start of the window.
+            index += slice.begin().length_in_code_units();
+        }
+
+        return {};
    }

    constexpr Optional<size_t> find_code_unit_offset_ignoring_case(Utf16View const& needle, size_t start_offset = 0) const
@ -298,9 +443,24 @@ public:
        if (needle.length_in_code_units() > length_in_code_units())
            return false;

-        if (m_string == needle.m_string)
+        if (has_ascii_storage() && needle.has_ascii_storage()) {
+            if (m_string.ascii == needle.m_string.ascii)
+                return true;
+            return ascii_span().starts_with(needle.ascii_span());
+        }
+
+        if (!has_ascii_storage() && !needle.has_ascii_storage()) {
+            if (m_string.utf16 == needle.m_string.utf16)
+                return true;
+            return utf16_span().starts_with(needle.utf16_span());
+        }
+
+        for (auto this_it = begin(), needle_it = needle.begin(); needle_it != needle.end(); ++needle_it, ++this_it) {
+            if (*this_it != *needle_it)
+                return false;
+        }
+
        return true;
-        return span().starts_with(needle.span());
    }

    // https://infra.spec.whatwg.org/#code-unit-less-than
@ -320,9 +480,24 @@ public:
    }

 private:
+    friend Detail::Utf16StringBase;
+    friend Detail::Utf16StringData;
+
+    // Creates an ASCII-backed view (flag bit clear). This constructor performs no ASCII check of its own.
+    // NOTE(review): presumably private so that only this class and its friends, which guarantee ASCII contents,
+    // can use it - confirm.
+    constexpr Utf16View(char const* string, size_t length_in_code_units)
+        : m_string { .ascii = string }
+        , m_length_in_code_units(length_in_code_units)
+    {
+    }
+
    [[nodiscard]] size_t calculate_length_in_code_points() const;

-    char16_t const* m_string { nullptr };
+    union {
+        char const* ascii;
+        char16_t const* utf16;
+    } m_string { .ascii = nullptr };
+
+    // Just like Utf16StringData, we store whether this string has ASCII or UTF-16 storage by setting the most
+    // significant bit of m_length_in_code_units for UTF-16 storage.
    size_t m_length_in_code_units { 0 };
    mutable size_t m_length_in_code_points { NumericLimits<size_t>::max() };
 };
@ -342,6 +517,16 @@ struct Traits<Utf16View> : public DefaultTraits<Utf16View> {
    static unsigned hash(Utf16View const& s) { return s.hash(); }
 };

+namespace Detail {
+
+// Declares Utf16View and Utf16String as hash-compatible with each other, in both directions.
+// NOTE(review): Utf16View::hash() hashes the raw storage bytes, so an ASCII-backed and a UTF-16-backed view of the
+// same text hash differently; this presumably relies on equal strings sharing a storage kind - confirm.
+template<>
+inline constexpr bool IsHashCompatible<Utf16View, Utf16String> = true;
+
+template<>
+inline constexpr bool IsHashCompatible<Utf16String, Utf16View> = true;
+
+}
+
 }

 [[nodiscard]] ALWAYS_INLINE AK_STRING_VIEW_LITERAL_CONSTEVAL AK::Utf16View operator""sv(char16_t const* string, size_t length)
--- a/AK/Utf32View.h
+++ b/AK/Utf32View.h
@ -6,7 +6,9 @@

 #pragma once

+#include <AK/AllOf.h>
 #include <AK/Assertions.h>
+#include <AK/CharacterTypes.h>
 #include <AK/Checked.h>
 #include <AK/Format.h>
 #include <AK/Types.h>
@ -99,6 +101,12 @@ public:
    bool is_null() const { return !m_code_points; }
    size_t length() const { return m_length; }

+    // Returns true when every code point in this view passes AK::is_ascii (vacuously true for an empty view).
+    bool is_ascii() const
+    {
+        // FIXME: Petition simdutf to implement an ASCII validator for UTF-32.
+        auto const only_ascii = all_of(*this, AK::is_ascii);
+        return only_ascii;
+    }
+
    size_t iterator_offset(Utf32CodePointIterator const& it) const
    {
        VERIFY(it.m_ptr >= m_code_points);
--- a/Libraries/LibJS/Runtime/AbstractOperations.cpp
+++ b/Libraries/LibJS/Runtime/AbstractOperations.cpp
@ -1451,7 +1451,7 @@ ThrowCompletionOr<String> get_substitution(VM& vm, Utf16View const& matched, Utf
        auto ref_length = ref.length_in_code_units();

        // k. Set result to the string-concatenation of result and refReplacement.
-        result.append(ref_replacement.span().data(), ref_replacement.length_in_code_units());
+        result.append(ref_replacement.utf16_span().data(), ref_replacement.length_in_code_units());

        // j. Set templateRemainder to the substring of templateRemainder from refLength.
        // NOTE: We do this step last because refReplacement may point to templateRemainder.
--- a/Libraries/LibJS/Runtime/Utf16String.cpp
+++ b/Libraries/LibJS/Runtime/Utf16String.cpp
@ -44,7 +44,13 @@ NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view)
 {
    Utf16Data string;
    string.ensure_capacity(view.length_in_code_units());
-    string.unchecked_append(view.span().data(), view.length_in_code_units());
+
+    if (view.has_ascii_storage()) {
+        for (size_t i = 0; i < view.length_in_code_units(); ++i)
+            string.unchecked_append(static_cast<char16_t>(view.code_unit_at(i)));
+    } else {
+        string.unchecked_append(view.utf16_span().data(), view.length_in_code_units());
+    }

    auto impl = create(move(string));
    if (auto length_in_code_points = view.length_in_code_points_if_known(); length_in_code_points.has_value())
--- a/Libraries/LibUnicode/Segmenter.cpp
+++ b/Libraries/LibUnicode/Segmenter.cpp
@ -75,7 +75,12 @@ public:

    virtual void set_segmented_text(Utf16View const& text) override
    {
-        m_segmented_text = icu::UnicodeString { text.span().data(), static_cast<i32>(text.length_in_code_units()) };
+        // ASCII-backed views have no char16_t buffer to hand to ICU (utf16_span() would VERIFY), so convert to
+        // UTF-8 and defer to another set_segmented_text overload (not visible in this hunk).
+        if (text.has_ascii_storage()) {
+            set_segmented_text(MUST(text.to_utf8()));
+            return;
+        }
+
+        m_segmented_text = icu::UnicodeString { text.utf16_span().data(), static_cast<i32>(text.length_in_code_units()) };
        m_segmenter->setText(m_segmented_text.get<icu::UnicodeString>());
    }

--- a/Libraries/LibWeb/DOM/CharacterData.cpp
+++ b/Libraries/LibWeb/DOM/CharacterData.cpp
@ -89,9 +89,9 @@ WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t coun

    Utf16Data full_data;
    full_data.ensure_capacity(before_data.length_in_code_units() + inserted_data_result.data.size() + after_data.length_in_code_units());
-    full_data.append(before_data.span().data(), before_data.length_in_code_units());
+    full_data.append(before_data.utf16_span().data(), before_data.length_in_code_units());
    full_data.extend(inserted_data_result.data);
-    full_data.append(after_data.span().data(), after_data.length_in_code_units());
+    full_data.append(after_data.utf16_span().data(), after_data.length_in_code_units());
    Utf16View full_view { full_data };

    bool characters_are_the_same = utf16_view == full_view;
--- a/Tests/AK/CMakeLists.txt
+++ b/Tests/AK/CMakeLists.txt
@ -76,6 +76,7 @@ set(AK_TEST_SOURCES
    TestTypeTraits.cpp
    TestTypedTransfer.cpp
    TestUFixedBigInt.cpp
+    TestUtf16String.cpp
    TestUtf16View.cpp
    TestUtf8View.cpp
    TestVariant.cpp
--- a/Tests/AK/TestUtf16String.cpp
+++ b/Tests/AK/TestUtf16String.cpp
@ -0,0 +1,516 @@
+/*
+ * Copyright (c) 2025, Tim Flynn <trflynn89@ladybird.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include <LibTest/TestCase.h>
+
+#include <AK/Array.h>
+#include <AK/CharacterTypes.h>
+#include <AK/Enumerate.h>
+#include <AK/StringBuilder.h>
+#include <AK/Utf16String.h>
+#include <AK/Utf32View.h>
+
+// Re-creates `string` from a view of its contents, using the factory that matches its storage kind.
+static Utf16String make_copy(Utf16String const& string)
+{
+    if (string.has_ascii_storage())
+        return Utf16String::from_utf8(string.ascii_view());
+    return Utf16String::from_utf16(string.utf16_view());
+}
+
+// A default-constructed string is empty, counts as ASCII, and is expected to use short ASCII storage.
+TEST_CASE(empty_string)
+{
+    Utf16String string {};
+    EXPECT(string.is_empty());
+    EXPECT(string.is_ascii());
+    EXPECT(!string.has_long_ascii_storage());
+    EXPECT(string.has_short_ascii_storage());
+    EXPECT_EQ(string.length_in_code_units(), 0uz);
+    EXPECT_EQ(string.length_in_code_points(), 0uz);
+    EXPECT_EQ(string.ascii_view(), StringView {});
+}
+
+// Checks the storage kind and the code unit / code point lengths chosen when constructing from UTF-8 input.
+TEST_CASE(from_utf8)
+{
+    // Short ASCII input: short ASCII storage.
+    {
+        auto string = Utf16String::from_utf8("hello!"sv);
+        EXPECT(!string.is_empty());
+        EXPECT(string.is_ascii());
+        EXPECT(!string.has_long_ascii_storage());
+        EXPECT(string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 6uz);
+        EXPECT_EQ(string.length_in_code_points(), 6uz);
+        EXPECT_EQ(string.ascii_view(), "hello!"sv);
+    }
+    // Longer ASCII input: long ASCII storage.
+    {
+        auto string = Utf16String::from_utf8("hello there!"sv);
+        EXPECT(!string.is_empty());
+        EXPECT(string.is_ascii());
+        EXPECT(string.has_long_ascii_storage());
+        EXPECT(!string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 12uz);
+        EXPECT_EQ(string.length_in_code_points(), 12uz);
+        EXPECT_EQ(string.ascii_view(), "hello there!"sv);
+    }
+    // Short non-ASCII input: neither ASCII storage kind; one code point encoded as two code units.
+    {
+        auto string = Utf16String::from_utf8("😀"sv);
+        EXPECT(!string.is_empty());
+        EXPECT(!string.is_ascii());
+        EXPECT(!string.has_long_ascii_storage());
+        EXPECT(!string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 2uz);
+        EXPECT_EQ(string.length_in_code_points(), 1uz);
+        EXPECT_EQ(string.utf16_view(), u"😀"sv);
+    }
+    // Longer input mixing ASCII and non-ASCII: neither ASCII storage kind.
+    {
+        auto string = Utf16String::from_utf8("hello 😀 there!"sv);
+        EXPECT(!string.is_empty());
+        EXPECT(!string.is_ascii());
+        EXPECT(!string.has_long_ascii_storage());
+        EXPECT(!string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 15uz);
+        EXPECT_EQ(string.length_in_code_points(), 14uz);
+        EXPECT_EQ(string.utf16_view(), u"hello 😀 there!"sv);
+    }
+    // The bytes ED A0 80 are expected to become the single lone high surrogate code unit 0xD800.
+    {
+        auto string = Utf16String::from_utf8("hello \xed\xa0\x80!"sv);
+        EXPECT(!string.is_empty());
+        EXPECT(!string.is_ascii());
+        EXPECT(!string.has_long_ascii_storage());
+        EXPECT(!string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 8uz);
+        EXPECT_EQ(string.length_in_code_points(), 8uz);
+        EXPECT_EQ(string.utf16_view(), u"hello \xd800!"sv);
+    }
+    // The bytes ED B0 80 are expected to become the single lone low surrogate code unit 0xDC00.
+    {
+        auto string = Utf16String::from_utf8("hello \xed\xb0\x80!"sv);
+        EXPECT(!string.is_empty());
+        EXPECT(!string.is_ascii());
+        EXPECT(!string.has_long_ascii_storage());
+        EXPECT(!string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 8uz);
+        EXPECT_EQ(string.length_in_code_points(), 8uz);
+        EXPECT_EQ(string.utf16_view(), u"hello \xdc00!"sv);
+    }
+}
+
+// Checks the storage kind and the code unit / code point lengths chosen when constructing from UTF-16 input.
+TEST_CASE(from_utf16)
+{
+    // Short ASCII-only UTF-16 input: short ASCII storage.
+    {
+        auto string = Utf16String::from_utf16(u"hello!"sv);
+        EXPECT(!string.is_empty());
+        EXPECT(string.is_ascii());
+        EXPECT(!string.has_long_ascii_storage());
+        EXPECT(string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 6uz);
+        EXPECT_EQ(string.length_in_code_points(), 6uz);
+        EXPECT_EQ(string.ascii_view(), "hello!"sv);
+    }
+    // Longer ASCII-only UTF-16 input: long ASCII storage.
+    {
+        auto string = Utf16String::from_utf16(u"hello there!"sv);
+        EXPECT(!string.is_empty());
+        EXPECT(string.is_ascii());
+        EXPECT(string.has_long_ascii_storage());
+        EXPECT(!string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 12uz);
+        EXPECT_EQ(string.length_in_code_points(), 12uz);
+        EXPECT_EQ(string.ascii_view(), "hello there!"sv);
+    }
+    // Short non-ASCII input: neither ASCII storage kind; one code point encoded as two code units.
+    {
+        auto string = Utf16String::from_utf16(u"😀"sv);
+        EXPECT(!string.is_empty());
+        EXPECT(!string.is_ascii());
+        EXPECT(!string.has_long_ascii_storage());
+        EXPECT(!string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 2uz);
+        EXPECT_EQ(string.length_in_code_points(), 1uz);
+        EXPECT_EQ(string.utf16_view(), u"😀"sv);
+    }
+    // Longer input mixing ASCII and non-ASCII: neither ASCII storage kind.
+    {
+        auto string = Utf16String::from_utf16(u"hello 😀 there!"sv);
+        EXPECT(!string.is_empty());
+        EXPECT(!string.is_ascii());
+        EXPECT(!string.has_long_ascii_storage());
+        EXPECT(!string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 15uz);
+        EXPECT_EQ(string.length_in_code_points(), 14uz);
+        EXPECT_EQ(string.utf16_view(), u"hello 😀 there!"sv);
+    }
+    // A lone high surrogate is expected to be kept as-is.
+    {
+        auto string = Utf16String::from_utf16(u"hello \xd800!"sv);
+        EXPECT(!string.is_empty());
+        EXPECT(!string.is_ascii());
+        EXPECT(!string.has_long_ascii_storage());
+        EXPECT(!string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 8uz);
+        EXPECT_EQ(string.length_in_code_points(), 8uz);
+        EXPECT_EQ(string.utf16_view(), u"hello \xd800!"sv);
+    }
+    // A lone low surrogate is expected to be kept as-is.
+    {
+        auto string = Utf16String::from_utf16(u"hello \xdc00!"sv);
+        EXPECT(!string.is_empty());
+        EXPECT(!string.is_ascii());
+        EXPECT(!string.has_long_ascii_storage());
+        EXPECT(!string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 8uz);
+        EXPECT_EQ(string.length_in_code_points(), 8uz);
+        EXPECT_EQ(string.utf16_view(), u"hello \xdc00!"sv);
+    }
+}
+
+// Checks the storage kind and the code unit / code point lengths chosen when constructing from UTF-32 input.
+TEST_CASE(from_utf32)
+{
+    // Counts the code points of a null-terminated char32_t string (terminator excluded).
+    auto strlen32 = [](char32_t const* string) {
+        auto const* start = string;
+        while (*start)
+            ++start;
+        return static_cast<size_t>(start - string);
+    };
+
+    // Wraps a null-terminated char32_t literal in a Utf32View.
+    auto to_utf32_view = [&](char32_t const* string) {
+        return Utf32View { reinterpret_cast<u32 const*>(string), strlen32(string) };
+    };
+
+    // Short ASCII-only UTF-32 input: short ASCII storage.
+    {
+        auto string = Utf16String::from_utf32(to_utf32_view(U"hello!"));
+        EXPECT(!string.is_empty());
+        EXPECT(string.is_ascii());
+        EXPECT(!string.has_long_ascii_storage());
+        EXPECT(string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 6uz);
+        EXPECT_EQ(string.length_in_code_points(), 6uz);
+        EXPECT_EQ(string.ascii_view(), "hello!"sv);
+    }
+    // Longer ASCII-only UTF-32 input: long ASCII storage.
+    {
+        auto string = Utf16String::from_utf32(to_utf32_view(U"hello there!"));
+        EXPECT(!string.is_empty());
+        EXPECT(string.is_ascii());
+        EXPECT(string.has_long_ascii_storage());
+        EXPECT(!string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 12uz);
+        EXPECT_EQ(string.length_in_code_points(), 12uz);
+        EXPECT_EQ(string.ascii_view(), "hello there!"sv);
+    }
+    // A single non-ASCII code point: neither ASCII storage kind; encoded as two UTF-16 code units.
+    {
+        auto string = Utf16String::from_utf32(to_utf32_view(U"😀"));
+        EXPECT(!string.is_empty());
+        EXPECT(!string.is_ascii());
+        EXPECT(!string.has_long_ascii_storage());
+        EXPECT(!string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 2uz);
+        EXPECT_EQ(string.length_in_code_points(), 1uz);
+        EXPECT_EQ(string.utf16_view(), u"😀"sv);
+    }
+    // Longer input mixing ASCII and non-ASCII: neither ASCII storage kind.
+    {
+        auto string = Utf16String::from_utf32(to_utf32_view(U"hello 😀 there!"));
+        EXPECT(!string.is_empty());
+        EXPECT(!string.is_ascii());
+        EXPECT(!string.has_long_ascii_storage());
+        EXPECT(!string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 15uz);
+        EXPECT_EQ(string.length_in_code_points(), 14uz);
+        EXPECT_EQ(string.utf16_view(), u"hello 😀 there!"sv);
+    }
+    // A lone high surrogate code point is expected to map to the same single code unit.
+    {
+        auto string = Utf16String::from_utf32(to_utf32_view(U"hello \xd800!"));
+        EXPECT(!string.is_empty());
+        EXPECT(!string.is_ascii());
+        EXPECT(!string.has_long_ascii_storage());
+        EXPECT(!string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 8uz);
+        EXPECT_EQ(string.length_in_code_points(), 8uz);
+        EXPECT_EQ(string.utf16_view(), u"hello \xd800!"sv);
+    }
+    // A lone low surrogate code point is expected to map to the same single code unit.
+    {
+        auto string = Utf16String::from_utf32(to_utf32_view(U"hello \xdc00!"));
+        EXPECT(!string.is_empty());
+        EXPECT(!string.is_ascii());
+        EXPECT(!string.has_long_ascii_storage());
+        EXPECT(!string.has_short_ascii_storage());
+        EXPECT_EQ(string.length_in_code_units(), 8uz);
+        EXPECT_EQ(string.length_in_code_points(), 8uz);
+        EXPECT_EQ(string.utf16_view(), u"hello \xdc00!"sv);
+    }
+}
+
+// Copy construction and copy assignment must produce an equal string and leave the source unchanged.
+TEST_CASE(copy_operations)
+{
+    auto test = [](Utf16String const& string1) {
+        // Independent reference value to compare the source against after each copy.
+        auto original = make_copy(string1);
+
+        // Copy constructor.
+        Utf16String string2(string1);
+
+        EXPECT_EQ(string1, original);
+        EXPECT_EQ(string1, string2);
+
+        // Copy assignment.
+        Utf16String string3;
+        string3 = string1;
+
+        EXPECT_EQ(string1, original);
+        EXPECT_EQ(string1, string3);
+    };
+
+    // Empty, ASCII (two lengths), and non-ASCII inputs.
+    test({});
+    test("hello"_utf16);
+    test("hello there general!"_utf16);
+    test("hello 😀 there!"_utf16);
+}
+
+// Move construction and move assignment must transfer the contents and leave the moved-from string empty.
+TEST_CASE(move_operations)
+{
+    auto test = [](Utf16String string1) {
+        // Independent reference value to compare the move targets against.
+        auto original = make_copy(string1);
+
+        // Move constructor.
+        Utf16String string2(move(string1));
+
+        EXPECT(string1.is_empty());
+        EXPECT_EQ(string1, Utf16String {});
+        EXPECT_EQ(string2, original);
+
+        // Move assignment.
+        Utf16String string3;
+        string3 = move(string2);
+
+        EXPECT(string2.is_empty());
+        EXPECT_EQ(string2, Utf16String {});
+        EXPECT_EQ(string3, original);
+    };
+
+    // Empty, ASCII (two lengths), and non-ASCII inputs.
+    test({});
+    test("hello"_utf16);
+    test("hello there general!"_utf16);
+    test("hello 😀 there!"_utf16);
+}
+
+// Equality and inequality between Utf16String values, and between strings and their UTF-16 views, across the
+// different storage kinds.
+TEST_CASE(equals)
+{
+    // `string1` is compared against a copy of itself (must be equal) and against `inequal_string` (must differ),
+    // with the operands in both orders.
+    auto test = [](Utf16String const& string1, Utf16String const& inequal_string) {
+        auto string2 = make_copy(string1);
+
+        EXPECT_EQ(string1, string1);
+        EXPECT_EQ(string1, string2);
+        EXPECT_EQ(string2, string1);
+        EXPECT_EQ(string2, string2);
+
+        // View comparisons are only exercised when string1 has UTF-16 storage.
+        if (string1.has_long_utf16_storage()) {
+            EXPECT_EQ(string1, string1.utf16_view());
+            EXPECT_EQ(string1, string2.utf16_view());
+            EXPECT_EQ(string2, string1.utf16_view());
+            EXPECT_EQ(string2, string2.utf16_view());
+
+            EXPECT_EQ(string1.utf16_view(), string1);
+            EXPECT_EQ(string1.utf16_view(), string2);
+            EXPECT_EQ(string2.utf16_view(), string1);
+            EXPECT_EQ(string2.utf16_view(), string2);
+        }
+
+        EXPECT_NE(string1, inequal_string);
+        EXPECT_NE(string2, inequal_string);
+        EXPECT_NE(inequal_string, string1);
+        EXPECT_NE(inequal_string, string2);
+
+        if (string1.has_long_utf16_storage()) {
+            EXPECT_NE(string1, inequal_string.utf16_view());
+            EXPECT_NE(string2, inequal_string.utf16_view());
+            EXPECT_NE(inequal_string, string1.utf16_view());
+            EXPECT_NE(inequal_string, string2.utf16_view());
+
+            EXPECT_NE(string1.utf16_view(), inequal_string);
+            EXPECT_NE(string2.utf16_view(), inequal_string);
+            EXPECT_NE(inequal_string.utf16_view(), string1);
+            EXPECT_NE(inequal_string.utf16_view(), string2);
+        }
+    };
+
+    // Short (empty) ASCII string comparison.
+    test(Utf16String {}, "hello"_utf16);
+
+    // Short ASCII string comparison.
+    test("hello"_utf16, "there"_utf16);
+
+    // Short and long ASCII string comparison.
+    test("hello"_utf16, "hello there general!"_utf16);
+
+    // Long ASCII string comparison.
+    test("hello there!"_utf16, "hello there general!"_utf16);
+
+    // UTF-16 string comparison.
+    test("😀"_utf16, "hello 😀"_utf16);
+
+    // Short ASCII and UTF-16 string comparison.
+    test("hello"_utf16, "😀"_utf16);
+
+    // Short ASCII and UTF-16 string of same code unit length comparison.
+    test("ab"_utf16, "😀"_utf16);
+
+    // Long ASCII and UTF-16 string comparison.
+    test("hello there general!"_utf16, "😀"_utf16);
+
+    // Long ASCII and UTF-16 string of same code unit length comparison.
+    test("ababababab"_utf16, "😀😀😀😀😀"_utf16);
+}
+
+TEST_CASE(equals_ascii)
+{
+    // Converts `text` with Utf16String::from_utf8 and checks that the result compares equal to `text` in both
+    // operand orders, then checks that `text` compares unequal to `other` in both operand orders.
+    auto verify = [](StringView text, Utf16String const& other) {
+        auto converted = Utf16String::from_utf8(text);
+
+        EXPECT_EQ(text, converted);
+        EXPECT_EQ(converted, text);
+
+        EXPECT_NE(text, other);
+        EXPECT_NE(other, text);
+    };
+
+    // Empty (short) ASCII string against a short ASCII string.
+    verify(StringView {}, "hello"_utf16);
+
+    // Two short ASCII strings.
+    verify("hello"sv, "there"_utf16);
+
+    // A short ASCII string against a long ASCII string.
+    verify("hello"sv, "hello there general!"_utf16);
+
+    // Two long ASCII strings.
+    verify("hello there!"sv, "hello there general!"_utf16);
+
+    // A short ASCII string against a UTF-16 string.
+    verify("hello"sv, "😀"_utf16);
+
+    // A short ASCII string against a UTF-16 string with the same number of code units.
+    verify("ab"sv, "😀"_utf16);
+
+    // A long ASCII string against a UTF-16 string.
+    verify("hello there general!"sv, "😀"_utf16);
+
+    // A long ASCII string against a UTF-16 string with the same number of code units.
+    verify("ababababab"sv, "😀😀😀😀😀"_utf16);
+
+    // A non-ASCII StringView is expected to compare unequal even to the UTF-16 string spelled with the same text.
+    EXPECT_NE("😀"sv, "😀"_utf16);
+}
+
+TEST_CASE(equals_ignoring_ascii_case)
+{
+    // Derives a second string from `subject` by passing every even-indexed code point through
+    // to_ascii_uppercase(), then checks that the two match under equals_ignoring_ascii_case() while `other`
+    // does not match either of them.
+    auto verify = [](Utf16String const& subject, Utf16String const& other) {
+        StringBuilder mixed_case_builder;
+        for (auto [index, code_point] : enumerate(subject)) {
+            if (index % 2 == 0)
+                mixed_case_builder.append_code_point(to_ascii_uppercase(code_point));
+            else
+                mixed_case_builder.append_code_point(code_point);
+        }
+
+        auto mixed_case = Utf16String::from_utf8(mixed_case_builder.string_view());
+
+        EXPECT(subject.equals_ignoring_ascii_case(subject));
+        EXPECT(subject.equals_ignoring_ascii_case(mixed_case));
+        EXPECT(mixed_case.equals_ignoring_ascii_case(subject));
+        EXPECT(mixed_case.equals_ignoring_ascii_case(mixed_case));
+
+        // Utf16View arguments are only attempted when the subject reports long UTF-16 storage.
+        if (subject.has_long_utf16_storage()) {
+            EXPECT(subject.equals_ignoring_ascii_case(subject.utf16_view()));
+            EXPECT(subject.equals_ignoring_ascii_case(mixed_case.utf16_view()));
+            EXPECT(mixed_case.equals_ignoring_ascii_case(subject.utf16_view()));
+            EXPECT(mixed_case.equals_ignoring_ascii_case(mixed_case.utf16_view()));
+        }
+
+        EXPECT(!subject.equals_ignoring_ascii_case(other));
+        EXPECT(!mixed_case.equals_ignoring_ascii_case(other));
+        EXPECT(!other.equals_ignoring_ascii_case(subject));
+        EXPECT(!other.equals_ignoring_ascii_case(mixed_case));
+
+        // Same guard as above, this time for the mismatch checks that involve views.
+        if (subject.has_long_utf16_storage()) {
+            EXPECT(!subject.equals_ignoring_ascii_case(other.utf16_view()));
+            EXPECT(!mixed_case.equals_ignoring_ascii_case(other.utf16_view()));
+            EXPECT(!other.equals_ignoring_ascii_case(subject.utf16_view()));
+            EXPECT(!other.equals_ignoring_ascii_case(mixed_case.utf16_view()));
+        }
+    };
+
+    // Empty (short) ASCII string against a short ASCII string.
+    verify(Utf16String {}, "hello"_utf16);
+
+    // Two short ASCII strings.
+    verify("hello"_utf16, "there"_utf16);
+
+    // A short ASCII string against a long ASCII string.
+    verify("hello"_utf16, "hello there general!"_utf16);
+
+    // Two long ASCII strings.
+    verify("hello there!"_utf16, "hello there general!"_utf16);
+
+    // Two UTF-16 strings.
+    verify("😀"_utf16, "hello 😀"_utf16);
+
+    // A short ASCII string against a UTF-16 string.
+    verify("hello"_utf16, "😀"_utf16);
+
+    // A short ASCII string against a UTF-16 string with the same number of code units.
+    verify("ab"_utf16, "😀"_utf16);
+
+    // A long ASCII string against a UTF-16 string.
+    verify("hello there general!"_utf16, "😀"_utf16);
+
+    // A long ASCII string against a UTF-16 string with the same number of code units.
+    verify("ababababab"_utf16, "😀😀😀😀😀"_utf16);
+}
+
+TEST_CASE(iteration)
+{
+    // Iterates `string` and compares each produced code point with the entry of `expected` at the same index,
+    // then checks that dereferencing or incrementing the end iterator dies.
+    auto verify = [](Utf16String const& string, ReadonlySpan<u32> expected) {
+        EXPECT_EQ(string.length_in_code_points(), expected.size());
+
+        for (auto [index, code_point] : enumerate(string)) {
+            if (expected.size() != 0) {
+                EXPECT_EQ(code_point, expected[index]);
+            } else {
+                // Reaching the loop body at all means an empty expectation was violated.
+                FAIL("Iterating an empty UTF-16 string should not produce any values");
+            }
+        }
+
+        auto end_iterator = string.end();
+        EXPECT_DEATH("Dereferencing a UTF-16 iterator which is at its end", *end_iterator);
+        EXPECT_DEATH("Incrementing a UTF-16 iterator which is at its end", ++end_iterator);
+    };
+
+    verify(Utf16String {}, ReadonlySpan<u32> {});
+    verify("hello"_utf16, { { 'h', 'e', 'l', 'l', 'o' } });
+    verify("hello there general!"_utf16, { { 'h', 'e', 'l', 'l', 'o', ' ', 't', 'h', 'e', 'r', 'e', ' ', 'g', 'e', 'n', 'e', 'r', 'a', 'l', '!' } });
+    verify("😀"_utf16, { { 0x1f600 } });
+    verify("hello 😀 there!"_utf16, { { 'h', 'e', 'l', 'l', 'o', ' ', 0x1f600, ' ', 't', 'h', 'e', 'r', 'e', '!' } });
+}
+
+TEST_CASE(code_unit_at)
+{
+    // Builds a Utf16String from `source` via from_utf16, checks its code unit length against `expected_length`,
+    // and compares every code unit with the one at the same index in `source`.
+    auto verify = [](Utf16View const& source, size_t expected_length) {
+        auto converted = Utf16String::from_utf16(source);
+        EXPECT_EQ(converted.length_in_code_units(), expected_length);
+
+        for (size_t index = 0; index < expected_length; ++index) {
+            EXPECT_EQ(converted.code_unit_at(index), source.code_unit_at(index));
+        }
+    };
+
+    verify(Utf16View {}, 0);
+    verify(u"hello"sv, 5);
+    verify(u"hello there general!"sv, 20);
+    verify(u"😀"sv, 2);
+    verify(u"hello 😀 there!"sv, 15);
+}
+
+TEST_CASE(code_point_at)
+{
+    // Builds a Utf16String from `source` via from_utf16, checks its code point length against
+    // `expected_code_points`, and compares code_point_at() between the string and `source` at every code unit
+    // offset of the string.
+    auto verify = [](Utf16View const& source, size_t expected_code_points) {
+        auto converted = Utf16String::from_utf16(source);
+        EXPECT_EQ(converted.length_in_code_points(), expected_code_points);
+
+        size_t offset = 0;
+        while (offset < converted.length_in_code_units()) {
+            EXPECT_EQ(converted.code_point_at(offset), source.code_point_at(offset));
+            ++offset;
+        }
+    };
+
+    verify(Utf16View {}, 0);
+    verify(u"hello"sv, 5);
+    verify(u"hello there general!"sv, 20);
+    verify(u"😀"sv, 1);
+    verify(u"hello 😀 there!"sv, 14);
+}