From 86b1c78c1a428d489de433b9f9bba31799203a42 Mon Sep 17 00:00:00 2001
From: Timothy Flynn <trflynn89@pm.me>
Date: Thu, 26 Jun 2025 12:52:23 -0400
Subject: [PATCH] AK+Everywhere: Prepare Utf16View for integration with a
 UTF-16 string

To prepare for an upcoming Utf16String, this migrates Utf16View to store
its data as a char16_t. Most function definitions are moved inline and
made constexpr.

This also adds a UDL to construct a Utf16View from a string literal:

    auto string = u"hello"sv;

This lets us remove the NTTP Utf16View constructor, as we have found
that such constructors bloat binary size quite a bit.
---
 AK/String.cpp                                 |  17 +-
 AK/StringBase.h                               |   6 +
 AK/StringBuilder.cpp                          |  18 +-
 AK/StringView.cpp                             |   4 +-
 AK/UnicodeUtils.h                             |   2 +-
 AK/Utf16View.cpp                              | 293 ++++-------------
 AK/Utf16View.h                                | 294 ++++++++++++++----
 Libraries/LibCore/ProcessWindows.cpp          |   2 +-
 .../LibJS/Runtime/AbstractOperations.cpp      |  28 +-
 Libraries/LibJS/Runtime/Utf16String.cpp       |   2 +-
 Libraries/LibJS/Runtime/Utf16String.h         |   2 +-
 Libraries/LibRegex/RegexMatch.h               |   3 +-
 Libraries/LibUnicode/ICU.cpp                  |   3 +-
 Libraries/LibUnicode/Segmenter.cpp            |   2 +-
 Libraries/LibWeb/DOM/CharacterData.cpp        |   5 +-
 Libraries/LibWeb/FileAPI/FileReader.cpp       |   2 +-
 Tests/AK/TestUtf16View.cpp                    | 144 ++++-----
 17 files changed, 406 insertions(+), 421 deletions(-)
diff --git a/AK/String.cpp b/AK/String.cpp
index 37d8381003b..e430dede5ea 100644
--- a/AK/String.cpp
+++ b/AK/String.cpp
@@ -93,22 +93,7 @@ ErrorOr<String> String::from_utf16_be(ReadonlyBytes bytes)
 
 ErrorOr<String> String::from_utf16(Utf16View const& utf16)
 {
-    if (!utf16.validate())
-        return Error::from_string_literal("String::from_utf16: Input was not valid UTF-16");
-    if (utf16.is_empty())
-        return String {};
-
-    String result;
-
-    auto utf8_length = simdutf::utf8_length_from_utf16(utf16.char_data(), utf16.length_in_code_units());
-
-    TRY(result.replace_with_new_string(utf8_length, [&](Bytes buffer) -> ErrorOr<void> {
-        [[maybe_unused]] auto result = simdutf::convert_utf16_to_utf8(utf16.char_data(), utf16.length_in_code_units(), reinterpret_cast<char*>(buffer.data()));
-        ASSERT(result == buffer.size());
-        return {};
-    }));
-
-    return result;
+    return utf16.to_utf8();
 }
 
 ErrorOr<String> String::from_stream(Stream& stream, size_t byte_count)
diff --git a/AK/StringBase.h b/AK/StringBase.h
index 50635e9c8d3..f9fc61ed30e 100644
--- a/AK/StringBase.h
+++ b/AK/StringBase.h
@@ -86,6 +86,12 @@ public:
         return replace_with_new_string(byte_count, forward<Func>(callback));
     }
 
+    template<typename Func>
+    ALWAYS_INLINE ErrorOr<void> replace_with_new_string(Badge<Utf16View>, size_t byte_count, Func&& callback)
+    {
+        return replace_with_new_string(byte_count, forward<Func>(callback));
+    }
+
 protected:
     template<typename Func>
     ErrorOr<void> replace_with_new_string(size_t byte_count, Func&& callback)
diff --git a/AK/StringBuilder.cpp b/AK/StringBuilder.cpp
index 55ff609ef12..53bfa358ae9 100644
--- a/AK/StringBuilder.cpp
+++ b/AK/StringBuilder.cpp
@@ -250,17 +250,17 @@ ErrorOr<void> StringBuilder::try_append(Utf16View const& utf16_view)
     if (utf16_view.is_empty())
         return {};
 
-    auto maximum_utf8_length = UnicodeUtils::maximum_utf8_length_from_utf16(utf16_view.span());
+    auto remaining_view = utf16_view.span();
+    auto maximum_utf8_length = UnicodeUtils::maximum_utf8_length_from_utf16(remaining_view);
 
     // Possibly over-allocate a little to ensure we don't have to allocate later.
     TRY(will_append(maximum_utf8_length));
 
-    Utf16View remaining_view = utf16_view;
     for (;;) {
-        auto uninitialized_data_pointer = static_cast<char*>(m_buffer.end_pointer());
+        auto* uninitialized_data_pointer = static_cast<char*>(m_buffer.end_pointer());
 
         // Fast path.
-        auto result = simdutf::convert_utf16_to_utf8_with_errors(remaining_view.char_data(), remaining_view.length_in_code_units(), uninitialized_data_pointer);
+        auto result = simdutf::convert_utf16_to_utf8_with_errors(remaining_view.data(), remaining_view.size(), uninitialized_data_pointer);
         if (result.error == simdutf::SUCCESS) {
             auto bytes_just_written = result.count;
             m_buffer.set_size(m_buffer.size() + bytes_just_written);
@@ -269,13 +269,13 @@ ErrorOr<void> StringBuilder::try_append(Utf16View const& utf16_view)
 
         // Slow path. Found unmatched surrogate code unit.
         auto first_invalid_code_unit = result.count;
-        ASSERT(first_invalid_code_unit < remaining_view.length_in_code_units());
+        ASSERT(first_invalid_code_unit < remaining_view.size());
 
         // Unfortunately, `simdutf` does not tell us how many bytes it just wrote in case of an error, so we have to calculate it ourselves.
-        auto bytes_just_written = simdutf::utf8_length_from_utf16(remaining_view.char_data(), first_invalid_code_unit);
+        auto bytes_just_written = simdutf::utf8_length_from_utf16(remaining_view.data(), first_invalid_code_unit);
 
         do {
-            auto code_unit = remaining_view.code_unit_at(first_invalid_code_unit++);
+            auto code_unit = remaining_view[first_invalid_code_unit++];
 
             // Invalid surrogate code units are U+D800 - U+DFFF, so they are always encoded using 3 bytes.
             ASSERT(code_unit >= 0xD800 && code_unit <= 0xDFFF);
@@ -283,11 +283,11 @@ ErrorOr<void> StringBuilder::try_append(Utf16View const& utf16_view)
             uninitialized_data_pointer[bytes_just_written++] = (((code_unit >> 12) & 0x0f) | 0xe0);
             uninitialized_data_pointer[bytes_just_written++] = (((code_unit >> 6) & 0x3f) | 0x80);
             uninitialized_data_pointer[bytes_just_written++] = (((code_unit >> 0) & 0x3f) | 0x80);
-        } while (first_invalid_code_unit < remaining_view.length_in_code_units() && UnicodeUtils::is_utf16_low_surrogate(remaining_view.data()[first_invalid_code_unit]));
+        } while (first_invalid_code_unit < remaining_view.size() && UnicodeUtils::is_utf16_low_surrogate(remaining_view.data()[first_invalid_code_unit]));
 
         // Code unit might no longer be invalid, retry on the remaining data.
         m_buffer.set_size(m_buffer.size() + bytes_just_written);
-        remaining_view = remaining_view.substring_view(first_invalid_code_unit);
+        remaining_view = remaining_view.slice(first_invalid_code_unit);
     }
 
     return {};
diff --git a/AK/StringView.cpp b/AK/StringView.cpp
index b5519a3fe1b..d6039331f36 100644
--- a/AK/StringView.cpp
+++ b/AK/StringView.cpp
@@ -212,7 +212,7 @@ String StringView::to_ascii_lowercase_string() const
 
     String result;
 
-    MUST(result.replace_with_new_string({}, length(), [&](Bytes buffer) -> ErrorOr<void> {
+    MUST(result.replace_with_new_string(Badge<StringView> {}, length(), [&](Bytes buffer) -> ErrorOr<void> {
         for (auto [i, character] : enumerate(bytes()))
             buffer[i] = static_cast<u8>(AK::to_ascii_lowercase(character));
         return {};
@@ -227,7 +227,7 @@ String StringView::to_ascii_uppercase_string() const
 
     String result;
 
-    MUST(result.replace_with_new_string({}, length(), [&](Bytes buffer) -> ErrorOr<void> {
+    MUST(result.replace_with_new_string(Badge<StringView> {}, length(), [&](Bytes buffer) -> ErrorOr<void> {
         for (auto [i, character] : enumerate(bytes()))
             buffer[i] = static_cast<u8>(AK::to_ascii_uppercase(character));
         return {};
diff --git a/AK/UnicodeUtils.h b/AK/UnicodeUtils.h
index a9518b55314..6c1cb6fc906 100644
--- a/AK/UnicodeUtils.h
+++ b/AK/UnicodeUtils.h
@@ -158,7 +158,7 @@ constexpr ErrorOr<size_t> try_code_point_to_utf16(u32 code_point, Callback callb
  * Compute the maximum number of UTF-8 bytes needed to store a given UTF-16 string, accounting for unmatched UTF-16 surrogates.
  * This function will overcount by at most 33%; 2 bytes for every valid UTF-16 codepoint between U+100000 and U+10FFFF.
  */
-[[nodiscard]] static inline size_t maximum_utf8_length_from_utf16(ReadonlySpan<u16> code_units)
+[[nodiscard]] static inline size_t maximum_utf8_length_from_utf16(ReadonlySpan<char16_t> code_units)
 {
     // # UTF-8 code point -> no. UTF-8 bytes needed
     // U+0000   - U+007F   => 1 UTF-8 bytes
diff --git a/AK/Utf16View.cpp b/AK/Utf16View.cpp
index 3ba4fe370af..84a43c939c6 100644
--- a/AK/Utf16View.cpp
+++ b/AK/Utf16View.cpp
@@ -80,77 +80,75 @@ ErrorOr<Utf16ConversionResult> utf32_to_utf16(Utf32View const& utf32_view)
     return Utf16ConversionResult { utf16_data, length };
 }
 
+bool validate_utf16_le(ReadonlyBytes bytes)
+{
+    return simdutf::validate_utf16le(reinterpret_cast<char16_t const*>(bytes.data()), bytes.size() / 2);
+}
+
+bool validate_utf16_be(ReadonlyBytes bytes)
+{
+    return simdutf::validate_utf16be(reinterpret_cast<char16_t const*>(bytes.data()), bytes.size() / 2);
+}
+
 size_t utf16_code_unit_length_from_utf8(StringView string)
 {
     return simdutf::utf16_length_from_utf8(string.characters_without_null_termination(), string.length());
 }
 
+ErrorOr<String> Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_units) const
+{
+    if (is_empty())
+        return String {};
+    if (!validate(allow_invalid_code_units))
+        return Error::from_string_literal("Input was not valid UTF-16");
+
+    if (allow_invalid_code_units == AllowInvalidCodeUnits::No) {
+        String result;
+        auto utf8_length = simdutf::utf8_length_from_utf16(m_string, length_in_code_units());
+
+        TRY(result.replace_with_new_string(Badge<Utf16View> {}, utf8_length, [&](Bytes buffer) -> ErrorOr<void> {
+            [[maybe_unused]] auto result = simdutf::convert_utf16_to_utf8(m_string, length_in_code_units(), reinterpret_cast<char*>(buffer.data()));
+            ASSERT(result == buffer.size());
+            return {};
+        }));
+
+        return result;
+    }
+
+    StringBuilder builder;
+    builder.append(*this);
+    return builder.to_string();
+}
+
 ErrorOr<ByteString> Utf16View::to_byte_string(AllowInvalidCodeUnits allow_invalid_code_units) const
 {
     return TRY(to_utf8(allow_invalid_code_units)).to_byte_string();
 }
 
-ErrorOr<String> Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_units) const
-{
-    if (allow_invalid_code_units == AllowInvalidCodeUnits::No)
-        return String::from_utf16(*this);
-
-    StringBuilder builder;
-    builder.append(*this);
-    return builder.to_string();
-}
-
 bool Utf16View::is_ascii() const
 {
-    return simdutf::validate_ascii(reinterpret_cast<char const*>(m_code_units.data()), length_in_code_units() * sizeof(char16_t));
+    return simdutf::validate_ascii(reinterpret_cast<char const*>(m_string), length_in_code_units() * sizeof(char16_t));
 }
 
-size_t Utf16View::length_in_code_points() const
+bool Utf16View::validate(size_t& valid_code_units, AllowInvalidCodeUnits allow_invalid_code_units) const
 {
-    if (m_length_in_code_points == NumericLimits<size_t>::max())
-        m_length_in_code_points = calculate_length_in_code_points();
-    return m_length_in_code_points;
-}
+    auto view = *this;
+    valid_code_units = 0;
 
-u16 Utf16View::code_unit_at(size_t index) const
-{
-    VERIFY(index < length_in_code_units());
-    return m_code_units[index];
-}
+    while (!view.is_empty()) {
+        auto result = simdutf::validate_utf16_with_errors(view.m_string, view.length_in_code_units());
+        valid_code_units += result.count;
 
-u32 Utf16View::code_point_at(size_t index) const
-{
-    VERIFY(index < length_in_code_units());
+        if (result.error == simdutf::SUCCESS)
+            return true;
+        if (allow_invalid_code_units == AllowInvalidCodeUnits::No || result.error != simdutf::SURROGATE)
+            return false;
 
-    u32 code_point = code_unit_at(index);
-    if (!UnicodeUtils::is_utf16_high_surrogate(code_point) && !UnicodeUtils::is_utf16_low_surrogate(code_point))
-        return code_point;
-    if (UnicodeUtils::is_utf16_low_surrogate(code_point) || (index + 1 == length_in_code_units()))
-        return code_point;
-
-    auto second = code_unit_at(index + 1);
-    if (!UnicodeUtils::is_utf16_low_surrogate(second))
-        return code_point;
-
-    return UnicodeUtils::decode_utf16_surrogate_pair(code_point, second);
-}
-
-size_t Utf16View::code_point_offset_of(size_t code_unit_offset) const
-{
-    if (length_in_code_points() == length_in_code_units()) // Fast path: all code points are one code unit.
-        return code_unit_offset;
-
-    size_t code_point_offset = 0;
-
-    for (auto it = begin(); it != end(); ++it) {
-        if (code_unit_offset == 0)
-            return code_point_offset;
-
-        code_unit_offset -= it.length_in_code_units();
-        ++code_point_offset;
+        view = view.substring_view(result.count + 1);
+        ++valid_code_units;
     }
 
-    return code_point_offset;
+    return true;
 }
 
 size_t Utf16View::code_unit_offset_of(size_t code_point_offset) const
@@ -171,19 +169,22 @@ size_t Utf16View::code_unit_offset_of(size_t code_point_offset) const
     return code_unit_offset;
 }
 
-size_t Utf16View::code_unit_offset_of(Utf16CodePointIterator const& it) const
+size_t Utf16View::code_point_offset_of(size_t code_unit_offset) const
 {
-    VERIFY(it.m_ptr >= begin_ptr());
-    VERIFY(it.m_ptr <= end_ptr());
+    if (length_in_code_points() == length_in_code_units()) // Fast path: all code points are one code unit.
+        return code_unit_offset;
 
-    return it.m_ptr - begin_ptr();
-}
+    size_t code_point_offset = 0;
 
-Utf16View Utf16View::substring_view(size_t code_unit_offset, size_t code_unit_length) const
-{
-    VERIFY(!Checked<size_t>::addition_would_overflow(code_unit_offset, code_unit_length));
+    for (auto it = begin(); it != end(); ++it) {
+        if (code_unit_offset == 0)
+            return code_point_offset;
 
-    return Utf16View { m_code_units.slice(code_unit_offset, code_unit_length) };
+        code_unit_offset -= it.length_in_code_units();
+        ++code_point_offset;
+    }
+
+    return code_point_offset;
 }
 
 Utf16View Utf16View::unicode_substring_view(size_t code_point_offset, size_t code_point_length) const
@@ -194,7 +195,10 @@ Utf16View Utf16View::unicode_substring_view(size_t code_point_offset, size_t cod
     if (length_in_code_points() == length_in_code_units()) // Fast path: all code points are one code unit.
         return substring_view(code_point_offset, code_point_length);
 
-    auto code_unit_offset_of = [&](Utf16CodePointIterator const& it) { return it.m_ptr - begin_ptr(); };
+    auto code_unit_offset_of = [&](Utf16CodePointIterator const& it) {
+        return it.m_iterator - m_string;
+    };
+
     size_t code_point_index = 0;
     size_t code_unit_offset = 0;
 
@@ -213,101 +217,13 @@ Utf16View Utf16View::unicode_substring_view(size_t code_point_offset, size_t cod
     VERIFY_NOT_REACHED();
 }
 
-Optional<size_t> Utf16View::find_code_unit_offset(Utf16View const& needle, size_t start_offset) const
-{
-    return m_code_units.index_of(needle.m_code_units, start_offset);
-}
-
-Optional<size_t> Utf16View::find_code_unit_offset_ignoring_case(Utf16View const& needle, size_t start_offset) const
-{
-    Checked maximum_offset { start_offset };
-    maximum_offset += needle.length_in_code_units();
-    if (maximum_offset.has_overflow() || maximum_offset.value() > length_in_code_units())
-        return {};
-
-    if (needle.is_empty())
-        return start_offset;
-
-    size_t index = start_offset;
-    while (index <= length_in_code_units() - needle.length_in_code_units()) {
-        Utf16View const slice { m_code_units.slice(index, needle.length_in_code_units()) };
-        if (slice.equals_ignoring_case(needle))
-            return index;
-        index += slice.begin().length_in_code_units();
-    }
-
-    return {};
-}
-
-bool Utf16View::starts_with(Utf16View const& needle) const
-{
-    if (needle.is_empty())
-        return true;
-    if (is_empty())
-        return false;
-    if (needle.length_in_code_units() > length_in_code_units())
-        return false;
-    if (begin_ptr() == needle.begin_ptr())
-        return true;
-
-    for (auto this_it = begin(), needle_it = needle.begin(); needle_it != needle.end(); ++needle_it, ++this_it) {
-        if (*this_it != *needle_it)
-            return false;
-    }
-
-    return true;
-}
-
-// https://infra.spec.whatwg.org/#code-unit-less-than
-bool Utf16View::is_code_unit_less_than(Utf16View const& other) const
-{
-    auto a = m_code_units;
-    auto b = other.m_code_units;
-
-    auto common_length = min(a.size(), b.size());
-
-    for (size_t position = 0; position < common_length; ++position) {
-        if (a[position] != b[position])
-            return a[position] < b[position];
-    }
-
-    return a.size() < b.size();
-}
-
-bool Utf16View::validate(AllowInvalidCodeUnits allow_invalid_code_units) const
-{
-    size_t valid_code_units = 0;
-    return validate(valid_code_units, allow_invalid_code_units);
-}
-
-bool Utf16View::validate(size_t& valid_code_units, AllowInvalidCodeUnits allow_invalid_code_units) const
-{
-    auto view = *this;
-    valid_code_units = 0;
-
-    while (!view.is_empty()) {
-        auto result = simdutf::validate_utf16_with_errors(view.char_data(), view.length_in_code_units());
-        valid_code_units += result.count;
-
-        if (result.error == simdutf::SUCCESS)
-            return true;
-        if (allow_invalid_code_units == AllowInvalidCodeUnits::No || result.error != simdutf::SURROGATE)
-            return false;
-
-        view = view.substring_view(result.count + 1);
-        ++valid_code_units;
-    }
-
-    return true;
-}
-
 size_t Utf16View::calculate_length_in_code_points() const
 {
     // FIXME: simdutf's code point length method assumes valid UTF-16, whereas Utf16View uses U+FFFD as a replacement
     //        for invalid code points. If we change Utf16View to only accept valid encodings as an invariant, we can
     //        remove this branch.
     if (validate()) [[likely]]
-        return simdutf::count_utf16(char_data(), length_in_code_units());
+        return simdutf::count_utf16(m_string, length_in_code_units());
 
     size_t code_points = 0;
     for ([[maybe_unused]] auto code_point : *this)
@@ -315,81 +231,4 @@ size_t Utf16View::calculate_length_in_code_points() const
     return code_points;
 }
 
-bool Utf16View::equals_ignoring_case(Utf16View const& other) const
-{
-    if (length_in_code_units() != other.length_in_code_units())
-        return false;
-
-    for (size_t i = 0; i < length_in_code_units(); ++i) {
-        // FIXME: Handle non-ASCII case insensitive comparisons.
-        if (to_ascii_lowercase(m_code_units[i]) != to_ascii_lowercase(other.m_code_units[i]))
-            return false;
-    }
-
-    return true;
-}
-
-Utf16CodePointIterator& Utf16CodePointIterator::operator++()
-{
-    size_t code_units = length_in_code_units();
-
-    if (code_units > m_remaining_code_units) {
-        // If there aren't enough code units remaining, skip to the end.
-        m_ptr += m_remaining_code_units;
-        m_remaining_code_units = 0;
-    } else {
-        m_ptr += code_units;
-        m_remaining_code_units -= code_units;
-    }
-
-    return *this;
-}
-
-u32 Utf16CodePointIterator::operator*() const
-{
-    VERIFY(m_remaining_code_units > 0);
-
-    // rfc2781, 2.2 Decoding UTF-16
-    // 1) If W1 < 0xD800 or W1 > 0xDFFF, the character value U is the value
-    //    of W1. Terminate.
-    // 2) Determine if W1 is between 0xD800 and 0xDBFF. If not, the sequence
-    //    is in error and no valid character can be obtained using W1.
-    //    Terminate.
-    // 3) If there is no W2 (that is, the sequence ends with W1), or if W2
-    //    is not between 0xDC00 and 0xDFFF, the sequence is in error.
-    //    Terminate.
-    // 4) Construct a 20-bit unsigned integer U', taking the 10 low-order
-    //    bits of W1 as its 10 high-order bits and the 10 low-order bits of
-    //    W2 as its 10 low-order bits.
-    // 5) Add 0x10000 to U' to obtain the character value U. Terminate.
-
-    auto code_unit = *m_ptr;
-
-    if (UnicodeUtils::is_utf16_high_surrogate(code_unit)) {
-        if (m_remaining_code_units > 1) {
-            auto next_code_unit = *(m_ptr + 1);
-
-            if (UnicodeUtils::is_utf16_low_surrogate(next_code_unit))
-                return UnicodeUtils::decode_utf16_surrogate_pair(code_unit, next_code_unit);
-        }
-
-        return UnicodeUtils::REPLACEMENT_CODE_POINT;
-    }
-
-    if (UnicodeUtils::is_utf16_low_surrogate(code_unit))
-        return UnicodeUtils::REPLACEMENT_CODE_POINT;
-
-    return static_cast<u32>(code_unit);
-}
-
-bool validate_utf16_le(ReadonlyBytes bytes)
-{
-    return simdutf::validate_utf16le(reinterpret_cast<char16_t const*>(bytes.data()), bytes.size() / 2);
-}
-
-bool validate_utf16_be(ReadonlyBytes bytes)
-{
-    return simdutf::validate_utf16be(reinterpret_cast<char16_t const*>(bytes.data()), bytes.size() / 2);
-}
-
 }
diff --git a/AK/Utf16View.h b/AK/Utf16View.h
index 5bcd7b727f4..2eb04632a1e 100644
--- a/AK/Utf16View.h
+++ b/AK/Utf16View.h
@@ -10,6 +10,7 @@
 #include <AK/Error.h>
 #include <AK/Format.h>
 #include <AK/Forward.h>
+#include <AK/MemMem.h>
 #include <AK/Optional.h>
 #include <AK/Span.h>
 #include <AK/String.h>
@@ -21,7 +22,7 @@
 
 namespace AK {
 
-using Utf16Data = Vector<u16, 1>;
+using Utf16Data = Vector<char16_t, 1>;
 
 struct Utf16ConversionResult {
     Utf16Data data;
@@ -36,8 +37,6 @@ ErrorOr<Utf16ConversionResult> utf32_to_utf16(Utf32View const&);
 
 size_t utf16_code_unit_length_from_utf8(StringView);
 
-class Utf16View;
-
 class Utf16CodePointIterator {
     friend class Utf16View;
 
@@ -45,27 +44,57 @@ public:
     Utf16CodePointIterator() = default;
     ~Utf16CodePointIterator() = default;
 
-    bool operator==(Utf16CodePointIterator const& other) const
+    constexpr Utf16CodePointIterator& operator++()
     {
-        return (m_ptr == other.m_ptr) && (m_remaining_code_units == other.m_remaining_code_units);
+        VERIFY(m_remaining_code_units > 0);
+
+        auto length = min(length_in_code_units(), m_remaining_code_units);
+        m_iterator += length;
+        m_remaining_code_units -= length;
+
+        return *this;
     }
 
-    Utf16CodePointIterator& operator++();
-    u32 operator*() const;
+    constexpr u32 operator*() const
+    {
+        VERIFY(m_remaining_code_units > 0);
+        auto code_unit = *m_iterator;
 
-    size_t length_in_code_units() const
+        if (UnicodeUtils::is_utf16_high_surrogate(code_unit)) {
+            if (m_remaining_code_units > 1) {
+                auto next_code_unit = *(m_iterator + 1);
+
+                if (UnicodeUtils::is_utf16_low_surrogate(next_code_unit))
+                    return UnicodeUtils::decode_utf16_surrogate_pair(code_unit, next_code_unit);
+            }
+
+            return UnicodeUtils::REPLACEMENT_CODE_POINT;
+        }
+
+        if (UnicodeUtils::is_utf16_low_surrogate(code_unit))
+            return UnicodeUtils::REPLACEMENT_CODE_POINT;
+
+        return static_cast<u32>(code_unit);
+    }
+
+    [[nodiscard]] constexpr bool operator==(Utf16CodePointIterator const& other) const
+    {
+        return (m_iterator == other.m_iterator) && (m_remaining_code_units == other.m_remaining_code_units);
+    }
+
+    [[nodiscard]] constexpr size_t length_in_code_units() const
     {
         return UnicodeUtils::code_unit_length_for_code_point(**this);
     }
 
 private:
-    Utf16CodePointIterator(u16 const* ptr, size_t length)
-        : m_ptr(ptr)
+    Utf16CodePointIterator(char16_t const* ptr, size_t length)
+        : m_iterator(ptr)
         , m_remaining_code_units(length)
     {
     }
 
-    u16 const* m_ptr { nullptr };
+    char16_t const* m_iterator { nullptr };
     size_t m_remaining_code_units { 0 };
 };
 
@@ -73,101 +102,233 @@ class Utf16View {
 public:
     using Iterator = Utf16CodePointIterator;
 
+    enum class AllowInvalidCodeUnits {
+        No,
+        Yes,
+    };
+
     Utf16View() = default;
     ~Utf16View() = default;
 
-    explicit Utf16View(ReadonlySpan<u16> code_units)
-        : m_code_units(code_units)
+    constexpr Utf16View(char16_t const* string, size_t length_in_code_units)
+        : m_string(string)
+        , m_length_in_code_units(length_in_code_units)
+    {
+    }
+
+    constexpr Utf16View(Utf16Data const& string)
+        : m_string(string.data())
+        , m_length_in_code_units(string.size())
     {
     }
 
     Utf16View(Utf16ConversionResult&&) = delete;
     explicit Utf16View(Utf16ConversionResult const& conversion_result)
-        : m_code_units(conversion_result.data)
+        : m_string(conversion_result.data.data())
+        , m_length_in_code_units(conversion_result.data.size())
         , m_length_in_code_points(conversion_result.code_point_count)
     {
     }
 
-    template<size_t Size>
-    Utf16View(char16_t const (&code_units)[Size])
-        : m_code_units(
-              reinterpret_cast<u16 const*>(&code_units[0]),
-              code_units[Size - 1] == u'\0' ? Size - 1 : Size)
+    ErrorOr<String> to_utf8(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
+    ErrorOr<ByteString> to_byte_string(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
+
+    [[nodiscard]] constexpr ReadonlySpan<char16_t> span() const
     {
+        return { m_string, length_in_code_units() };
     }
 
-    bool operator==(Utf16View const& other) const { return m_code_units == other.m_code_units; }
+    [[nodiscard]] constexpr bool operator==(Utf16View const& other) const
+    {
+        if (length_in_code_units() != other.length_in_code_units())
+            return false;
+        return TypedTransfer<char16_t>::compare(m_string, other.m_string, length_in_code_units());
+    }
 
-    enum class AllowInvalidCodeUnits {
-        Yes,
-        No,
-    };
+    [[nodiscard]] constexpr bool equals_ignoring_case(Utf16View const& other) const
+    {
+        // FIXME: Handle non-ASCII case insensitive comparisons.
+        return equals_ignoring_ascii_case(other);
+    }
 
-    ErrorOr<ByteString> to_byte_string(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
-    ErrorOr<String> to_utf8(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
+    [[nodiscard]] constexpr bool equals_ignoring_ascii_case(Utf16View const& other) const
+    {
+        if (length_in_code_units() != other.length_in_code_units())
+            return false;
 
-    void unsafe_set_code_point_length(size_t length) const { m_length_in_code_points = length; }
+        for (size_t i = 0; i < length_in_code_units(); ++i) {
+            if (to_ascii_lowercase(code_unit_at(i)) != to_ascii_lowercase(other.code_unit_at(i)))
+                return false;
+        }
 
-    bool is_null() const { return m_code_units.is_null(); }
-    bool is_empty() const { return m_code_units.is_empty(); }
-    bool is_ascii() const;
+        return true;
+    }
 
-    size_t length_in_code_units() const { return m_code_units.size(); }
-    size_t length_in_code_points() const;
+    template<typename... Ts>
+    [[nodiscard]] constexpr bool is_one_of(Ts&&... strings) const
+    {
+        return (this->operator==(forward<Ts>(strings)) || ...);
+    }
 
-    Optional<size_t> length_in_code_points_if_known() const
+    template<typename... Ts>
+    [[nodiscard]] constexpr bool is_one_of_ignoring_ascii_case(Ts&&... strings) const
+    {
+        return (this->equals_ignoring_ascii_case(forward<Ts>(strings)) || ...);
+    }
+
+    [[nodiscard]] constexpr u32 hash() const
+    {
+        if (is_empty())
+            return 0;
+        return string_hash(reinterpret_cast<char const*>(m_string), length_in_code_units() * sizeof(char16_t));
+    }
+
+    [[nodiscard]] constexpr bool is_null() const { return m_string == nullptr; }
+    [[nodiscard]] constexpr bool is_empty() const { return length_in_code_units() == 0; }
+    [[nodiscard]] bool is_ascii() const;
+
+    [[nodiscard]] ALWAYS_INLINE bool validate(AllowInvalidCodeUnits allow_invalid_code_units = AllowInvalidCodeUnits::No) const
+    {
+        size_t valid_code_units = 0;
+        return validate(valid_code_units, allow_invalid_code_units);
+    }
+
+    [[nodiscard]] bool validate(size_t& valid_code_units, AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
+
+    [[nodiscard]] constexpr size_t length_in_code_units() const { return m_length_in_code_units; }
+
+    [[nodiscard]] ALWAYS_INLINE size_t length_in_code_points() const
+    {
+        if (m_length_in_code_points == NumericLimits<size_t>::max())
+            m_length_in_code_points = calculate_length_in_code_points();
+        return m_length_in_code_points;
+    }
+
+    constexpr Optional<size_t> length_in_code_points_if_known() const
     {
         if (m_length_in_code_points == NumericLimits<size_t>::max())
             return {};
         return m_length_in_code_points;
     }
 
-    u32 hash() const
+    constexpr void unsafe_set_code_point_length(size_t length) const { m_length_in_code_points = length; }
+
+    [[nodiscard]] constexpr char16_t code_unit_at(size_t index) const
     {
-        if (is_empty())
-            return 0;
-        return string_hash(reinterpret_cast<char const*>(m_code_units.data()), m_code_units.size() * sizeof(u16));
+        VERIFY(index < length_in_code_units());
+        return m_string[index];
     }
 
-    Utf16CodePointIterator begin() const { return { begin_ptr(), m_code_units.size() }; }
-    Utf16CodePointIterator end() const { return { end_ptr(), 0 }; }
+    [[nodiscard]] constexpr u32 code_point_at(size_t index) const
+    {
+        VERIFY(index < length_in_code_units());
+        u32 code_point = code_unit_at(index);
 
-    u16 const* data() const { return m_code_units.data(); }
-    char16_t const* char_data() const { return reinterpret_cast<char16_t const*>(data()); }
+        if (!UnicodeUtils::is_utf16_high_surrogate(code_point) && !UnicodeUtils::is_utf16_low_surrogate(code_point))
+            return code_point;
+        if (UnicodeUtils::is_utf16_low_surrogate(code_point) || (index + 1 == length_in_code_units()))
+            return code_point;
 
-    ReadonlySpan<u16> span() const { return m_code_units; }
+        auto second = code_unit_at(index + 1);
+        if (!UnicodeUtils::is_utf16_low_surrogate(second))
+            return code_point;
 
-    u16 code_unit_at(size_t index) const;
-    u32 code_point_at(size_t index) const;
+        return UnicodeUtils::decode_utf16_surrogate_pair(code_point, second);
+    }
 
-    size_t code_point_offset_of(size_t code_unit_offset) const;
-    size_t code_unit_offset_of(size_t code_point_offset) const;
-    size_t code_unit_offset_of(Utf16CodePointIterator const&) const;
+    [[nodiscard]] size_t code_unit_offset_of(size_t code_point_offset) const;
+    [[nodiscard]] size_t code_point_offset_of(size_t code_unit_offset) const;
 
-    Utf16View substring_view(size_t code_unit_offset, size_t code_unit_length) const;
-    Utf16View substring_view(size_t code_unit_offset) const { return substring_view(code_unit_offset, length_in_code_units() - code_unit_offset); }
+    [[nodiscard]] constexpr Utf16CodePointIterator begin() const
+    {
+        return { m_string, length_in_code_units() };
+    }
 
-    Utf16View unicode_substring_view(size_t code_point_offset, size_t code_point_length) const;
-    Utf16View unicode_substring_view(size_t code_point_offset) const { return unicode_substring_view(code_point_offset, length_in_code_points() - code_point_offset); }
+    [[nodiscard]] constexpr Utf16CodePointIterator end() const
+    {
+        return { m_string + length_in_code_units(), 0 }; // One past the last code unit, with nothing remaining.
+    }
 
-    Optional<size_t> find_code_unit_offset(Utf16View const& needle, size_t start_offset = 0) const;
-    Optional<size_t> find_code_unit_offset_ignoring_case(Utf16View const& needle, size_t start_offset = 0) const;
+    [[nodiscard]] constexpr Utf16View substring_view(size_t code_unit_offset, size_t code_unit_length) const // View of code_unit_length code units starting at code_unit_offset.
+    {
+        VERIFY(code_unit_offset <= length_in_code_units() && code_unit_length <= length_in_code_units() - code_unit_offset); // Same bound as offset + length <= size, but cannot wrap around.
+        return { m_string + code_unit_offset, code_unit_length };
+    }
 
-    bool starts_with(Utf16View const&) const;
-    bool is_code_unit_less_than(Utf16View const& other) const;
+    [[nodiscard]] constexpr Utf16View substring_view(size_t code_unit_offset) const { return substring_view(code_unit_offset, length_in_code_units() - code_unit_offset); } // Suffix from code_unit_offset; an offset past the end is rejected by the two-argument overload.
 
-    bool validate(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
-    bool validate(size_t& valid_code_units, AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
+    [[nodiscard]] Utf16View unicode_substring_view(size_t code_point_offset, size_t code_point_length) const;
+    [[nodiscard]] Utf16View unicode_substring_view(size_t code_point_offset) const { return unicode_substring_view(code_point_offset, length_in_code_points() - code_point_offset); } // Everything from code_point_offset to the end, measured in code points.
 
-    bool equals_ignoring_case(Utf16View const&) const;
+    constexpr Optional<size_t> find_code_unit_offset(char16_t needle, size_t start_offset = 0) const
+    {
+        if (start_offset >= length_in_code_units())
+            return {}; // Nothing left to search at or past the end.
+        return AK::memmem_optional(m_string + start_offset, (length_in_code_units() - start_offset) * sizeof(char16_t), &needle, sizeof(needle)); // NOTE(review): lengths are passed in bytes, so the result looks like a byte offset from start_offset rather than a code unit index — confirm callers convert it.
+    }
+
+    constexpr Optional<size_t> find_code_unit_offset(Utf16View const& needle, size_t start_offset = 0) const
+    {
+        return span().index_of(needle.span(), start_offset); // Search is delegated to the underlying span of code units.
+    }
+
+    constexpr Optional<size_t> find_code_unit_offset_ignoring_case(Utf16View const& needle, size_t start_offset = 0) const
+    {
+        Checked maximum_offset { start_offset }; // Checked arithmetic guards start_offset + needle length against overflow.
+        maximum_offset += needle.length_in_code_units();
+        if (maximum_offset.has_overflow() || maximum_offset.value() > length_in_code_units())
+            return {}; // The needle cannot fit at or after start_offset.
+
+        if (needle.is_empty())
+            return start_offset; // An empty needle matches right where the search starts.
+
+        size_t index = start_offset;
+        while (index <= length_in_code_units() - needle.length_in_code_units()) { // Stop at the last position where the needle still fits.
+            auto slice = substring_view(index, needle.length_in_code_units());
+            if (slice.equals_ignoring_case(needle))
+                return index;
+
+            index += slice.begin().length_in_code_units(); // Presumably skips the whole code point at index rather than a single code unit — TODO confirm.
+        }
+
+        return {};
+    }
+
+    [[nodiscard]] constexpr bool starts_with(Utf16View const& needle) const
+    {
+        if (needle.is_empty())
+            return true; // Every view starts with the empty view.
+        if (is_empty())
+            return false; // An empty view has no non-empty prefix.
+        if (needle.length_in_code_units() > length_in_code_units())
+            return false;
+
+        if (m_string == needle.m_string)
+            return true; // Same start pointer and the needle is no longer than this view.
+        return span().starts_with(needle.span());
+    }
+
+    // https://infra.spec.whatwg.org/#code-unit-less-than
+    [[nodiscard]] constexpr bool is_code_unit_less_than(Utf16View const& other) const
+    {
+        auto common_length = min(length_in_code_units(), other.length_in_code_units());
+
+        for (size_t position = 0; position < common_length; ++position) {
+            auto this_code_unit = code_unit_at(position);
+            auto other_code_unit = other.code_unit_at(position);
+
+            if (this_code_unit != other_code_unit)
+                return this_code_unit < other_code_unit; // The first differing code unit decides the order.
+        }
+
+        return length_in_code_units() < other.length_in_code_units(); // Equal over the common prefix: the shorter view sorts first.
+    }
 
 private:
-    u16 const* begin_ptr() const { return m_code_units.data(); }
-    u16 const* end_ptr() const { return begin_ptr() + m_code_units.size(); }
+    [[nodiscard]] size_t calculate_length_in_code_points() const;
 
-    size_t calculate_length_in_code_points() const;
-
-    ReadonlySpan<u16> m_code_units;
+    char16_t const* m_string { nullptr }; // First code unit of the viewed range; null for a default-constructed view.
+    size_t m_length_in_code_units { 0 }; // Number of code units in the viewed range.
     mutable size_t m_length_in_code_points { NumericLimits<size_t>::max() };
 };
 
@@ -188,6 +349,13 @@ struct Traits<Utf16View> : public DefaultTraits<Utf16View> {
 
 }
 
+[[nodiscard]] ALWAYS_INLINE AK_STRING_VIEW_LITERAL_CONSTEVAL AK::Utf16View operator""sv(char16_t const* string, size_t length) // Builds a Utf16View from a u"..."sv literal.
+{
+    AK::Utf16View view { string, length };
+    ASSERT(view.validate()); // NOTE(review): tests build lone-surrogate literals such as u"\xd800"sv with this operator — confirm they do not trip this check.
+    return view;
+}
+
 #if USING_AK_GLOBALLY
 using AK::Utf16Data;
 using AK::Utf16View;
diff --git a/Libraries/LibCore/ProcessWindows.cpp b/Libraries/LibCore/ProcessWindows.cpp
index 5fd5566dc08..d66b7989da4 100644
--- a/Libraries/LibCore/ProcessWindows.cpp
+++ b/Libraries/LibCore/ProcessWindows.cpp
@@ -111,7 +111,7 @@ ErrorOr<String> Process::get_name()
     if (!length)
         return Error::from_windows_error();
 
-    return String::from_utf16(Utf16View { { (u16*)path, length } });
+    return String::from_utf16(Utf16View { reinterpret_cast<char16_t const*>(path), length });
 }
 
 ErrorOr<void> Process::set_name(StringView, SetThreadName)
diff --git a/Libraries/LibJS/Runtime/AbstractOperations.cpp b/Libraries/LibJS/Runtime/AbstractOperations.cpp
index b410d6ed7fb..80b90c65a94 100644
--- a/Libraries/LibJS/Runtime/AbstractOperations.cpp
+++ b/Libraries/LibJS/Runtime/AbstractOperations.cpp
@@ -1271,33 +1271,33 @@ ThrowCompletionOr<String> get_substitution(VM& vm, Utf16View const& matched, Utf
         Optional<Utf16String> capture_string;
 
         // b. If templateRemainder starts with "$$", then
-        if (template_remainder.starts_with(u"$$")) {
+        if (template_remainder.starts_with(u"$$"sv)) {
             // i. Let ref be "$$".
-            ref = u"$$";
+            ref = u"$$"sv;
 
             // ii. Let refReplacement be "$".
-            ref_replacement = u"$";
+            ref_replacement = u"$"sv;
         }
         // c. Else if templateRemainder starts with "$`", then
-        else if (template_remainder.starts_with(u"$`")) {
+        else if (template_remainder.starts_with(u"$`"sv)) {
             // i. Let ref be "$`".
-            ref = u"$`";
+            ref = u"$`"sv;
 
             // ii. Let refReplacement be the substring of str from 0 to position.
             ref_replacement = str.substring_view(0, position);
         }
         // d. Else if templateRemainder starts with "$&", then
-        else if (template_remainder.starts_with(u"$&")) {
+        else if (template_remainder.starts_with(u"$&"sv)) {
             // i. Let ref be "$&".
-            ref = u"$&";
+            ref = u"$&"sv;
 
             // ii. Let refReplacement be matched.
             ref_replacement = matched;
         }
         // e. Else if templateRemainder starts with "$'" (0x0024 (DOLLAR SIGN) followed by 0x0027 (APOSTROPHE)), then
-        else if (template_remainder.starts_with(u"$'")) {
+        else if (template_remainder.starts_with(u"$'"sv)) {
             // i. Let ref be "$'".
-            ref = u"$'";
+            ref = u"$'"sv;
 
             // ii. Let matchLength be the length of matched.
             auto match_length = matched.length_in_code_units();
@@ -1311,7 +1311,7 @@ ThrowCompletionOr<String> get_substitution(VM& vm, Utf16View const& matched, Utf
             // v. NOTE: tailPos can exceed stringLength only if this abstract operation was invoked by a call to the intrinsic @@replace method of %RegExp.prototype% on an object whose "exec" property is not the intrinsic %RegExp.prototype.exec%.
         }
         // f. Else if templateRemainder starts with "$" followed by 1 or more decimal digits, then
-        else if (template_remainder.starts_with(u"$") && template_remainder.length_in_code_units() > 1 && is_ascii_digit(template_remainder.code_unit_at(1))) {
+        else if (template_remainder.starts_with(u"$"sv) && template_remainder.length_in_code_units() > 1 && is_ascii_digit(template_remainder.code_unit_at(1))) {
             // i. If templateRemainder starts with "$" followed by 2 or more decimal digits, let digitCount be 2. Otherwise, let digitCount be 1.
             size_t digit_count = 1;
 
@@ -1373,15 +1373,15 @@ ThrowCompletionOr<String> get_substitution(VM& vm, Utf16View const& matched, Utf
             }
         }
         // g. Else if templateRemainder starts with "$<", then
-        else if (template_remainder.starts_with(u"$<")) {
+        else if (template_remainder.starts_with(u"$<"sv)) {
             // i. Let gtPos be StringIndexOf(templateRemainder, ">", 0).
             // NOTE: We can actually start at index 2 because we know the string starts with "$<".
-            auto greater_than_position = string_index_of(template_remainder, u">", 2);
+            auto greater_than_position = string_index_of(template_remainder, u">"sv, 2);
 
             // ii. If gtPos = -1 or namedCaptures is undefined, then
             if (!greater_than_position.has_value() || named_captures.is_undefined()) {
                 // 1. Let ref be "$<".
-                ref = u"$<";
+                ref = u"$<"sv;
 
                 // 2. Let refReplacement be ref.
                 ref_replacement = ref;
@@ -1427,7 +1427,7 @@ ThrowCompletionOr<String> get_substitution(VM& vm, Utf16View const& matched, Utf
         auto ref_length = ref.length_in_code_units();
 
         // k. Set result to the string-concatenation of result and refReplacement.
-        result.append(ref_replacement.data(), ref_replacement.length_in_code_points());
+        result.append(ref_replacement.span().data(), ref_replacement.length_in_code_units());
 
         // j. Set templateRemainder to the substring of templateRemainder from refLength.
         // NOTE: We do this step last because refReplacement may point to templateRemainder.
diff --git a/Libraries/LibJS/Runtime/Utf16String.cpp b/Libraries/LibJS/Runtime/Utf16String.cpp
index ba94c32a82c..461590b53ea 100644
--- a/Libraries/LibJS/Runtime/Utf16String.cpp
+++ b/Libraries/LibJS/Runtime/Utf16String.cpp
@@ -44,7 +44,7 @@ NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view)
 {
     Utf16Data string;
     string.ensure_capacity(view.length_in_code_units());
-    string.unchecked_append(view.data(), view.length_in_code_units());
+    string.unchecked_append(view.span().data(), view.length_in_code_units());
 
     auto impl = create(move(string));
     if (auto length_in_code_points = view.length_in_code_points_if_known(); length_in_code_points.has_value())
diff --git a/Libraries/LibJS/Runtime/Utf16String.h b/Libraries/LibJS/Runtime/Utf16String.h
index 4e9fe940ce2..3dbee3c711f 100644
--- a/Libraries/LibJS/Runtime/Utf16String.h
+++ b/Libraries/LibJS/Runtime/Utf16String.h
@@ -48,7 +48,7 @@ private:
     mutable bool m_has_hash { false };
     mutable u32 m_hash { 0 };
     Utf16Data m_string;
-    Utf16View m_cached_view { m_string.span() };
+    Utf16View m_cached_view { m_string };
 };
 
 }
diff --git a/Libraries/LibRegex/RegexMatch.h b/Libraries/LibRegex/RegexMatch.h
index c6200f87fec..f1860ae13c3 100644
--- a/Libraries/LibRegex/RegexMatch.h
+++ b/Libraries/LibRegex/RegexMatch.h
@@ -147,9 +147,8 @@ public:
                     return Vector<RegexStringView> { view };
 
                 Vector<RegexStringView> views;
-                u16 newline = '\n';
                 while (!view.is_empty()) {
-                    auto position = AK::memmem_optional(view.data(), view.length_in_code_units() * sizeof(u16), &newline, sizeof(u16));
+                    auto position = view.find_code_unit_offset(u'\n');
                     if (!position.has_value())
                         break;
                     auto offset = position.value() / sizeof(u16);
diff --git a/Libraries/LibUnicode/ICU.cpp b/Libraries/LibUnicode/ICU.cpp
index 0b6cf867c29..6911c6b9409 100644
--- a/Libraries/LibUnicode/ICU.cpp
+++ b/Libraries/LibUnicode/ICU.cpp
@@ -159,8 +159,7 @@ String icu_string_to_string(icu::UnicodeString const& string)
 
 String icu_string_to_string(UChar const* string, i32 length)
 {
-    ReadonlySpan<u16> view { reinterpret_cast<u16 const*>(string), static_cast<size_t>(length) };
-    return MUST(Utf16View { view }.to_utf8());
+    return MUST(Utf16View { string, static_cast<size_t>(length) }.to_utf8()); // Wraps the ICU buffer in a Utf16View and converts it to UTF-8; assumes length is non-negative — TODO confirm.
 }
 
 }
diff --git a/Libraries/LibUnicode/Segmenter.cpp b/Libraries/LibUnicode/Segmenter.cpp
index 63023995d88..25cfa5e59f5 100644
--- a/Libraries/LibUnicode/Segmenter.cpp
+++ b/Libraries/LibUnicode/Segmenter.cpp
@@ -75,7 +75,7 @@ public:
 
     virtual void set_segmented_text(Utf16View const& text) override
     {
-        m_segmented_text = icu::UnicodeString { text.data(), static_cast<i32>(text.length_in_code_units()) };
+        m_segmented_text = icu::UnicodeString { text.span().data(), static_cast<i32>(text.length_in_code_units()) }; // Length is narrowed to i32 for ICU; NOTE(review): confirm inputs never exceed that range.
         m_segmenter->setText(m_segmented_text.get<icu::UnicodeString>());
     }
 
diff --git a/Libraries/LibWeb/DOM/CharacterData.cpp b/Libraries/LibWeb/DOM/CharacterData.cpp
index 3ad01cbc0a8..5125ab2b009 100644
--- a/Libraries/LibWeb/DOM/CharacterData.cpp
+++ b/Libraries/LibWeb/DOM/CharacterData.cpp
@@ -86,11 +86,12 @@ WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t coun
     auto before_data = utf16_view.substring_view(0, offset);
     auto inserted_data_result = MUST(AK::utf8_to_utf16(data));
     auto after_data = utf16_view.substring_view(offset + count);
+
     Utf16Data full_data;
     full_data.ensure_capacity(before_data.length_in_code_units() + inserted_data_result.data.size() + after_data.length_in_code_units());
-    full_data.append(before_data.data(), before_data.length_in_code_units());
+    full_data.append(before_data.span().data(), before_data.length_in_code_units());
     full_data.extend(inserted_data_result.data);
-    full_data.append(after_data.data(), after_data.length_in_code_units());
+    full_data.append(after_data.span().data(), after_data.length_in_code_units());
     Utf16View full_view { full_data };
 
     bool characters_are_the_same = utf16_view == full_view;
diff --git a/Libraries/LibWeb/FileAPI/FileReader.cpp b/Libraries/LibWeb/FileAPI/FileReader.cpp
index 64cbee1925d..6d6ec1baac6 100644
--- a/Libraries/LibWeb/FileAPI/FileReader.cpp
+++ b/Libraries/LibWeb/FileAPI/FileReader.cpp
@@ -106,7 +106,7 @@ WebIDL::ExceptionOr<FileReader::Result> FileReader::blob_package_data(JS::Realm&
         return JS::ArrayBuffer::create(realm, move(bytes));
     case Type::BinaryString:
         // Return bytes as a binary string, in which every byte is represented by a code unit of equal value [0..255].
-        Vector<u16> builder;
+        Utf16Data builder;
         builder.ensure_capacity(bytes.size());
         for (auto byte : bytes.bytes())
             builder.unchecked_append(byte);
diff --git a/Tests/AK/TestUtf16View.cpp b/Tests/AK/TestUtf16View.cpp
index 90247c876bf..ba9cfe63aae 100644
--- a/Tests/AK/TestUtf16View.cpp
+++ b/Tests/AK/TestUtf16View.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2024, Tim Flynn <trflynn89@serenityos.org>
+ * Copyright (c) 2021-2025, Tim Flynn <trflynn89@ladybird.org>
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
@@ -60,8 +60,7 @@ TEST_CASE(encode_utf8)
         EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), utf8_string);
     }
     {
-        auto encoded = Array { (u16)0xd83d };
-        Utf16View view { encoded };
+        Utf16View view { u"\xd83d"sv };
         EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv);
         EXPECT(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No).is_error());
     }
@@ -69,11 +68,8 @@ TEST_CASE(encode_utf8)
 
 TEST_CASE(decode_utf16)
 {
-    // Same string as the decode_utf8 test.
-    auto encoded = Array { (u16)0x041f, 0x0440, 0x0438, 0x0432, 0x0435, 0x0442, 0x002c, 0x0020, 0x043c, 0x0438, 0x0440, 0x0021, 0x0020, 0xd83d, 0xde00, 0x0020, 0x03b3, 0x03b5, 0x03b9, 0x03ac, 0x0020, 0x03c3, 0x03bf, 0x03c5, 0x0020, 0x03ba, 0x03cc, 0x03c3, 0x03bc, 0x03bf, 0x03c2, 0x0020, 0x3053, 0x3093, 0x306b, 0x3061, 0x306f, 0x4e16, 0x754c };
-
-    Utf16View view { encoded };
-    EXPECT_EQ(encoded.size(), view.length_in_code_units());
+    Utf16View view { u"Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv };
+    EXPECT_EQ(view.length_in_code_units(), 39uz);
 
     size_t valid_code_units = 0;
     EXPECT(view.validate(valid_code_units));
@@ -113,18 +109,18 @@ TEST_CASE(null_view)
 TEST_CASE(utf16_literal)
 {
     {
-        Utf16View view { u"" };
+        Utf16View view { u""sv };
         EXPECT(view.validate());
         EXPECT_EQ(view.length_in_code_units(), 0u);
     }
     {
-        Utf16View view { u"a" };
+        Utf16View view { u"a"sv };
         EXPECT(view.validate());
         EXPECT_EQ(view.length_in_code_units(), 1u);
         EXPECT_EQ(view.code_unit_at(0), 0x61u);
     }
     {
-        Utf16View view { u"abc" };
+        Utf16View view { u"abc"sv };
         EXPECT(view.validate());
         EXPECT_EQ(view.length_in_code_units(), 3u);
         EXPECT_EQ(view.code_unit_at(0), 0x61u);
@@ -132,7 +128,7 @@ TEST_CASE(utf16_literal)
         EXPECT_EQ(view.code_unit_at(2), 0x63u);
     }
     {
-        Utf16View view { u"🙃" };
+        Utf16View view { u"🙃"sv };
         EXPECT(view.validate());
         EXPECT_EQ(view.length_in_code_units(), 2u);
         EXPECT_EQ(view.code_unit_at(0), 0xd83du);
@@ -190,14 +186,14 @@ TEST_CASE(validate_invalid_utf16)
     Utf16View invalid;
     {
         // Lonely high surrogate.
-        invalid = u"\xd800";
+        invalid = u"\xd800"sv;
         EXPECT(!invalid.validate(valid_code_units));
         EXPECT_EQ(valid_code_units, 0uz);
 
         EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes));
         EXPECT_EQ(valid_code_units, 1uz);
 
-        invalid = u"\xdbff";
+        invalid = u"\xdbff"sv;
         EXPECT(!invalid.validate(valid_code_units));
         EXPECT_EQ(valid_code_units, 0uz);
 
@@ -206,14 +202,14 @@ TEST_CASE(validate_invalid_utf16)
     }
     {
         // Lonely low surrogate.
-        invalid = u"\xdc00";
+        invalid = u"\xdc00"sv;
         EXPECT(!invalid.validate(valid_code_units));
         EXPECT_EQ(valid_code_units, 0uz);
 
         EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes));
         EXPECT_EQ(valid_code_units, 1uz);
 
-        invalid = u"\xdfff";
+        invalid = u"\xdfff"sv;
         EXPECT(!invalid.validate(valid_code_units));
         EXPECT_EQ(valid_code_units, 0uz);
 
@@ -222,14 +218,14 @@ TEST_CASE(validate_invalid_utf16)
     }
     {
         // High surrogate followed by non-surrogate.
-        invalid = u"\xd800\x0000";
+        invalid = u"\xd800\x0000"sv;
         EXPECT(!invalid.validate(valid_code_units));
         EXPECT_EQ(valid_code_units, 0uz);
 
         EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes));
         EXPECT_EQ(valid_code_units, 2uz);
 
-        invalid = u"\xd800\xe000";
+        invalid = u"\xd800\xe000"sv;
         EXPECT(!invalid.validate(valid_code_units));
         EXPECT_EQ(valid_code_units, 0uz);
 
@@ -238,14 +234,14 @@ TEST_CASE(validate_invalid_utf16)
     }
     {
         // High surrogate followed by high surrogate.
-        invalid = u"\xd800\xd800";
+        invalid = u"\xd800\xd800"sv;
         EXPECT(!invalid.validate(valid_code_units));
         EXPECT_EQ(valid_code_units, 0uz);
 
         EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes));
         EXPECT_EQ(valid_code_units, 2uz);
 
-        invalid = u"\xd800\xdbff";
+        invalid = u"\xd800\xdbff"sv;
         EXPECT(!invalid.validate(valid_code_units));
         EXPECT_EQ(valid_code_units, 0uz);
 
@@ -254,14 +250,14 @@ TEST_CASE(validate_invalid_utf16)
     }
     {
         // Valid UTF-16 followed by invalid code units.
-        invalid = u"\x0041\x0041\xd800";
+        invalid = u"\x0041\x0041\xd800"sv;
         EXPECT(!invalid.validate(valid_code_units));
         EXPECT_EQ(valid_code_units, 2uz);
 
         EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes));
         EXPECT_EQ(valid_code_units, 3uz);
 
-        invalid = u"\x0041\x0041\xd800";
+        invalid = u"\x0041\x0041\xd800"sv;
         EXPECT(!invalid.validate(valid_code_units));
         EXPECT_EQ(valid_code_units, 2uz);
 
@@ -274,10 +270,8 @@ TEST_CASE(decode_invalid_utf16)
 {
     {
         // Lonely high surrogate.
-        auto invalid = Array { (u16)0x41, 0x42, 0xd800 };
-
-        Utf16View view { invalid };
-        EXPECT_EQ(invalid.size(), view.length_in_code_units());
+        Utf16View view { u"AB\xd800"sv };
+        EXPECT_EQ(view.length_in_code_units(), 3uz);
 
         auto expected = Array { (u32)0x41, 0x42, 0xfffd };
         EXPECT_EQ(expected.size(), view.length_in_code_points());
@@ -290,10 +284,8 @@ TEST_CASE(decode_invalid_utf16)
     }
     {
         // Lonely low surrogate.
-        auto invalid = Array { (u16)0x41, 0x42, 0xdc00 };
-
-        Utf16View view { invalid };
-        EXPECT_EQ(invalid.size(), view.length_in_code_units());
+        Utf16View view { u"AB\xdc00"sv };
+        EXPECT_EQ(view.length_in_code_units(), 3uz);
 
         auto expected = Array { (u32)0x41, 0x42, 0xfffd };
         EXPECT_EQ(expected.size(), view.length_in_code_points());
@@ -306,10 +298,8 @@ TEST_CASE(decode_invalid_utf16)
     }
     {
         // High surrogate followed by non-surrogate.
-        auto invalid = Array { (u16)0x41, 0x42, 0xd800, 0 };
-
-        Utf16View view { invalid };
-        EXPECT_EQ(invalid.size(), view.length_in_code_units());
+        Utf16View view { u"AB\xd800\x0000"sv };
+        EXPECT_EQ(view.length_in_code_units(), 4uz);
 
         auto expected = Array { (u32)0x41, 0x42, 0xfffd, 0 };
         EXPECT_EQ(expected.size(), view.length_in_code_points());
@@ -322,10 +312,8 @@ TEST_CASE(decode_invalid_utf16)
     }
     {
         // High surrogate followed by high surrogate.
-        auto invalid = Array { (u16)0x41, 0x42, 0xd800, 0xd800 };
-
-        Utf16View view { invalid };
-        EXPECT_EQ(invalid.size(), view.length_in_code_units());
+        Utf16View view { u"AB\xd800\xd800"sv };
+        EXPECT_EQ(view.length_in_code_units(), 4uz);
 
         auto expected = Array { (u32)0x41, 0x42, 0xfffd, 0xfffd };
         EXPECT_EQ(expected.size(), view.length_in_code_points());
@@ -341,13 +329,13 @@ TEST_CASE(decode_invalid_utf16)
 TEST_CASE(is_ascii)
 {
     EXPECT(Utf16View {}.is_ascii());
-    EXPECT(Utf16View { u"a" }.is_ascii());
-    EXPECT(Utf16View { u"foo" }.is_ascii());
-    EXPECT(Utf16View { u"foo\t\n\rbar\v\b123" }.is_ascii());
+    EXPECT(u"a"sv.is_ascii());
+    EXPECT(u"foo"sv.is_ascii());
+    EXPECT(u"foo\t\n\rbar\v\b123"sv.is_ascii()); // Control characters are still expected to count as ASCII.
 
-    EXPECT(!Utf16View { u"😀" }.is_ascii());
-    EXPECT(!Utf16View { u"foo 😀" }.is_ascii());
-    EXPECT(!Utf16View { u"😀 foo" }.is_ascii());
+    EXPECT(!u"😀"sv.is_ascii()); // A non-ASCII code point anywhere is expected to fail the check.
+    EXPECT(!u"foo 😀"sv.is_ascii());
+    EXPECT(!u"😀 foo"sv.is_ascii());
 }
 
 TEST_CASE(equals_ignoring_case)
@@ -387,28 +375,28 @@ TEST_CASE(substring_view)
 
 TEST_CASE(starts_with)
 {
-    EXPECT(Utf16View {}.starts_with(u""));
-    EXPECT(!Utf16View {}.starts_with(u" "));
+    EXPECT(Utf16View {}.starts_with(u""sv)); // An empty needle matches even a default-constructed view.
+    EXPECT(!Utf16View {}.starts_with(u" "sv));
 
-    EXPECT(Utf16View { u"a" }.starts_with(u""));
-    EXPECT(Utf16View { u"a" }.starts_with(u"a"));
-    EXPECT(!Utf16View { u"a" }.starts_with(u"b"));
-    EXPECT(!Utf16View { u"a" }.starts_with(u"ab"));
+    EXPECT(u"a"sv.starts_with(u""sv));
+    EXPECT(u"a"sv.starts_with(u"a"sv));
+    EXPECT(!u"a"sv.starts_with(u"b"sv));
+    EXPECT(!u"a"sv.starts_with(u"ab"sv)); // Needle longer than the view.
 
-    EXPECT(Utf16View { u"abc" }.starts_with(u""));
-    EXPECT(Utf16View { u"abc" }.starts_with(u"a"));
-    EXPECT(Utf16View { u"abc" }.starts_with(u"ab"));
-    EXPECT(Utf16View { u"abc" }.starts_with(u"abc"));
-    EXPECT(!Utf16View { u"abc" }.starts_with(u"b"));
-    EXPECT(!Utf16View { u"abc" }.starts_with(u"bc"));
+    EXPECT(u"abc"sv.starts_with(u""sv));
+    EXPECT(u"abc"sv.starts_with(u"a"sv));
+    EXPECT(u"abc"sv.starts_with(u"ab"sv));
+    EXPECT(u"abc"sv.starts_with(u"abc"sv));
+    EXPECT(!u"abc"sv.starts_with(u"b"sv));
+    EXPECT(!u"abc"sv.starts_with(u"bc"sv));
 
-    auto emoji = Utf16View { u"😀🙃" };
+    auto emoji = u"😀🙃"sv;
 
-    EXPECT(emoji.starts_with(u""));
-    EXPECT(emoji.starts_with(u"😀"));
-    EXPECT(emoji.starts_with(u"😀🙃"));
-    EXPECT(!emoji.starts_with(u"a"));
-    EXPECT(!emoji.starts_with(u"🙃"));
+    EXPECT(emoji.starts_with(u""sv));
+    EXPECT(emoji.starts_with(u"😀"sv));
+    EXPECT(emoji.starts_with(u"😀🙃"sv));
+    EXPECT(!emoji.starts_with(u"a"sv));
+    EXPECT(!emoji.starts_with(u"🙃"sv)); // Present in the view, but not at the start.
 }
 
 TEST_CASE(find_code_unit_offset)
@@ -416,16 +404,16 @@ TEST_CASE(find_code_unit_offset)
     auto conversion_result = MUST(AK::utf8_to_utf16("😀foo😀bar"sv));
     Utf16View const view { conversion_result };
 
-    EXPECT_EQ(0u, view.find_code_unit_offset(u"").value());
-    EXPECT_EQ(4u, view.find_code_unit_offset(u"", 4).value());
-    EXPECT(!view.find_code_unit_offset(u"", 16).has_value());
+    EXPECT_EQ(0u, view.find_code_unit_offset(u""sv).value());
+    EXPECT_EQ(4u, view.find_code_unit_offset(u""sv, 4).value());
+    EXPECT(!view.find_code_unit_offset(u""sv, 16).has_value());
 
-    EXPECT_EQ(0u, view.find_code_unit_offset(u"😀").value());
-    EXPECT_EQ(5u, view.find_code_unit_offset(u"😀", 1).value());
-    EXPECT_EQ(2u, view.find_code_unit_offset(u"foo").value());
-    EXPECT_EQ(7u, view.find_code_unit_offset(u"bar").value());
+    EXPECT_EQ(0u, view.find_code_unit_offset(u"😀"sv).value());
+    EXPECT_EQ(5u, view.find_code_unit_offset(u"😀"sv, 1).value());
+    EXPECT_EQ(2u, view.find_code_unit_offset(u"foo"sv).value());
+    EXPECT_EQ(7u, view.find_code_unit_offset(u"bar"sv).value());
 
-    EXPECT(!view.find_code_unit_offset(u"baz").has_value());
+    EXPECT(!view.find_code_unit_offset(u"baz"sv).has_value());
 }
 
 TEST_CASE(find_code_unit_offset_ignoring_case)
@@ -433,13 +421,13 @@ TEST_CASE(find_code_unit_offset_ignoring_case)
     auto conversion_result = MUST(AK::utf8_to_utf16("😀Foo😀Bar"sv));
     Utf16View const view { conversion_result };
 
-    EXPECT_EQ(0u, view.find_code_unit_offset_ignoring_case(u"").value());
-    EXPECT_EQ(4u, view.find_code_unit_offset_ignoring_case(u"", 4).value());
-    EXPECT(!view.find_code_unit_offset_ignoring_case(u"", 16).has_value());
+    EXPECT_EQ(0u, view.find_code_unit_offset_ignoring_case(u""sv).value());
+    EXPECT_EQ(4u, view.find_code_unit_offset_ignoring_case(u""sv, 4).value());
+    EXPECT(!view.find_code_unit_offset_ignoring_case(u""sv, 16).has_value());
 
-    EXPECT_EQ(0u, view.find_code_unit_offset_ignoring_case(u"😀").value());
-    EXPECT_EQ(5u, view.find_code_unit_offset_ignoring_case(u"😀", 1).value());
-    EXPECT_EQ(2u, view.find_code_unit_offset_ignoring_case(u"foO").value());
-    EXPECT_EQ(7u, view.find_code_unit_offset_ignoring_case(u"baR").value());
-    EXPECT(!view.find_code_unit_offset_ignoring_case(u"baz").has_value());
+    EXPECT_EQ(0u, view.find_code_unit_offset_ignoring_case(u"😀"sv).value());
+    EXPECT_EQ(5u, view.find_code_unit_offset_ignoring_case(u"😀"sv, 1).value());
+    EXPECT_EQ(2u, view.find_code_unit_offset_ignoring_case(u"foO"sv).value());
+    EXPECT_EQ(7u, view.find_code_unit_offset_ignoring_case(u"baR"sv).value());
+    EXPECT(!view.find_code_unit_offset_ignoring_case(u"baz"sv).has_value());
 }