From 9582895759675b612bbd9fbadf6ce205db3b592d Mon Sep 17 00:00:00 2001
From: Timothy Flynn <trflynn89@pm.me>
Date: Wed, 9 Jul 2025 14:13:38 -0400
Subject: [PATCH] AK+LibJS+LibWeb+LibRegex: Replace AK::Utf16Data with
 AK::Utf16String

---
 AK/String.cpp                                 |  9 +--
 AK/String.h                                   |  1 -
 AK/Utf16View.cpp                              | 65 -------------------
 AK/Utf16View.h                                | 39 -----------
 Libraries/LibCore/ProcessWindows.cpp          |  2 +-
 Libraries/LibJS/Runtime/GlobalObject.cpp      | 16 ++---
 Libraries/LibJS/Runtime/RegExpObject.cpp      | 13 ++--
 Libraries/LibRegex/RegexByteCode.cpp          |  2 +-
 Libraries/LibRegex/RegexMatch.h               | 12 ++--
 Libraries/LibWeb/DOM/CharacterData.cpp        | 32 +++++----
 Libraries/LibWeb/DOM/Document.cpp             | 13 ++--
 .../LibWeb/Editing/Internal/Algorithms.cpp    | 26 ++++----
 Libraries/LibWeb/FileAPI/FileReader.cpp       |  7 +-
 Libraries/LibWeb/Infra/Strings.cpp            | 14 ++--
 Libraries/LibWeb/Layout/Viewport.cpp          | 14 ++--
 Libraries/LibWeb/Layout/Viewport.h            |  3 +-
 .../LibWeb/Painting/PaintableFragment.cpp     |  5 +-
 Libraries/LibWeb/Painting/PaintableFragment.h |  3 +-
 .../LibWeb/SVG/SVGTextContentElement.cpp      |  4 +-
 Tests/AK/TestUtf16View.cpp                    | 26 ++++----
 Tests/LibRegex/TestRegex.cpp                  |  4 +-
 Tests/LibUnicode/TestSegmenter.cpp            | 13 ++--
 22 files changed, 101 insertions(+), 222 deletions(-)
diff --git a/AK/String.cpp b/AK/String.cpp
index 00162a8aa37..8e1a744ac4c 100644
--- a/AK/String.cpp
+++ b/AK/String.cpp
@@ -67,11 +67,6 @@ ErrorOr<String> String::from_utf8(StringView view)
     return result;
 }
 
-ErrorOr<String> String::from_utf16(Utf16View const& utf16)
-{
-    return utf16.to_utf8();
-}
-
 ErrorOr<String> String::from_utf16_le_with_replacement_character(ReadonlyBytes bytes)
 {
     if (bytes.is_empty())
@@ -80,7 +75,7 @@ ErrorOr<String> String::from_utf16_le_with_replacement_character(ReadonlyBytes b
     auto const* utf16_data = reinterpret_cast<char16_t const*>(bytes.data());
     auto utf16_length = bytes.size() / 2;
 
-    Utf16Data well_formed_utf16;
+    Vector<char16_t> well_formed_utf16;
 
     if (!validate_utf16_le(bytes)) {
         well_formed_utf16.resize(bytes.size());
@@ -109,7 +104,7 @@ ErrorOr<String> String::from_utf16_be_with_replacement_character(ReadonlyBytes b
     auto const* utf16_data = reinterpret_cast<char16_t const*>(bytes.data());
     auto utf16_length = bytes.size() / 2;
 
-    Utf16Data well_formed_utf16;
+    Vector<char16_t> well_formed_utf16;
 
     if (!validate_utf16_le(bytes)) {
         well_formed_utf16.resize(bytes.size());
diff --git a/AK/String.h b/AK/String.h
index f30fbb6e405..639fa3160e3 100644
--- a/AK/String.h
+++ b/AK/String.h
@@ -69,7 +69,6 @@ public:
     [[nodiscard]] static String from_string_builder_without_validation(Badge<StringBuilder>, StringBuilder&);
 
     // Creates a new String from a sequence of UTF-16 encoded code points.
-    static ErrorOr<String> from_utf16(Utf16View const&);
     static ErrorOr<String> from_utf16_le_with_replacement_character(ReadonlyBytes);
     static ErrorOr<String> from_utf16_be_with_replacement_character(ReadonlyBytes);
 
diff --git a/AK/Utf16View.cpp b/AK/Utf16View.cpp
index ceb6e9c2cd4..995eee1754b 100644
--- a/AK/Utf16View.cpp
+++ b/AK/Utf16View.cpp
@@ -10,77 +10,12 @@
 #include <AK/StringView.h>
 #include <AK/Utf16String.h>
 #include <AK/Utf16View.h>
-#include <AK/Utf32View.h>
 #include <AK/Utf8View.h>
 
 #include <simdutf.h>
 
 namespace AK {
 
-template<OneOf<Utf8View, Utf32View> UtfViewType>
-static ErrorOr<Utf16ConversionResult> to_utf16_slow(UtfViewType const& view)
-{
-    Utf16Data utf16_data;
-    TRY(utf16_data.try_ensure_capacity(view.length()));
-
-    size_t code_point_count = 0;
-    for (auto code_point : view) {
-        TRY(UnicodeUtils::try_code_point_to_utf16(code_point, [&](auto code_unit) -> ErrorOr<void> {
-            TRY(utf16_data.try_append(code_unit));
-            return {};
-        }));
-
-        code_point_count++;
-    }
-
-    return Utf16ConversionResult { move(utf16_data), code_point_count };
-}
-
-ErrorOr<Utf16ConversionResult> utf8_to_utf16(StringView utf8_view)
-{
-    return utf8_to_utf16(Utf8View { utf8_view });
-}
-
-ErrorOr<Utf16ConversionResult> utf8_to_utf16(Utf8View const& utf8_view)
-{
-    if (utf8_view.is_empty())
-        return Utf16ConversionResult { Utf16Data {}, 0 };
-
-    // All callers want to allow lonely surrogates, which simdutf does not permit.
-    if (!utf8_view.validate(AllowLonelySurrogates::No)) [[unlikely]]
-        return to_utf16_slow(utf8_view);
-
-    auto const* data = reinterpret_cast<char const*>(utf8_view.bytes());
-    auto length = utf8_view.byte_length();
-
-    Utf16Data utf16_data;
-    TRY(utf16_data.try_resize(simdutf::utf16_length_from_utf8(data, length)));
-    // FIXME: simdutf _could_ be telling us about this, but it doesn't -- so we have to compute it again.
-    auto code_point_length = simdutf::count_utf8(data, length);
-
-    [[maybe_unused]] auto result = simdutf::convert_utf8_to_utf16(data, length, reinterpret_cast<char16_t*>(utf16_data.data()));
-    ASSERT(result == utf16_data.size());
-
-    return Utf16ConversionResult { utf16_data, code_point_length };
-}
-
-ErrorOr<Utf16ConversionResult> utf32_to_utf16(Utf32View const& utf32_view)
-{
-    if (utf32_view.is_empty())
-        return Utf16ConversionResult { Utf16Data {}, 0 };
-
-    auto const* data = reinterpret_cast<char32_t const*>(utf32_view.code_points());
-    auto length = utf32_view.length();
-
-    Utf16Data utf16_data;
-    TRY(utf16_data.try_resize(simdutf::utf16_length_from_utf32(data, length)));
-
-    [[maybe_unused]] auto result = simdutf::convert_utf32_to_utf16(data, length, reinterpret_cast<char16_t*>(utf16_data.data()));
-    ASSERT(result == utf16_data.size());
-
-    return Utf16ConversionResult { utf16_data, length };
-}
-
 bool validate_utf16_le(ReadonlyBytes bytes)
 {
     return simdutf::validate_utf16le(reinterpret_cast<char16_t const*>(bytes.data()), bytes.size() / 2);
diff --git a/AK/Utf16View.h b/AK/Utf16View.h
index 49996a5bf5f..eec2d62eba1 100644
--- a/AK/Utf16View.h
+++ b/AK/Utf16View.h
@@ -23,16 +23,6 @@
 
 namespace AK {
 
-using Utf16Data = Vector<char16_t, 1>;
-
-struct Utf16ConversionResult {
-    Utf16Data data;
-    size_t code_point_count;
-};
-ErrorOr<Utf16ConversionResult> utf8_to_utf16(StringView);
-ErrorOr<Utf16ConversionResult> utf8_to_utf16(Utf8View const&);
-ErrorOr<Utf16ConversionResult> utf32_to_utf16(Utf32View const&);
-
 [[nodiscard]] bool validate_utf16_le(ReadonlyBytes);
 [[nodiscard]] bool validate_utf16_be(ReadonlyBytes);
 
@@ -156,13 +146,6 @@ public:
         m_length_in_code_units |= 1uz << Detail::UTF16_FLAG;
     }
 
-    constexpr Utf16View(Utf16Data const& string)
-        : m_string { .utf16 = string.data() }
-        , m_length_in_code_units(string.size())
-    {
-        m_length_in_code_units |= 1uz << Detail::UTF16_FLAG;
-    }
-
     consteval Utf16View(StringView string)
         : m_string { .ascii = string.characters_without_null_termination() }
         , m_length_in_code_units(string.length())
@@ -170,15 +153,6 @@ public:
         VERIFY(all_of(string, AK::is_ascii));
     }
 
-    Utf16View(Utf16ConversionResult&&) = delete;
-    explicit Utf16View(Utf16ConversionResult const& conversion_result)
-        : m_string { .utf16 = conversion_result.data.data() }
-        , m_length_in_code_units(conversion_result.data.size())
-        , m_length_in_code_points(conversion_result.code_point_count)
-    {
-        m_length_in_code_units |= 1uz << Detail::UTF16_FLAG;
-    }
-
     ErrorOr<String> to_utf8(AllowLonelySurrogates = AllowLonelySurrogates::Yes) const;
     ErrorOr<ByteString> to_byte_string(AllowLonelySurrogates = AllowLonelySurrogates::Yes) const;
 
@@ -314,18 +288,6 @@ public:
         return m_length_in_code_points;
     }
 
-    constexpr Optional<size_t> length_in_code_points_if_known() const
-    {
-        if (has_ascii_storage())
-            return m_length_in_code_units;
-
-        if (m_length_in_code_points == NumericLimits<size_t>::max())
-            return {};
-        return m_length_in_code_points;
-    }
-
-    constexpr void unsafe_set_code_point_length(size_t length) const { m_length_in_code_points = length; }
-
     [[nodiscard]] constexpr char16_t code_unit_at(size_t index) const
     {
         VERIFY(index < length_in_code_units());
@@ -591,6 +553,5 @@ inline constexpr bool IsHashCompatible<Utf16String, Utf16View> = true;
 }
 
 #if USING_AK_GLOBALLY
-using AK::Utf16Data;
 using AK::Utf16View;
 #endif
diff --git a/Libraries/LibCore/ProcessWindows.cpp b/Libraries/LibCore/ProcessWindows.cpp
index d66b7989da4..e2070e64658 100644
--- a/Libraries/LibCore/ProcessWindows.cpp
+++ b/Libraries/LibCore/ProcessWindows.cpp
@@ -111,7 +111,7 @@ ErrorOr<String> Process::get_name()
     if (!length)
         return Error::from_windows_error();
 
-    return String::from_utf16(Utf16View { reinterpret_cast<char16_t const*>(path), length });
+    return Utf16View { reinterpret_cast<char16_t const*>(path), length }.to_utf8(); // Propagate a failed UTF-16 -> UTF-8 conversion to the caller instead of aborting.
 }
 
 ErrorOr<void> Process::set_name(StringView, SetThreadName)
diff --git a/Libraries/LibJS/Runtime/GlobalObject.cpp b/Libraries/LibJS/Runtime/GlobalObject.cpp
index 6cfb243e82b..090a9a66d34 100644
--- a/Libraries/LibJS/Runtime/GlobalObject.cpp
+++ b/Libraries/LibJS/Runtime/GlobalObject.cpp
@@ -559,7 +559,7 @@ JS_DEFINE_NATIVE_FUNCTION(GlobalObject::encode_uri_component)
 JS_DEFINE_NATIVE_FUNCTION(GlobalObject::escape)
 {
     // 1. Set string to ? ToString(string).
-    auto string = TRY(vm.argument(0).to_byte_string(vm));
+    auto string = TRY(vm.argument(0).to_utf16_string(vm));
 
     // 3. Let R be the empty String.
     StringBuilder escaped;
@@ -570,29 +570,29 @@ JS_DEFINE_NATIVE_FUNCTION(GlobalObject::escape)
     // 2. Let length be the length of string.
     // 5. Let k be 0.
     // 6. Repeat, while k < length,
-    auto utf16_conversion = TRY_OR_THROW_OOM(vm, utf8_to_utf16(string));
-    for (auto code_point : utf16_conversion.data) {
+    for (size_t k = 0; k < string.length_in_code_units(); ++k) {
         // a. Let char be the code unit at index k within string.
+        auto code_unit = string.code_unit_at(k);
 
         // b. If unescapedSet contains char, then
         // NOTE: We know unescapedSet is ASCII-only, so ensure we have an ASCII codepoint before casting to char.
-        if (is_ascii(code_point) && unescaped_set.contains(static_cast<char>(code_point))) {
+        if (is_ascii(code_unit) && unescaped_set.contains(static_cast<char>(code_unit))) {
             // i. Let S be the String value containing the single code unit char.
-            escaped.append(code_point);
+            escaped.append(static_cast<char>(code_unit));
         }
         // c. Else,
         // i. Let n be the numeric value of char.
         // ii. If n < 256, then
-        else if (code_point < 256) {
+        else if (code_unit < 256) {
             // 1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
             // 2. Let S be the string-concatenation of "%" and ! StringPad(hex, 2𝔽, "0", start).
-            escaped.appendff("%{:02X}", code_point);
+            escaped.appendff("%{:02X}", code_unit);
         }
         // iii. Else,
         else {
             // 1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
             // 2. Let S be the string-concatenation of "%u" and ! StringPad(hex, 4𝔽, "0", start).
-            escaped.appendff("%u{:04X}", code_point);
+            escaped.appendff("%u{:04X}", code_unit);
         }
 
         // d. Set R to the string-concatenation of R and S.
diff --git a/Libraries/LibJS/Runtime/RegExpObject.cpp b/Libraries/LibJS/Runtime/RegExpObject.cpp
index 817095a4468..0d87d35146d 100644
--- a/Libraries/LibJS/Runtime/RegExpObject.cpp
+++ b/Libraries/LibJS/Runtime/RegExpObject.cpp
@@ -93,26 +93,21 @@ ErrorOr<String, ParseRegexPatternError> parse_regex_pattern(StringView pattern,
     if (unicode && unicode_sets)
         return ParseRegexPatternError { MUST(String::formatted(ErrorType::RegExpObjectIncompatibleFlags.message(), 'u', 'v')) };
 
-    auto utf16_pattern_result = AK::utf8_to_utf16(pattern);
-    if (utf16_pattern_result.is_error())
-        return ParseRegexPatternError { "Out of memory"_string };
-
-    auto utf16_result = utf16_pattern_result.release_value();
-    Utf16View utf16_pattern_view { utf16_result };
+    auto utf16_pattern = Utf16String::from_utf8(pattern);
     StringBuilder builder;
 
     // If the Unicode flag is set, append each code point to the pattern. Otherwise, append each
     // code unit. But unlike the spec, multi-byte code units must be escaped for LibRegex to parse.
     auto previous_code_unit_was_backslash = false;
-    for (size_t i = 0; i < utf16_pattern_view.length_in_code_units();) {
+    for (size_t i = 0; i < utf16_pattern.length_in_code_units();) {
         if (unicode || unicode_sets) {
-            auto code_point = code_point_at(utf16_pattern_view, i);
+            auto code_point = code_point_at(utf16_pattern, i);
             builder.append_code_point(code_point.code_point);
             i += code_point.code_unit_count;
             continue;
         }
 
-        u16 code_unit = utf16_pattern_view.code_unit_at(i);
+        u16 code_unit = utf16_pattern.code_unit_at(i);
         ++i;
 
         if (code_unit > 0x7f) {
diff --git a/Libraries/LibRegex/RegexByteCode.cpp b/Libraries/LibRegex/RegexByteCode.cpp
index a3e272cc940..8cbef6af595 100644
--- a/Libraries/LibRegex/RegexByteCode.cpp
+++ b/Libraries/LibRegex/RegexByteCode.cpp
@@ -512,7 +512,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
                 return ExecutionResult::Failed_ExecuteLowPrioForks;
 
             Optional<ByteString> str;
-            Utf16Data utf16;
+            Utf16String utf16;
             Vector<u32> data;
             data.ensure_capacity(length);
             for (size_t i = offset; i < offset + length; ++i)
diff --git a/Libraries/LibRegex/RegexMatch.h b/Libraries/LibRegex/RegexMatch.h
index b5ed2d5ca80..ddbc36fee83 100644
--- a/Libraries/LibRegex/RegexMatch.h
+++ b/Libraries/LibRegex/RegexMatch.h
@@ -8,14 +8,15 @@
 
 #include "Forward.h"
 #include "RegexOptions.h"
-#include <AK/Error.h>
 
 #include <AK/ByteString.h>
 #include <AK/COWVector.h>
+#include <AK/Error.h>
 #include <AK/FlyString.h>
 #include <AK/MemMem.h>
 #include <AK/StringBuilder.h>
 #include <AK/StringView.h>
+#include <AK/Utf16String.h>
 #include <AK/Utf16View.h>
 #include <AK/Utf32View.h>
 #include <AK/Utf8View.h>
@@ -110,7 +111,7 @@ public:
         return view;
     }
 
-    RegexStringView construct_as_same(Span<u32> data, Optional<ByteString>& optional_string_storage, Utf16Data& optional_utf16_storage) const
+    RegexStringView construct_as_same(Span<u32> data, Optional<ByteString>& optional_string_storage, Utf16String& optional_utf16_storage) const
     {
         auto view = m_view.visit(
             [&optional_string_storage, data]<typename T>(T const&) {
@@ -121,11 +122,8 @@ public:
                 return RegexStringView { T { *optional_string_storage } };
             },
             [&optional_utf16_storage, data](Utf16View) {
-                auto conversion_result = utf32_to_utf16(Utf32View { data.data(), data.size() }).release_value_but_fixme_should_propagate_errors();
-                optional_utf16_storage = conversion_result.data;
-                auto view = Utf16View { optional_utf16_storage };
-                view.unsafe_set_code_point_length(conversion_result.code_point_count);
-                return RegexStringView { view };
+                optional_utf16_storage = Utf16String::from_utf32({ data.data(), data.size() });
+                return RegexStringView { optional_utf16_storage.utf16_view() };
             });
 
         view.set_unicode(unicode());
diff --git a/Libraries/LibWeb/DOM/CharacterData.cpp b/Libraries/LibWeb/DOM/CharacterData.cpp
index 72eacdfb157..6f685fe0424 100644
--- a/Libraries/LibWeb/DOM/CharacterData.cpp
+++ b/Libraries/LibWeb/DOM/CharacterData.cpp
@@ -46,9 +46,8 @@ WebIDL::ExceptionOr<String> CharacterData::substring_data(size_t offset, size_t
 {
     // 1. Let length be node’s length.
     // FIXME: This is very inefficient!
-    auto utf16_result = MUST(AK::utf8_to_utf16(m_data));
-    Utf16View utf16_view { utf16_result };
-    auto length = utf16_view.length_in_code_units();
+    auto utf16_string = Utf16String::from_utf8(m_data);
+    auto length = utf16_string.length_in_code_units();
 
     // 2. If offset is greater than length, then throw an "IndexSizeError" DOMException.
     if (offset > length)
@@ -57,10 +56,10 @@ WebIDL::ExceptionOr<String> CharacterData::substring_data(size_t offset, size_t
     // 3. If offset plus count is greater than length, return a string whose value is the code units from the offsetth code unit
     //    to the end of node’s data, and then return.
     if (offset + count > length)
-        return MUST(utf16_view.substring_view(offset).to_utf8());
+        return MUST(utf16_string.substring_view(offset).to_utf8());
 
     // 4. Return a string whose value is the code units from the offsetth code unit to the offset+countth code unit in node’s data.
-    return MUST(utf16_view.substring_view(offset, count).to_utf8());
+    return MUST(utf16_string.substring_view(offset, count).to_utf8());
 }
 
 // https://dom.spec.whatwg.org/#concept-cd-replace
@@ -68,9 +67,8 @@ WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t coun
 {
     // 1. Let length be node’s length.
     // FIXME: This is very inefficient!
-    auto utf16_data = MUST(AK::utf8_to_utf16(m_data));
-    Utf16View utf16_view { utf16_data };
-    auto length = utf16_view.length_in_code_units();
+    auto utf16_string = Utf16String::from_utf8(m_data);
+    auto length = utf16_string.length_in_code_units();
 
     // 2. If offset is greater than length, then throw an "IndexSizeError" DOMException.
     if (offset > length)
@@ -83,17 +81,17 @@ WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t coun
     // 5. Insert data into node’s data after offset code units.
     // 6. Let delete offset be offset + data’s length.
     // 7. Starting from delete offset code units, remove count code units from node’s data.
-    auto before_data = utf16_view.substring_view(0, offset);
-    auto inserted_data_result = MUST(AK::utf8_to_utf16(data));
-    auto after_data = utf16_view.substring_view(offset + count);
+    auto before_data = utf16_string.substring_view(0, offset);
+    auto inserted_data = Utf16String::from_utf8(data);
+    auto after_data = utf16_string.substring_view(offset + count);
 
-    StringBuilder full_data(StringBuilder::Mode::UTF16, before_data.length_in_code_units() + inserted_data_result.data.size() + after_data.length_in_code_units());
+    StringBuilder full_data(StringBuilder::Mode::UTF16, before_data.length_in_code_units() + inserted_data.length_in_code_units() + after_data.length_in_code_units());
     full_data.append(before_data);
-    full_data.append(inserted_data_result.data);
+    full_data.append(inserted_data);
     full_data.append(after_data);
-    auto full_view = full_data.utf16_string_view();
 
-    bool characters_are_the_same = utf16_view == full_view;
+    auto full_view = full_data.utf16_string_view();
+    bool characters_are_the_same = utf16_string == full_view;
     auto old_data = m_data;
 
     // OPTIMIZATION: Skip UTF-8 encoding if the characters are the same.
@@ -123,14 +121,14 @@ WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t coun
     //     start offset by data’s length and decrease it by count.
     for (auto* range : Range::live_ranges()) {
         if (range->start_container() == this && range->start_offset() > (offset + count))
-            range->set_start_offset(range->start_offset() + inserted_data_result.data.size() - count);
+            range->set_start_offset(range->start_offset() + inserted_data.length_in_code_units() - count);
     }
 
     // 11. For each live range whose end node is node and end offset is greater than offset plus count, increase its end
     //     offset by data’s length and decrease it by count.
     for (auto* range : Range::live_ranges()) {
         if (range->end_container() == this && range->end_offset() > (offset + count))
-            range->set_end_offset(range->end_offset() + inserted_data_result.data.size() - count);
+            range->set_end_offset(range->end_offset() + inserted_data.length_in_code_units() - count);
     }
 
     // 12. If node’s parent is non-null, then run the children changed steps for node’s parent.
diff --git a/Libraries/LibWeb/DOM/Document.cpp b/Libraries/LibWeb/DOM/Document.cpp
index 1a7524d1d4a..1d83b99c1cc 100644
--- a/Libraries/LibWeb/DOM/Document.cpp
+++ b/Libraries/LibWeb/DOM/Document.cpp
@@ -6158,8 +6158,7 @@ Vector<GC::Root<Range>> Document::find_matching_text(String const& query, CaseSe
     if (text_blocks.is_empty())
         return {};
 
-    auto utf16_query = MUST(AK::utf8_to_utf16(query));
-    Utf16View query_view { utf16_query };
+    auto utf16_query = Utf16String::from_utf8(query);
 
     Vector<GC::Root<Range>> matches;
     for (auto const& text_block : text_blocks) {
@@ -6169,8 +6168,8 @@ Vector<GC::Root<Range>> Document::find_matching_text(String const& query, CaseSe
         auto* match_start_position = text_block.positions.data();
         while (true) {
             auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive
-                ? text_view.find_code_unit_offset_ignoring_case(query_view, offset)
-                : text_view.find_code_unit_offset(query_view, offset);
+                ? text_view.find_code_unit_offset_ignoring_case(utf16_query, offset)
+                : text_view.find_code_unit_offset(utf16_query, offset);
             if (!match_index.has_value())
                 break;
 
@@ -6181,15 +6180,15 @@ Vector<GC::Root<Range>> Document::find_matching_text(String const& query, CaseSe
             auto& start_dom_node = match_start_position->dom_node;
 
             auto* match_end_position = match_start_position;
-            for (; i < text_block.positions.size() - 1 && (match_index.value() + query_view.length_in_code_units() > text_block.positions[i + 1].start_offset); ++i)
+            for (; i < text_block.positions.size() - 1 && (match_index.value() + utf16_query.length_in_code_units() > text_block.positions[i + 1].start_offset); ++i)
                 match_end_position = &text_block.positions[i + 1];
 
             auto& end_dom_node = match_end_position->dom_node;
-            auto end_position = match_index.value() + query_view.length_in_code_units() - match_end_position->start_offset;
+            auto end_position = match_index.value() + utf16_query.length_in_code_units() - match_end_position->start_offset;
 
             matches.append(Range::create(start_dom_node, start_position, end_dom_node, end_position));
             match_start_position = match_end_position;
-            offset = match_index.value() + query_view.length_in_code_units() + 1;
+            offset = match_index.value() + utf16_query.length_in_code_units() + 1;
             if (offset >= text_view.length_in_code_units())
                 break;
         }
diff --git a/Libraries/LibWeb/Editing/Internal/Algorithms.cpp b/Libraries/LibWeb/Editing/Internal/Algorithms.cpp
index cc8d165d5ab..1ad0579e9f1 100644
--- a/Libraries/LibWeb/Editing/Internal/Algorithms.cpp
+++ b/Libraries/LibWeb/Editing/Internal/Algorithms.cpp
@@ -384,9 +384,9 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa
             auto parent_white_space_collapse = resolved_keyword(*start_node->parent(), CSS::PropertyID::WhiteSpaceCollapse);
 
             // FIXME: Find a way to get code points directly from the UTF-8 string
-            auto start_node_data = *start_node->text_content();
-            auto utf16_code_units = MUST(AK::utf8_to_utf16(start_node_data));
-            auto offset_minus_one_code_point = Utf16View { utf16_code_units }.code_point_at(start_offset - 1);
+            auto start_node_data = Utf16String::from_utf8(*start_node->text_content());
+            auto offset_minus_one_code_point = start_node_data.code_point_at(start_offset - 1);
+
             if (parent_white_space_collapse != CSS::Keyword::Preserve && (offset_minus_one_code_point == 0x20 || offset_minus_one_code_point == 0xA0)) {
                 --start_offset;
                 continue;
@@ -437,9 +437,9 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa
             auto parent_white_space_collapse = resolved_keyword(*end_node->parent(), CSS::PropertyID::WhiteSpaceCollapse);
 
             // FIXME: Find a way to get code points directly from the UTF-8 string
-            auto end_node_data = *end_node->text_content();
-            auto utf16_code_units = MUST(AK::utf8_to_utf16(end_node_data));
-            auto offset_code_point = Utf16View { utf16_code_units }.code_point_at(end_offset);
+            auto end_node_data = Utf16String::from_utf8(*end_node->text_content());
+            auto offset_code_point = end_node_data.code_point_at(end_offset);
+
             if (parent_white_space_collapse != CSS::Keyword::Preserve && (offset_code_point == 0x20 || offset_code_point == 0xA0)) {
                 // 1. If fix collapsed space is true, and collapse spaces is true, and the end offsetth
                 //    code unit of end node's data is a space (0x0020): call deleteData(end offset, 1)
@@ -556,16 +556,14 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa
             // 1. Remove the first code unit from replacement whitespace, and let element be that
             //    code unit.
             // FIXME: Find a way to get code points directly from the UTF-8 string
-            auto replacement_whitespace_utf16 = MUST(AK::utf8_to_utf16(replacement_whitespace));
-            auto replacement_whitespace_utf16_view = Utf16View { replacement_whitespace_utf16 };
-            replacement_whitespace = MUST(String::from_utf16({ replacement_whitespace_utf16_view.substring_view(1) }));
-            auto element = replacement_whitespace_utf16_view.code_point_at(0);
+            auto replacement_whitespace_utf16 = Utf16String::from_utf8(replacement_whitespace);
+            replacement_whitespace = MUST(replacement_whitespace_utf16.substring_view(1).to_utf8());
+            auto element = replacement_whitespace_utf16.code_point_at(0);
 
             // 2. If element is not the same as the start offsetth code unit of start node's data:
-            auto start_node_data = *start_node->text_content();
-            auto start_node_utf16 = MUST(AK::utf8_to_utf16(start_node_data));
-            auto start_node_utf16_view = Utf16View { start_node_utf16 };
-            auto start_node_code_point = start_node_utf16_view.code_point_at(start_offset);
+            auto start_node_data = Utf16String::from_utf8(*start_node->text_content());
+            auto start_node_code_point = start_node_data.code_point_at(start_offset);
+
             if (element != start_node_code_point) {
                 // 1. Call insertData(start offset, element) on start node.
                 auto& start_node_character_data = static_cast<DOM::CharacterData&>(*start_node);
diff --git a/Libraries/LibWeb/FileAPI/FileReader.cpp b/Libraries/LibWeb/FileAPI/FileReader.cpp
index 067519e2575..39a02437700 100644
--- a/Libraries/LibWeb/FileAPI/FileReader.cpp
+++ b/Libraries/LibWeb/FileAPI/FileReader.cpp
@@ -106,11 +106,10 @@ WebIDL::ExceptionOr<FileReader::Result> FileReader::blob_package_data(JS::Realm&
         return JS::ArrayBuffer::create(realm, move(bytes));
     case Type::BinaryString:
         // Return bytes as a binary string, in which every byte is represented by a code unit of equal value [0..255].
-        Utf16Data builder;
-        builder.ensure_capacity(bytes.size());
+        StringBuilder builder(StringBuilder::Mode::UTF16, bytes.size());
         for (auto byte : bytes.bytes())
-            builder.unchecked_append(byte);
-        return MUST(Utf16View { builder }.to_utf8());
+            builder.append_code_unit(byte);
+        return MUST(builder.utf16_string_view().to_utf8());
     }
     VERIFY_NOT_REACHED();
 }
diff --git a/Libraries/LibWeb/Infra/Strings.cpp b/Libraries/LibWeb/Infra/Strings.cpp
index cbff0192f84..11c4ff45b17 100644
--- a/Libraries/LibWeb/Infra/Strings.cpp
+++ b/Libraries/LibWeb/Infra/Strings.cpp
@@ -12,6 +12,7 @@
 #include <AK/FlyString.h>
 #include <AK/GenericLexer.h>
 #include <AK/String.h>
+#include <AK/Utf16String.h>
 #include <AK/Utf16View.h>
 #include <AK/Utf8View.h>
 #include <LibWeb/Infra/CharacterTypes.h>
@@ -63,10 +64,8 @@ ErrorOr<String> strip_and_collapse_whitespace(StringView string)
 // https://infra.spec.whatwg.org/#code-unit-prefix
 bool is_code_unit_prefix(StringView potential_prefix_utf8, StringView input_utf8)
 {
-    auto potential_prefix_utf16_bytes = MUST(utf8_to_utf16(potential_prefix_utf8));
-    auto input_utf16_bytes = MUST(utf8_to_utf16(input_utf8));
-    Utf16View potential_prefix { potential_prefix_utf16_bytes };
-    Utf16View input { input_utf16_bytes };
+    auto potential_prefix = Utf16String::from_utf8(potential_prefix_utf8);
+    auto input = Utf16String::from_utf8(input_utf8);
 
     // 1. Let i be 0.
     size_t i = 0;
@@ -148,9 +147,10 @@ bool code_unit_less_than(StringView a, StringView b)
     if (a.is_ascii() && b.is_ascii())
         return a < b;
 
-    auto a_utf16 = MUST(utf8_to_utf16(a));
-    auto b_utf16 = MUST(utf8_to_utf16(b));
-    return Utf16View { a_utf16 }.is_code_unit_less_than(Utf16View { b_utf16 });
+    auto a_utf16 = Utf16String::from_utf8(a);
+    auto b_utf16 = Utf16String::from_utf8(b);
+
+    return a_utf16.utf16_view().is_code_unit_less_than(b_utf16);
 }
 
 }
diff --git a/Libraries/LibWeb/Layout/Viewport.cpp b/Libraries/LibWeb/Layout/Viewport.cpp
index 6fcc55ced36..8f0fac3bb3a 100644
--- a/Libraries/LibWeb/Layout/Viewport.cpp
+++ b/Libraries/LibWeb/Layout/Viewport.cpp
@@ -50,17 +50,18 @@ Vector<Viewport::TextBlock> const& Viewport::text_blocks()
 
 void Viewport::update_text_blocks()
 {
-    StringBuilder builder;
+    StringBuilder builder(StringBuilder::Mode::UTF16);
     size_t current_start_position = 0;
     Vector<TextPosition> text_positions;
     Vector<TextBlock> text_blocks;
+
     for_each_in_inclusive_subtree([&](auto const& layout_node) {
         if (layout_node.display().is_none() || !layout_node.first_paintable() || !layout_node.first_paintable()->is_visible())
             return TraversalDecision::Continue;
 
         if (layout_node.is_box() || layout_node.is_generated()) {
             if (!builder.is_empty()) {
-                text_blocks.append({ MUST(AK::utf8_to_utf16(builder.string_view())), text_positions });
+                text_blocks.append({ builder.to_utf16_string(), text_positions });
                 current_start_position = 0;
                 text_positions.clear_with_capacity();
                 builder.clear();
@@ -79,10 +80,9 @@ void Viewport::update_text_blocks()
                     text_positions.empend(dom_node, current_start_position);
                 }
 
-                auto const& current_node_text = text_node->text_for_rendering();
-                auto const current_node_text_utf16 = MUST(AK::utf8_to_utf16(current_node_text));
-                current_start_position += current_node_text_utf16.data.size();
-                builder.append(move(current_node_text));
+                auto const current_node_text = Utf16String::from_utf8(text_node->text_for_rendering()); // Owned UTF-16 copy; from_utf8() returns by value.
+                current_start_position += current_node_text.length_in_code_units();
+                builder.append(current_node_text);
             }
         }
 
@@ -90,7 +90,7 @@ void Viewport::update_text_blocks()
     });
 
     if (!builder.is_empty())
-        text_blocks.append({ MUST(AK::utf8_to_utf16(builder.string_view())), text_positions });
+        text_blocks.append({ builder.to_utf16_string(), text_positions });
 
     m_text_blocks = move(text_blocks);
 }
diff --git a/Libraries/LibWeb/Layout/Viewport.h b/Libraries/LibWeb/Layout/Viewport.h
index a39a2b1fd6b..fb5385e1dd1 100644
--- a/Libraries/LibWeb/Layout/Viewport.h
+++ b/Libraries/LibWeb/Layout/Viewport.h
@@ -6,6 +6,7 @@
 
 #pragma once
 
+#include <AK/Utf16String.h>
 #include <LibWeb/DOM/Document.h>
 #include <LibWeb/Layout/BlockContainer.h>
 
@@ -24,7 +25,7 @@ public:
         size_t start_offset { 0 };
     };
     struct TextBlock {
-        AK::Utf16ConversionResult text;
+        Utf16String text;
         Vector<TextPosition> positions;
     };
     Vector<TextBlock> const& text_blocks();
diff --git a/Libraries/LibWeb/Painting/PaintableFragment.cpp b/Libraries/LibWeb/Painting/PaintableFragment.cpp
index 00541bb278a..da4ac8c7df9 100644
--- a/Libraries/LibWeb/Painting/PaintableFragment.cpp
+++ b/Libraries/LibWeb/Painting/PaintableFragment.cpp
@@ -252,9 +252,8 @@ Utf16View PaintableFragment::utf16_view() const
         return {};
 
     if (!m_text_in_utf16.has_value())
-        m_text_in_utf16 = MUST(AK::utf8_to_utf16(utf8_view()));
-
-    return Utf16View { m_text_in_utf16.value() };
+        m_text_in_utf16 = Utf16String::from_utf8(utf8_view().as_string());
+    return *m_text_in_utf16;
 }
 
 }
diff --git a/Libraries/LibWeb/Painting/PaintableFragment.h b/Libraries/LibWeb/Painting/PaintableFragment.h
index 0bd69235cd8..bd4f68ec1a3 100644
--- a/Libraries/LibWeb/Painting/PaintableFragment.h
+++ b/Libraries/LibWeb/Painting/PaintableFragment.h
@@ -6,6 +6,7 @@
 
 #pragma once
 
+#include <AK/Utf16String.h>
 #include <LibGfx/TextLayout.h>
 #include <LibWeb/Layout/Node.h>
 #include <LibWeb/Painting/ShadowData.h>
@@ -64,7 +65,7 @@ private:
     CSS::WritingMode m_writing_mode;
     Vector<ShadowData> m_shadows;
     CSSPixels m_text_decoration_thickness { 0 };
-    mutable Optional<AK::Utf16ConversionResult> m_text_in_utf16;
+    mutable Optional<Utf16String> m_text_in_utf16;
 };
 
 }
diff --git a/Libraries/LibWeb/SVG/SVGTextContentElement.cpp b/Libraries/LibWeb/SVG/SVGTextContentElement.cpp
index 2c868316072..5ae4e36448f 100644
--- a/Libraries/LibWeb/SVG/SVGTextContentElement.cpp
+++ b/Libraries/LibWeb/SVG/SVGTextContentElement.cpp
@@ -48,8 +48,8 @@ ByteString SVGTextContentElement::text_contents() const
 // https://svgwg.org/svg2-draft/text.html#__svg__SVGTextContentElement__getNumberOfChars
 WebIDL::ExceptionOr<WebIDL::Long> SVGTextContentElement::get_number_of_chars() const
 {
-    auto chars = TRY_OR_THROW_OOM(vm(), utf8_to_utf16(text_contents())).data;
-    return static_cast<WebIDL::Long>(chars.size());
+    auto length_in_code_units = AK::utf16_code_unit_length_from_utf8(text_contents());
+    return static_cast<WebIDL::Long>(length_in_code_units);
 }
 
 GC::Ref<Geometry::DOMPoint> SVGTextContentElement::get_start_position_of_char(WebIDL::UnsignedLong charnum)
diff --git a/Tests/AK/TestUtf16View.cpp b/Tests/AK/TestUtf16View.cpp
index ef7086e71c2..eed4f748fe0 100644
--- a/Tests/AK/TestUtf16View.cpp
+++ b/Tests/AK/TestUtf16View.cpp
@@ -15,7 +15,7 @@
 
 TEST_CASE(decode_ascii)
 {
-    auto string = MUST(AK::utf8_to_utf16("Hello World!11"sv));
+    auto string = Utf16String::from_utf8("Hello World!11"sv);
     Utf16View view { string };
 
     size_t valid_code_units = 0;
@@ -34,7 +34,7 @@ TEST_CASE(decode_ascii)
 
 TEST_CASE(decode_utf8)
 {
-    auto string = MUST(AK::utf8_to_utf16("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv));
+    auto string = Utf16String::from_utf8("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv);
     Utf16View view { string };
 
     size_t valid_code_units = 0;
@@ -55,7 +55,7 @@ TEST_CASE(encode_utf8)
 {
     {
         auto utf8_string = "Привет, мир! 😀 γειά σου κόσμος こんにちは世界"_string;
-        auto string = MUST(AK::utf8_to_utf16(utf8_string));
+        auto string = Utf16String::from_utf8(utf8_string);
         Utf16View view { string };
         EXPECT_EQ(MUST(view.to_utf8(AllowLonelySurrogates::Yes)), utf8_string);
         EXPECT_EQ(MUST(view.to_utf8(AllowLonelySurrogates::No)), utf8_string);
@@ -139,7 +139,7 @@ TEST_CASE(utf16_literal)
 
 TEST_CASE(iterate_utf16)
 {
-    auto string = MUST(AK::utf8_to_utf16("Привет 😀"sv));
+    auto string = Utf16String::from_utf8("Привет 😀"sv);
     Utf16View view { string };
     auto iterator = view.begin();
 
@@ -371,16 +371,16 @@ TEST_CASE(to_ascii_titlecase)
 
 TEST_CASE(equals_ignoring_case)
 {
-    auto string1 = MUST(AK::utf8_to_utf16("foobar"sv));
-    auto string2 = MUST(AK::utf8_to_utf16("FooBar"sv));
+    auto string1 = Utf16String::from_utf8("foobar"sv);
+    auto string2 = Utf16String::from_utf8("FooBar"sv);
     EXPECT(Utf16View { string1 }.equals_ignoring_case(Utf16View { string2 }));
 
-    string1 = MUST(AK::utf8_to_utf16(""sv));
-    string2 = MUST(AK::utf8_to_utf16(""sv));
+    string1 = Utf16String::from_utf8(""sv);
+    string2 = Utf16String::from_utf8(""sv);
     EXPECT(Utf16View { string1 }.equals_ignoring_case(Utf16View { string2 }));
 
-    string1 = MUST(AK::utf8_to_utf16(""sv));
-    string2 = MUST(AK::utf8_to_utf16("FooBar"sv));
+    string1 = Utf16String::from_utf8(""sv);
+    string2 = Utf16String::from_utf8("FooBar"sv);
     EXPECT(!Utf16View { string1 }.equals_ignoring_case(Utf16View { string2 }));
 }
 
@@ -425,7 +425,7 @@ TEST_CASE(replace)
 
 TEST_CASE(substring_view)
 {
-    auto string = MUST(AK::utf8_to_utf16("Привет 😀"sv));
+    auto string = Utf16String::from_utf8("Привет 😀"sv);
     {
         Utf16View view { string };
         view = view.substring_view(7, 2);
@@ -532,7 +532,7 @@ TEST_CASE(starts_with)
 
 TEST_CASE(find_code_unit_offset)
 {
-    auto conversion_result = MUST(AK::utf8_to_utf16("😀foo😀bar"sv));
+    auto conversion_result = Utf16String::from_utf8("😀foo😀bar"sv);
     Utf16View const view { conversion_result };
 
     EXPECT_EQ(0u, view.find_code_unit_offset(u""sv).value());
@@ -549,7 +549,7 @@ TEST_CASE(find_code_unit_offset)
 
 TEST_CASE(find_code_unit_offset_ignoring_case)
 {
-    auto conversion_result = MUST(AK::utf8_to_utf16("😀Foo😀Bar"sv));
+    auto conversion_result = Utf16String::from_utf8("😀Foo😀Bar"sv);
     Utf16View const view { conversion_result };
 
     EXPECT_EQ(0u, view.find_code_unit_offset_ignoring_case(u""sv).value());
diff --git a/Tests/LibRegex/TestRegex.cpp b/Tests/LibRegex/TestRegex.cpp
index 8bb355106ab..e0345476006 100644
--- a/Tests/LibRegex/TestRegex.cpp
+++ b/Tests/LibRegex/TestRegex.cpp
@@ -823,7 +823,7 @@ TEST_CASE(ECMA262_unicode_match)
     for (auto& test : tests) {
         Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | test.options);
 
-        auto subject = MUST(AK::utf8_to_utf16(test.subject));
+        auto subject = Utf16String::from_utf8(test.subject);
         Utf16View view { subject };
 
         if constexpr (REGEX_DEBUG) {
@@ -956,7 +956,7 @@ TEST_CASE(ECMA262_property_match)
     for (auto& test : tests) {
         Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | regex::ECMAScriptFlags::BrowserExtended | test.options);
 
-        auto subject = MUST(AK::utf8_to_utf16(test.subject));
+        auto subject = Utf16String::from_utf8(test.subject);
         Utf16View view { subject };
 
         if constexpr (REGEX_DEBUG) {
diff --git a/Tests/LibUnicode/TestSegmenter.cpp b/Tests/LibUnicode/TestSegmenter.cpp
index ccb17bd26ad..66bb870636c 100644
--- a/Tests/LibUnicode/TestSegmenter.cpp
+++ b/Tests/LibUnicode/TestSegmenter.cpp
@@ -9,6 +9,7 @@
 #include <AK/Array.h>
 #include <AK/String.h>
 #include <AK/StringView.h>
+#include <AK/Utf16String.h>
 #include <AK/Utf16View.h>
 #include <AK/Vector.h>
 #include <LibUnicode/Segmenter.h>
@@ -155,21 +156,21 @@ TEST_CASE(out_of_bounds)
         EXPECT(!result.has_value());
     }
     {
-        auto text = MUST(AK::utf8_to_utf16("foo"sv));
+        auto text = u"foo"_utf16;
 
         auto segmenter = Unicode::Segmenter::create(Unicode::SegmenterGranularity::Word);
-        segmenter->set_segmented_text(Utf16View { text });
+        segmenter->set_segmented_text(text);
 
-        auto result = segmenter->previous_boundary(text.data.size() + 1);
+        auto result = segmenter->previous_boundary(text.length_in_code_units() + 1);
         EXPECT(result.has_value());
 
-        result = segmenter->next_boundary(text.data.size() + 1);
+        result = segmenter->next_boundary(text.length_in_code_units() + 1);
         EXPECT(!result.has_value());
 
-        result = segmenter->previous_boundary(text.data.size());
+        result = segmenter->previous_boundary(text.length_in_code_units());
         EXPECT(result.has_value());
 
-        result = segmenter->next_boundary(text.data.size());
+        result = segmenter->next_boundary(text.length_in_code_units());
         EXPECT(!result.has_value());
 
         result = segmenter->next_boundary(0);