AK+LibJS+LibWeb+LibRegex: Replace AK::Utf16Data with AK::Utf16String

Author: https://github.com/trflynn89 Commit: 9582895759 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5388 Reviewed-by: https://github.com/shannonbooth ✅
2025-09-17 06:52:23 +00:00 · 2025-07-09 14:13:38 -04:00 · 2025-07-09 14:13:38 -04:00 · 9582895759 · 2025-07-18 16:46:53 +00:00
commit 9582895759
parent a43cb15e81
22 changed files with 101 additions and 222 deletions
--- a/AK/String.cpp
+++ b/AK/String.cpp
@ -67,11 +67,6 @@ ErrorOr<String> String::from_utf8(StringView view)
    return result;
 }

-ErrorOr<String> String::from_utf16(Utf16View const& utf16)
-{
-    return utf16.to_utf8();
-}
-
 ErrorOr<String> String::from_utf16_le_with_replacement_character(ReadonlyBytes bytes)
 {
    if (bytes.is_empty())
@ -80,7 +75,7 @@ ErrorOr<String> String::from_utf16_le_with_replacement_character(ReadonlyBytes b
    auto const* utf16_data = reinterpret_cast<char16_t const*>(bytes.data());
    auto utf16_length = bytes.size() / 2;

-    Utf16Data well_formed_utf16;
+    Vector<char16_t> well_formed_utf16;

    if (!validate_utf16_le(bytes)) {
        well_formed_utf16.resize(bytes.size());
@ -109,7 +104,7 @@ ErrorOr<String> String::from_utf16_be_with_replacement_character(ReadonlyBytes b
    auto const* utf16_data = reinterpret_cast<char16_t const*>(bytes.data());
    auto utf16_length = bytes.size() / 2;

-    Utf16Data well_formed_utf16;
+    Vector<char16_t> well_formed_utf16;

    if (!validate_utf16_le(bytes)) {
        well_formed_utf16.resize(bytes.size());
--- a/AK/String.h
+++ b/AK/String.h
@ -69,7 +69,6 @@ public:
    [[nodiscard]] static String from_string_builder_without_validation(Badge<StringBuilder>, StringBuilder&);

    // Creates a new String from a sequence of UTF-16 encoded code points.
-    static ErrorOr<String> from_utf16(Utf16View const&);
    static ErrorOr<String> from_utf16_le_with_replacement_character(ReadonlyBytes);
    static ErrorOr<String> from_utf16_be_with_replacement_character(ReadonlyBytes);

--- a/AK/Utf16View.cpp
+++ b/AK/Utf16View.cpp
@ -10,77 +10,12 @@
 #include <AK/StringView.h>
 #include <AK/Utf16String.h>
 #include <AK/Utf16View.h>
-#include <AK/Utf32View.h>
 #include <AK/Utf8View.h>

 #include <simdutf.h>

 namespace AK {

-template<OneOf<Utf8View, Utf32View> UtfViewType>
-static ErrorOr<Utf16ConversionResult> to_utf16_slow(UtfViewType const& view)
-{
-    Utf16Data utf16_data;
-    TRY(utf16_data.try_ensure_capacity(view.length()));
-
-    size_t code_point_count = 0;
-    for (auto code_point : view) {
-        TRY(UnicodeUtils::try_code_point_to_utf16(code_point, [&](auto code_unit) -> ErrorOr<void> {
-            TRY(utf16_data.try_append(code_unit));
-            return {};
-        }));
-
-        code_point_count++;
-    }
-
-    return Utf16ConversionResult { move(utf16_data), code_point_count };
-}
-
-ErrorOr<Utf16ConversionResult> utf8_to_utf16(StringView utf8_view)
-{
-    return utf8_to_utf16(Utf8View { utf8_view });
-}
-
-ErrorOr<Utf16ConversionResult> utf8_to_utf16(Utf8View const& utf8_view)
-{
-    if (utf8_view.is_empty())
-        return Utf16ConversionResult { Utf16Data {}, 0 };
-
-    // All callers want to allow lonely surrogates, which simdutf does not permit.
-    if (!utf8_view.validate(AllowLonelySurrogates::No)) [[unlikely]]
-        return to_utf16_slow(utf8_view);
-
-    auto const* data = reinterpret_cast<char const*>(utf8_view.bytes());
-    auto length = utf8_view.byte_length();
-
-    Utf16Data utf16_data;
-    TRY(utf16_data.try_resize(simdutf::utf16_length_from_utf8(data, length)));
-    // FIXME: simdutf _could_ be telling us about this, but it doesn't -- so we have to compute it again.
-    auto code_point_length = simdutf::count_utf8(data, length);
-
-    [[maybe_unused]] auto result = simdutf::convert_utf8_to_utf16(data, length, reinterpret_cast<char16_t*>(utf16_data.data()));
-    ASSERT(result == utf16_data.size());
-
-    return Utf16ConversionResult { utf16_data, code_point_length };
-}
-
-ErrorOr<Utf16ConversionResult> utf32_to_utf16(Utf32View const& utf32_view)
-{
-    if (utf32_view.is_empty())
-        return Utf16ConversionResult { Utf16Data {}, 0 };
-
-    auto const* data = reinterpret_cast<char32_t const*>(utf32_view.code_points());
-    auto length = utf32_view.length();
-
-    Utf16Data utf16_data;
-    TRY(utf16_data.try_resize(simdutf::utf16_length_from_utf32(data, length)));
-
-    [[maybe_unused]] auto result = simdutf::convert_utf32_to_utf16(data, length, reinterpret_cast<char16_t*>(utf16_data.data()));
-    ASSERT(result == utf16_data.size());
-
-    return Utf16ConversionResult { utf16_data, length };
-}
-
 bool validate_utf16_le(ReadonlyBytes bytes)
 {
    return simdutf::validate_utf16le(reinterpret_cast<char16_t const*>(bytes.data()), bytes.size() / 2);
--- a/AK/Utf16View.h
+++ b/AK/Utf16View.h
@ -23,16 +23,6 @@

 namespace AK {

-using Utf16Data = Vector<char16_t, 1>;
-
-struct Utf16ConversionResult {
-    Utf16Data data;
-    size_t code_point_count;
-};
-ErrorOr<Utf16ConversionResult> utf8_to_utf16(StringView);
-ErrorOr<Utf16ConversionResult> utf8_to_utf16(Utf8View const&);
-ErrorOr<Utf16ConversionResult> utf32_to_utf16(Utf32View const&);
-
 [[nodiscard]] bool validate_utf16_le(ReadonlyBytes);
 [[nodiscard]] bool validate_utf16_be(ReadonlyBytes);

@ -156,13 +146,6 @@ public:
        m_length_in_code_units |= 1uz << Detail::UTF16_FLAG;
    }

-    constexpr Utf16View(Utf16Data const& string)
-        : m_string { .utf16 = string.data() }
-        , m_length_in_code_units(string.size())
-    {
-        m_length_in_code_units |= 1uz << Detail::UTF16_FLAG;
-    }
-
    consteval Utf16View(StringView string)
        : m_string { .ascii = string.characters_without_null_termination() }
        , m_length_in_code_units(string.length())
@ -170,15 +153,6 @@ public:
        VERIFY(all_of(string, AK::is_ascii));
    }

-    Utf16View(Utf16ConversionResult&&) = delete;
-    explicit Utf16View(Utf16ConversionResult const& conversion_result)
-        : m_string { .utf16 = conversion_result.data.data() }
-        , m_length_in_code_units(conversion_result.data.size())
-        , m_length_in_code_points(conversion_result.code_point_count)
-    {
-        m_length_in_code_units |= 1uz << Detail::UTF16_FLAG;
-    }
-
    ErrorOr<String> to_utf8(AllowLonelySurrogates = AllowLonelySurrogates::Yes) const;
    ErrorOr<ByteString> to_byte_string(AllowLonelySurrogates = AllowLonelySurrogates::Yes) const;

@ -314,18 +288,6 @@ public:
        return m_length_in_code_points;
    }

-    constexpr Optional<size_t> length_in_code_points_if_known() const
-    {
-        if (has_ascii_storage())
-            return m_length_in_code_units;
-
-        if (m_length_in_code_points == NumericLimits<size_t>::max())
-            return {};
-        return m_length_in_code_points;
-    }
-
-    constexpr void unsafe_set_code_point_length(size_t length) const { m_length_in_code_points = length; }
-
    [[nodiscard]] constexpr char16_t code_unit_at(size_t index) const
    {
        VERIFY(index < length_in_code_units());
@ -591,6 +553,5 @@ inline constexpr bool IsHashCompatible<Utf16String, Utf16View> = true;
 }

 #if USING_AK_GLOBALLY
-using AK::Utf16Data;
 using AK::Utf16View;
 #endif
--- a/Libraries/LibCore/ProcessWindows.cpp
+++ b/Libraries/LibCore/ProcessWindows.cpp
@ -111,7 +111,7 @@ ErrorOr<String> Process::get_name()
    if (!length)
        return Error::from_windows_error();

-    return String::from_utf16(Utf16View { reinterpret_cast<char16_t const*>(path), length });
+    return MUST(Utf16View { reinterpret_cast<char16_t const*>(path), length }.to_utf8());
 }

 ErrorOr<void> Process::set_name(StringView, SetThreadName)
--- a/Libraries/LibJS/Runtime/GlobalObject.cpp
+++ b/Libraries/LibJS/Runtime/GlobalObject.cpp
@ -559,7 +559,7 @@ JS_DEFINE_NATIVE_FUNCTION(GlobalObject::encode_uri_component)
 JS_DEFINE_NATIVE_FUNCTION(GlobalObject::escape)
 {
    // 1. Set string to ? ToString(string).
-    auto string = TRY(vm.argument(0).to_byte_string(vm));
+    auto string = TRY(vm.argument(0).to_utf16_string(vm));

    // 3. Let R be the empty String.
    StringBuilder escaped;
@ -570,29 +570,29 @@ JS_DEFINE_NATIVE_FUNCTION(GlobalObject::escape)
    // 2. Let length be the length of string.
    // 5. Let k be 0.
    // 6. Repeat, while k < length,
-    auto utf16_conversion = TRY_OR_THROW_OOM(vm, utf8_to_utf16(string));
-    for (auto code_point : utf16_conversion.data) {
+    for (size_t k = 0; k < string.length_in_code_units(); ++k) {
        // a. Let char be the code unit at index k within string.
+        auto code_unit = string.code_unit_at(k);

        // b. If unescapedSet contains char, then
        // NOTE: We know unescapedSet is ASCII-only, so ensure we have an ASCII codepoint before casting to char.
-        if (is_ascii(code_point) && unescaped_set.contains(static_cast<char>(code_point))) {
+        if (is_ascii(code_unit) && unescaped_set.contains(static_cast<char>(code_unit))) {
            // i. Let S be the String value containing the single code unit char.
-            escaped.append(code_point);
+            escaped.append(static_cast<char>(code_unit));
        }
        // c. Else,
        // i. Let n be the numeric value of char.
        // ii. If n < 256, then
-        else if (code_point < 256) {
+        else if (code_unit < 256) {
            // 1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
            // 2. Let S be the string-concatenation of "%" and ! StringPad(hex, 2𝔽, "0", start).
-            escaped.appendff("%{:02X}", code_point);
+            escaped.appendff("%{:02X}", code_unit);
        }
        // iii. Else,
        else {
            // 1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
            // 2. Let S be the string-concatenation of "%u" and ! StringPad(hex, 4𝔽, "0", start).
-            escaped.appendff("%u{:04X}", code_point);
+            escaped.appendff("%u{:04X}", code_unit);
        }

        // d. Set R to the string-concatenation of R and S.
--- a/Libraries/LibJS/Runtime/RegExpObject.cpp
+++ b/Libraries/LibJS/Runtime/RegExpObject.cpp
@ -93,26 +93,21 @@ ErrorOr<String, ParseRegexPatternError> parse_regex_pattern(StringView pattern,
    if (unicode && unicode_sets)
        return ParseRegexPatternError { MUST(String::formatted(ErrorType::RegExpObjectIncompatibleFlags.message(), 'u', 'v')) };

-    auto utf16_pattern_result = AK::utf8_to_utf16(pattern);
-    if (utf16_pattern_result.is_error())
-        return ParseRegexPatternError { "Out of memory"_string };
-
-    auto utf16_result = utf16_pattern_result.release_value();
-    Utf16View utf16_pattern_view { utf16_result };
+    auto utf16_pattern = Utf16String::from_utf8(pattern);
    StringBuilder builder;

    // If the Unicode flag is set, append each code point to the pattern. Otherwise, append each
    // code unit. But unlike the spec, multi-byte code units must be escaped for LibRegex to parse.
    auto previous_code_unit_was_backslash = false;
-    for (size_t i = 0; i < utf16_pattern_view.length_in_code_units();) {
+    for (size_t i = 0; i < utf16_pattern.length_in_code_units();) {
        if (unicode || unicode_sets) {
-            auto code_point = code_point_at(utf16_pattern_view, i);
+            auto code_point = code_point_at(utf16_pattern, i);
            builder.append_code_point(code_point.code_point);
            i += code_point.code_unit_count;
            continue;
        }

-        u16 code_unit = utf16_pattern_view.code_unit_at(i);
+        u16 code_unit = utf16_pattern.code_unit_at(i);
        ++i;

        if (code_unit > 0x7f) {
--- a/Libraries/LibRegex/RegexByteCode.cpp
+++ b/Libraries/LibRegex/RegexByteCode.cpp
@ -512,7 +512,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
                return ExecutionResult::Failed_ExecuteLowPrioForks;

            Optional<ByteString> str;
-            Utf16Data utf16;
+            Utf16String utf16;
            Vector<u32> data;
            data.ensure_capacity(length);
            for (size_t i = offset; i < offset + length; ++i)
--- a/Libraries/LibRegex/RegexMatch.h
+++ b/Libraries/LibRegex/RegexMatch.h
@ -8,14 +8,15 @@

 #include "Forward.h"
 #include "RegexOptions.h"
-#include <AK/Error.h>

 #include <AK/ByteString.h>
 #include <AK/COWVector.h>
+#include <AK/Error.h>
 #include <AK/FlyString.h>
 #include <AK/MemMem.h>
 #include <AK/StringBuilder.h>
 #include <AK/StringView.h>
+#include <AK/Utf16String.h>
 #include <AK/Utf16View.h>
 #include <AK/Utf32View.h>
 #include <AK/Utf8View.h>
@ -110,7 +111,7 @@ public:
        return view;
    }

-    RegexStringView construct_as_same(Span<u32> data, Optional<ByteString>& optional_string_storage, Utf16Data& optional_utf16_storage) const
+    RegexStringView construct_as_same(Span<u32> data, Optional<ByteString>& optional_string_storage, Utf16String& optional_utf16_storage) const
    {
        auto view = m_view.visit(
            [&optional_string_storage, data]<typename T>(T const&) {
@ -121,11 +122,8 @@ public:
                return RegexStringView { T { *optional_string_storage } };
            },
            [&optional_utf16_storage, data](Utf16View) {
-                auto conversion_result = utf32_to_utf16(Utf32View { data.data(), data.size() }).release_value_but_fixme_should_propagate_errors();
-                optional_utf16_storage = conversion_result.data;
-                auto view = Utf16View { optional_utf16_storage };
-                view.unsafe_set_code_point_length(conversion_result.code_point_count);
-                return RegexStringView { view };
+                optional_utf16_storage = Utf16String::from_utf32({ data.data(), data.size() });
+                return RegexStringView { optional_utf16_storage.utf16_view() };
            });

        view.set_unicode(unicode());
--- a/Libraries/LibWeb/DOM/CharacterData.cpp
+++ b/Libraries/LibWeb/DOM/CharacterData.cpp
@ -46,9 +46,8 @@ WebIDL::ExceptionOr<String> CharacterData::substring_data(size_t offset, size_t
 {
    // 1. Let length be node’s length.
    // FIXME: This is very inefficient!
-    auto utf16_result = MUST(AK::utf8_to_utf16(m_data));
-    Utf16View utf16_view { utf16_result };
-    auto length = utf16_view.length_in_code_units();
+    auto utf16_string = Utf16String::from_utf8(m_data);
+    auto length = utf16_string.length_in_code_units();

    // 2. If offset is greater than length, then throw an "IndexSizeError" DOMException.
    if (offset > length)
@ -57,10 +56,10 @@ WebIDL::ExceptionOr<String> CharacterData::substring_data(size_t offset, size_t
    // 3. If offset plus count is greater than length, return a string whose value is the code units from the offsetth code unit
    //    to the end of node’s data, and then return.
    if (offset + count > length)
-        return MUST(utf16_view.substring_view(offset).to_utf8());
+        return MUST(utf16_string.substring_view(offset).to_utf8());

    // 4. Return a string whose value is the code units from the offsetth code unit to the offset+countth code unit in node’s data.
-    return MUST(utf16_view.substring_view(offset, count).to_utf8());
+    return MUST(utf16_string.substring_view(offset, count).to_utf8());
 }

 // https://dom.spec.whatwg.org/#concept-cd-replace
@ -68,9 +67,8 @@ WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t coun
 {
    // 1. Let length be node’s length.
    // FIXME: This is very inefficient!
-    auto utf16_data = MUST(AK::utf8_to_utf16(m_data));
-    Utf16View utf16_view { utf16_data };
-    auto length = utf16_view.length_in_code_units();
+    auto utf16_string = Utf16String::from_utf8(m_data);
+    auto length = utf16_string.length_in_code_units();

    // 2. If offset is greater than length, then throw an "IndexSizeError" DOMException.
    if (offset > length)
@ -83,17 +81,17 @@ WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t coun
    // 5. Insert data into node’s data after offset code units.
    // 6. Let delete offset be offset + data’s length.
    // 7. Starting from delete offset code units, remove count code units from node’s data.
-    auto before_data = utf16_view.substring_view(0, offset);
-    auto inserted_data_result = MUST(AK::utf8_to_utf16(data));
-    auto after_data = utf16_view.substring_view(offset + count);
+    auto before_data = utf16_string.substring_view(0, offset);
+    auto inserted_data = Utf16String::from_utf8(data);
+    auto after_data = utf16_string.substring_view(offset + count);

-    StringBuilder full_data(StringBuilder::Mode::UTF16, before_data.length_in_code_units() + inserted_data_result.data.size() + after_data.length_in_code_units());
+    StringBuilder full_data(StringBuilder::Mode::UTF16, before_data.length_in_code_units() + inserted_data.length_in_code_units() + after_data.length_in_code_units());
    full_data.append(before_data);
-    full_data.append(inserted_data_result.data);
+    full_data.append(inserted_data);
    full_data.append(after_data);
-    auto full_view = full_data.utf16_string_view();

-    bool characters_are_the_same = utf16_view == full_view;
+    auto full_view = full_data.utf16_string_view();
+    bool characters_are_the_same = utf16_string == full_view;
    auto old_data = m_data;

    // OPTIMIZATION: Skip UTF-8 encoding if the characters are the same.
@ -123,14 +121,14 @@ WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t coun
    //     start offset by data’s length and decrease it by count.
    for (auto* range : Range::live_ranges()) {
        if (range->start_container() == this && range->start_offset() > (offset + count))
-            range->set_start_offset(range->start_offset() + inserted_data_result.data.size() - count);
+            range->set_start_offset(range->start_offset() + inserted_data.length_in_code_units() - count);
    }

    // 11. For each live range whose end node is node and end offset is greater than offset plus count, increase its end
    //     offset by data’s length and decrease it by count.
    for (auto* range : Range::live_ranges()) {
        if (range->end_container() == this && range->end_offset() > (offset + count))
-            range->set_end_offset(range->end_offset() + inserted_data_result.data.size() - count);
+            range->set_end_offset(range->end_offset() + inserted_data.length_in_code_units() - count);
    }

    // 12. If node’s parent is non-null, then run the children changed steps for node’s parent.
--- a/Libraries/LibWeb/DOM/Document.cpp
+++ b/Libraries/LibWeb/DOM/Document.cpp
@ -6158,8 +6158,7 @@ Vector<GC::Root<Range>> Document::find_matching_text(String const& query, CaseSe
    if (text_blocks.is_empty())
        return {};

-    auto utf16_query = MUST(AK::utf8_to_utf16(query));
-    Utf16View query_view { utf16_query };
+    auto utf16_query = Utf16String::from_utf8(query);

    Vector<GC::Root<Range>> matches;
    for (auto const& text_block : text_blocks) {
@ -6169,8 +6168,8 @@ Vector<GC::Root<Range>> Document::find_matching_text(String const& query, CaseSe
        auto* match_start_position = text_block.positions.data();
        while (true) {
            auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive
-                ? text_view.find_code_unit_offset_ignoring_case(query_view, offset)
-                : text_view.find_code_unit_offset(query_view, offset);
+                ? text_view.find_code_unit_offset_ignoring_case(utf16_query, offset)
+                : text_view.find_code_unit_offset(utf16_query, offset);
            if (!match_index.has_value())
                break;

@ -6181,15 +6180,15 @@ Vector<GC::Root<Range>> Document::find_matching_text(String const& query, CaseSe
            auto& start_dom_node = match_start_position->dom_node;

            auto* match_end_position = match_start_position;
-            for (; i < text_block.positions.size() - 1 && (match_index.value() + query_view.length_in_code_units() > text_block.positions[i + 1].start_offset); ++i)
+            for (; i < text_block.positions.size() - 1 && (match_index.value() + utf16_query.length_in_code_units() > text_block.positions[i + 1].start_offset); ++i)
                match_end_position = &text_block.positions[i + 1];

            auto& end_dom_node = match_end_position->dom_node;
-            auto end_position = match_index.value() + query_view.length_in_code_units() - match_end_position->start_offset;
+            auto end_position = match_index.value() + utf16_query.length_in_code_units() - match_end_position->start_offset;

            matches.append(Range::create(start_dom_node, start_position, end_dom_node, end_position));
            match_start_position = match_end_position;
-            offset = match_index.value() + query_view.length_in_code_units() + 1;
+            offset = match_index.value() + utf16_query.length_in_code_units() + 1;
            if (offset >= text_view.length_in_code_units())
                break;
        }
--- a/Libraries/LibWeb/Editing/Internal/Algorithms.cpp
+++ b/Libraries/LibWeb/Editing/Internal/Algorithms.cpp
@ -384,9 +384,9 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa
            auto parent_white_space_collapse = resolved_keyword(*start_node->parent(), CSS::PropertyID::WhiteSpaceCollapse);

            // FIXME: Find a way to get code points directly from the UTF-8 string
-            auto start_node_data = *start_node->text_content();
-            auto utf16_code_units = MUST(AK::utf8_to_utf16(start_node_data));
-            auto offset_minus_one_code_point = Utf16View { utf16_code_units }.code_point_at(start_offset - 1);
+            auto start_node_data = Utf16String::from_utf8(*start_node->text_content());
+            auto offset_minus_one_code_point = start_node_data.code_point_at(start_offset - 1);
+
            if (parent_white_space_collapse != CSS::Keyword::Preserve && (offset_minus_one_code_point == 0x20 || offset_minus_one_code_point == 0xA0)) {
                --start_offset;
                continue;
@ -437,9 +437,9 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa
            auto parent_white_space_collapse = resolved_keyword(*end_node->parent(), CSS::PropertyID::WhiteSpaceCollapse);

            // FIXME: Find a way to get code points directly from the UTF-8 string
-            auto end_node_data = *end_node->text_content();
-            auto utf16_code_units = MUST(AK::utf8_to_utf16(end_node_data));
-            auto offset_code_point = Utf16View { utf16_code_units }.code_point_at(end_offset);
+            auto end_node_data = Utf16String::from_utf8(*end_node->text_content());
+            auto offset_code_point = end_node_data.code_point_at(end_offset);
+
            if (parent_white_space_collapse != CSS::Keyword::Preserve && (offset_code_point == 0x20 || offset_code_point == 0xA0)) {
                // 1. If fix collapsed space is true, and collapse spaces is true, and the end offsetth
                //    code unit of end node's data is a space (0x0020): call deleteData(end offset, 1)
@ -556,16 +556,14 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa
            // 1. Remove the first code unit from replacement whitespace, and let element be that
            //    code unit.
            // FIXME: Find a way to get code points directly from the UTF-8 string
-            auto replacement_whitespace_utf16 = MUST(AK::utf8_to_utf16(replacement_whitespace));
-            auto replacement_whitespace_utf16_view = Utf16View { replacement_whitespace_utf16 };
-            replacement_whitespace = MUST(String::from_utf16({ replacement_whitespace_utf16_view.substring_view(1) }));
-            auto element = replacement_whitespace_utf16_view.code_point_at(0);
+            auto replacement_whitespace_utf16 = Utf16String::from_utf8(replacement_whitespace);
+            replacement_whitespace = MUST(replacement_whitespace_utf16.substring_view(1).to_utf8());
+            auto element = replacement_whitespace_utf16.code_point_at(0);

            // 2. If element is not the same as the start offsetth code unit of start node's data:
-            auto start_node_data = *start_node->text_content();
-            auto start_node_utf16 = MUST(AK::utf8_to_utf16(start_node_data));
-            auto start_node_utf16_view = Utf16View { start_node_utf16 };
-            auto start_node_code_point = start_node_utf16_view.code_point_at(start_offset);
+            auto start_node_data = Utf16String::from_utf8(*start_node->text_content());
+            auto start_node_code_point = start_node_data.code_point_at(start_offset);
+
            if (element != start_node_code_point) {
                // 1. Call insertData(start offset, element) on start node.
                auto& start_node_character_data = static_cast<DOM::CharacterData&>(*start_node);
--- a/Libraries/LibWeb/FileAPI/FileReader.cpp
+++ b/Libraries/LibWeb/FileAPI/FileReader.cpp
@ -106,11 +106,10 @@ WebIDL::ExceptionOr<FileReader::Result> FileReader::blob_package_data(JS::Realm&
        return JS::ArrayBuffer::create(realm, move(bytes));
    case Type::BinaryString:
        // Return bytes as a binary string, in which every byte is represented by a code unit of equal value [0..255].
-        Utf16Data builder;
-        builder.ensure_capacity(bytes.size());
+        StringBuilder builder(StringBuilder::Mode::UTF16, bytes.size());
        for (auto byte : bytes.bytes())
-            builder.unchecked_append(byte);
-        return MUST(Utf16View { builder }.to_utf8());
+            builder.append_code_unit(byte);
+        return MUST(builder.utf16_string_view().to_utf8());
    }
    VERIFY_NOT_REACHED();
 }
--- a/Libraries/LibWeb/Infra/Strings.cpp
+++ b/Libraries/LibWeb/Infra/Strings.cpp
@ -12,6 +12,7 @@
 #include <AK/FlyString.h>
 #include <AK/GenericLexer.h>
 #include <AK/String.h>
+#include <AK/Utf16String.h>
 #include <AK/Utf16View.h>
 #include <AK/Utf8View.h>
 #include <LibWeb/Infra/CharacterTypes.h>
@ -63,10 +64,8 @@ ErrorOr<String> strip_and_collapse_whitespace(StringView string)
 // https://infra.spec.whatwg.org/#code-unit-prefix
 bool is_code_unit_prefix(StringView potential_prefix_utf8, StringView input_utf8)
 {
-    auto potential_prefix_utf16_bytes = MUST(utf8_to_utf16(potential_prefix_utf8));
-    auto input_utf16_bytes = MUST(utf8_to_utf16(input_utf8));
-    Utf16View potential_prefix { potential_prefix_utf16_bytes };
-    Utf16View input { input_utf16_bytes };
+    auto potential_prefix = Utf16String::from_utf8(potential_prefix_utf8);
+    auto input = Utf16String::from_utf8(input_utf8);

    // 1. Let i be 0.
    size_t i = 0;
@ -148,9 +147,10 @@ bool code_unit_less_than(StringView a, StringView b)
    if (a.is_ascii() && b.is_ascii())
        return a < b;

-    auto a_utf16 = MUST(utf8_to_utf16(a));
-    auto b_utf16 = MUST(utf8_to_utf16(b));
-    return Utf16View { a_utf16 }.is_code_unit_less_than(Utf16View { b_utf16 });
+    auto a_utf16 = Utf16String::from_utf8(a);
+    auto b_utf16 = Utf16String::from_utf8(b);
+
+    return a_utf16.utf16_view().is_code_unit_less_than(b_utf16);
 }

 }
--- a/Libraries/LibWeb/Layout/Viewport.cpp
+++ b/Libraries/LibWeb/Layout/Viewport.cpp
@ -50,17 +50,18 @@ Vector<Viewport::TextBlock> const& Viewport::text_blocks()

 void Viewport::update_text_blocks()
 {
-    StringBuilder builder;
+    StringBuilder builder(StringBuilder::Mode::UTF16);
    size_t current_start_position = 0;
    Vector<TextPosition> text_positions;
    Vector<TextBlock> text_blocks;
+
    for_each_in_inclusive_subtree([&](auto const& layout_node) {
        if (layout_node.display().is_none() || !layout_node.first_paintable() || !layout_node.first_paintable()->is_visible())
            return TraversalDecision::Continue;

        if (layout_node.is_box() || layout_node.is_generated()) {
            if (!builder.is_empty()) {
-                text_blocks.append({ MUST(AK::utf8_to_utf16(builder.string_view())), text_positions });
+                text_blocks.append({ builder.to_utf16_string(), text_positions });
                current_start_position = 0;
                text_positions.clear_with_capacity();
                builder.clear();
@ -79,10 +80,9 @@ void Viewport::update_text_blocks()
                    text_positions.empend(dom_node, current_start_position);
                }

-                auto const& current_node_text = text_node->text_for_rendering();
-                auto const current_node_text_utf16 = MUST(AK::utf8_to_utf16(current_node_text));
-                current_start_position += current_node_text_utf16.data.size();
-                builder.append(move(current_node_text));
+                auto const& current_node_text = Utf16String::from_utf8(text_node->text_for_rendering());
+                current_start_position += current_node_text.length_in_code_units();
+                builder.append(current_node_text);
            }
        }

@ -90,7 +90,7 @@ void Viewport::update_text_blocks()
    });

    if (!builder.is_empty())
-        text_blocks.append({ MUST(AK::utf8_to_utf16(builder.string_view())), text_positions });
+        text_blocks.append({ builder.to_utf16_string(), text_positions });

    m_text_blocks = move(text_blocks);
 }
--- a/Libraries/LibWeb/Layout/Viewport.h
+++ b/Libraries/LibWeb/Layout/Viewport.h
@ -6,6 +6,7 @@

 #pragma once

+#include <AK/Utf16String.h>
 #include <LibWeb/DOM/Document.h>
 #include <LibWeb/Layout/BlockContainer.h>

@ -24,7 +25,7 @@ public:
        size_t start_offset { 0 };
    };
    struct TextBlock {
-        AK::Utf16ConversionResult text;
+        Utf16String text;
        Vector<TextPosition> positions;
    };
    Vector<TextBlock> const& text_blocks();
--- a/Libraries/LibWeb/Painting/PaintableFragment.cpp
+++ b/Libraries/LibWeb/Painting/PaintableFragment.cpp
@ -252,9 +252,8 @@ Utf16View PaintableFragment::utf16_view() const
        return {};

    if (!m_text_in_utf16.has_value())
-        m_text_in_utf16 = MUST(AK::utf8_to_utf16(utf8_view()));
-
-    return Utf16View { m_text_in_utf16.value() };
+        m_text_in_utf16 = Utf16String::from_utf8(utf8_view().as_string());
+    return *m_text_in_utf16;
 }

 }
--- a/Libraries/LibWeb/Painting/PaintableFragment.h
+++ b/Libraries/LibWeb/Painting/PaintableFragment.h
@ -6,6 +6,7 @@

 #pragma once

+#include <AK/Utf16String.h>
 #include <LibGfx/TextLayout.h>
 #include <LibWeb/Layout/Node.h>
 #include <LibWeb/Painting/ShadowData.h>
@ -64,7 +65,7 @@ private:
    CSS::WritingMode m_writing_mode;
    Vector<ShadowData> m_shadows;
    CSSPixels m_text_decoration_thickness { 0 };
-    mutable Optional<AK::Utf16ConversionResult> m_text_in_utf16;
+    mutable Optional<Utf16String> m_text_in_utf16;
 };

 }
--- a/Libraries/LibWeb/SVG/SVGTextContentElement.cpp
+++ b/Libraries/LibWeb/SVG/SVGTextContentElement.cpp
@ -48,8 +48,8 @@ ByteString SVGTextContentElement::text_contents() const
 // https://svgwg.org/svg2-draft/text.html#__svg__SVGTextContentElement__getNumberOfChars
 WebIDL::ExceptionOr<WebIDL::Long> SVGTextContentElement::get_number_of_chars() const
 {
-    auto chars = TRY_OR_THROW_OOM(vm(), utf8_to_utf16(text_contents())).data;
-    return static_cast<WebIDL::Long>(chars.size());
+    auto length_in_code_units = AK::utf16_code_unit_length_from_utf8(text_contents());
+    return static_cast<WebIDL::Long>(length_in_code_units);
 }

 GC::Ref<Geometry::DOMPoint> SVGTextContentElement::get_start_position_of_char(WebIDL::UnsignedLong charnum)
--- a/Tests/AK/TestUtf16View.cpp
+++ b/Tests/AK/TestUtf16View.cpp
@ -15,7 +15,7 @@

 TEST_CASE(decode_ascii)
 {
-    auto string = MUST(AK::utf8_to_utf16("Hello World!11"sv));
+    auto string = Utf16String::from_utf8("Hello World!11"sv);
    Utf16View view { string };

    size_t valid_code_units = 0;
@ -34,7 +34,7 @@ TEST_CASE(decode_ascii)

 TEST_CASE(decode_utf8)
 {
-    auto string = MUST(AK::utf8_to_utf16("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv));
+    auto string = Utf16String::from_utf8("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv);
    Utf16View view { string };

    size_t valid_code_units = 0;
@@ -55,7 +55,7 @@ TEST_CASE(encode_utf8)
 {
    {
        auto utf8_string = "Привет, мир! 😀 γειά σου κόσμος こんにちは世界"_string;
-        auto string = MUST(AK::utf8_to_utf16(utf8_string));
+        auto string = Utf16String::from_utf8(utf8_string);
        Utf16View view { string };
        EXPECT_EQ(MUST(view.to_utf8(AllowLonelySurrogates::Yes)), utf8_string);
        EXPECT_EQ(MUST(view.to_utf8(AllowLonelySurrogates::No)), utf8_string);
@@ -139,7 +139,7 @@ TEST_CASE(utf16_literal)

 TEST_CASE(iterate_utf16)
 {
-    auto string = MUST(AK::utf8_to_utf16("Привет 😀"sv));
+    auto string = Utf16String::from_utf8("Привет 😀"sv);
    Utf16View view { string };
    auto iterator = view.begin();

@@ -371,16 +371,16 @@ TEST_CASE(to_ascii_titlecase)

 TEST_CASE(equals_ignoring_case)
 {
-    auto string1 = MUST(AK::utf8_to_utf16("foobar"sv));
-    auto string2 = MUST(AK::utf8_to_utf16("FooBar"sv));
+    auto string1 = Utf16String::from_utf8("foobar"sv);
+    auto string2 = Utf16String::from_utf8("FooBar"sv);
    EXPECT(Utf16View { string1 }.equals_ignoring_case(Utf16View { string2 }));

-    string1 = MUST(AK::utf8_to_utf16(""sv));
-    string2 = MUST(AK::utf8_to_utf16(""sv));
+    string1 = Utf16String::from_utf8(""sv);
+    string2 = Utf16String::from_utf8(""sv);
    EXPECT(Utf16View { string1 }.equals_ignoring_case(Utf16View { string2 }));

-    string1 = MUST(AK::utf8_to_utf16(""sv));
-    string2 = MUST(AK::utf8_to_utf16("FooBar"sv));
+    string1 = Utf16String::from_utf8(""sv);
+    string2 = Utf16String::from_utf8("FooBar"sv);
    EXPECT(!Utf16View { string1 }.equals_ignoring_case(Utf16View { string2 }));
 }

@@ -425,7 +425,7 @@ TEST_CASE(replace)

 TEST_CASE(substring_view)
 {
-    auto string = MUST(AK::utf8_to_utf16("Привет 😀"sv));
+    auto string = Utf16String::from_utf8("Привет 😀"sv);
    {
        Utf16View view { string };
        view = view.substring_view(7, 2);
@@ -532,7 +532,7 @@ TEST_CASE(starts_with)

 TEST_CASE(find_code_unit_offset)
 {
-    auto conversion_result = MUST(AK::utf8_to_utf16("😀foo😀bar"sv));
+    auto conversion_result = Utf16String::from_utf8("😀foo😀bar"sv);
    Utf16View const view { conversion_result };

    EXPECT_EQ(0u, view.find_code_unit_offset(u""sv).value());
@@ -549,7 +549,7 @@ TEST_CASE(find_code_unit_offset)

 TEST_CASE(find_code_unit_offset_ignoring_case)
 {
-    auto conversion_result = MUST(AK::utf8_to_utf16("😀Foo😀Bar"sv));
+    auto conversion_result = Utf16String::from_utf8("😀Foo😀Bar"sv);
    Utf16View const view { conversion_result };

    EXPECT_EQ(0u, view.find_code_unit_offset_ignoring_case(u""sv).value());
--- a/Tests/LibRegex/TestRegex.cpp
+++ b/Tests/LibRegex/TestRegex.cpp
@@ -823,7 +823,7 @@ TEST_CASE(ECMA262_unicode_match)
    for (auto& test : tests) {
        Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | test.options);

-        auto subject = MUST(AK::utf8_to_utf16(test.subject));
+        auto subject = Utf16String::from_utf8(test.subject);
        Utf16View view { subject };

        if constexpr (REGEX_DEBUG) {
@@ -956,7 +956,7 @@ TEST_CASE(ECMA262_property_match)
    for (auto& test : tests) {
        Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | regex::ECMAScriptFlags::BrowserExtended | test.options);

-        auto subject = MUST(AK::utf8_to_utf16(test.subject));
+        auto subject = Utf16String::from_utf8(test.subject);
        Utf16View view { subject };

        if constexpr (REGEX_DEBUG) {
--- a/Tests/LibUnicode/TestSegmenter.cpp
+++ b/Tests/LibUnicode/TestSegmenter.cpp
@@ -9,6 +9,7 @@
 #include <AK/Array.h>
 #include <AK/String.h>
 #include <AK/StringView.h>
+#include <AK/Utf16String.h>
 #include <AK/Utf16View.h>
 #include <AK/Vector.h>
 #include <LibUnicode/Segmenter.h>
@@ -155,21 +156,21 @@ TEST_CASE(out_of_bounds)
        EXPECT(!result.has_value());
    }
    {
-        auto text = MUST(AK::utf8_to_utf16("foo"sv));
+        auto text = u"foo"_utf16;

        auto segmenter = Unicode::Segmenter::create(Unicode::SegmenterGranularity::Word);
-        segmenter->set_segmented_text(Utf16View { text });
+        segmenter->set_segmented_text(text);

-        auto result = segmenter->previous_boundary(text.data.size() + 1);
+        auto result = segmenter->previous_boundary(text.length_in_code_units() + 1);
        EXPECT(result.has_value());

-        result = segmenter->next_boundary(text.data.size() + 1);
+        result = segmenter->next_boundary(text.length_in_code_units() + 1);
        EXPECT(!result.has_value());

-        result = segmenter->previous_boundary(text.data.size());
+        result = segmenter->previous_boundary(text.length_in_code_units());
        EXPECT(result.has_value());

-        result = segmenter->next_boundary(text.data.size());
+        result = segmenter->next_boundary(text.length_in_code_units());
        EXPECT(!result.has_value());

        result = segmenter->next_boundary(0);