From 9fc3e72db23cd7960c5fff76c5edbc94eb14b107 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Thu, 26 Jun 2025 19:52:09 -0400 Subject: [PATCH] AK+Everywhere: Allow lonely UTF-16 surrogates by default By definition, the web allows lonely surrogates by default. Let's have our string APIs reflect this, so we don't have to pass an allow option all over the place. --- AK/UnicodeUtils.h | 9 +++ AK/Utf16View.cpp | 22 ++++---- AK/Utf16View.h | 15 ++--- AK/Utf8View.cpp | 6 +- AK/Utf8View.h | 13 ++--- .../LibJS/Runtime/AbstractOperations.cpp | 4 +- Libraries/LibJS/Runtime/StringPrototype.cpp | 2 +- Libraries/LibJS/Runtime/Utf16String.cpp | 4 +- Libraries/LibRegex/RegexMatch.h | 4 +- Libraries/LibWasm/Parser/Parser.cpp | 2 +- Libraries/LibWeb/DOM/CharacterData.cpp | 6 +- Libraries/LibWeb/FileAPI/FileReader.cpp | 2 +- Tests/AK/TestUtf16View.cpp | 56 +++++++++---------- Tests/AK/TestUtf8View.cpp | 6 +- 14 files changed, 74 insertions(+), 77 deletions(-) diff --git a/AK/UnicodeUtils.h b/AK/UnicodeUtils.h index 6c1cb6fc906..3d037c6275c 100644 --- a/AK/UnicodeUtils.h +++ b/AK/UnicodeUtils.h @@ -85,6 +85,11 @@ constexpr inline u16 LOW_SURROGATE_MAX = 0xdfff; constexpr inline u32 REPLACEMENT_CODE_POINT = 0xfffd; constexpr inline u32 FIRST_SUPPLEMENTARY_PLANE_CODE_POINT = 0x10000; +enum class AllowLonelySurrogates { + No, + Yes, +}; + [[nodiscard]] constexpr size_t code_unit_length_for_code_point(u32 code_point) { return code_point < FIRST_SUPPLEMENTARY_PLANE_CODE_POINT ? 1uz : 2uz; @@ -201,3 +206,7 @@ constexpr ErrorOr try_code_point_to_utf16(u32 code_point, Callback callb } } + +#if USING_AK_GLOBALLY +using AK::UnicodeUtils::AllowLonelySurrogates; +#endif diff --git a/AK/Utf16View.cpp b/AK/Utf16View.cpp index 84a43c939c6..d9fd20c50ef 100644 --- a/AK/Utf16View.cpp +++ b/AK/Utf16View.cpp @@ -46,7 +46,7 @@ ErrorOr utf8_to_utf16(Utf8View const& utf8_view) return Utf16ConversionResult { Utf16Data {}, 0 }; // All callers want to allow lonely surrogates, which simdutf does not permit. - if (!utf8_view.validate(Utf8View::AllowSurrogates::No)) [[unlikely]] + if (!utf8_view.validate(AllowLonelySurrogates::No)) [[unlikely]] return to_utf16_slow(utf8_view); auto const* data = reinterpret_cast(utf8_view.bytes()); @@ -95,14 +95,14 @@ size_t utf16_code_unit_length_from_utf8(StringView string) return simdutf::utf16_length_from_utf8(string.characters_without_null_termination(), string.length()); } -ErrorOr Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_units) const +ErrorOr Utf16View::to_utf8(AllowLonelySurrogates allow_lonely_surrogates) const { if (is_empty()) return String {}; - if (!validate(allow_invalid_code_units)) + if (!validate(allow_lonely_surrogates)) return Error::from_string_literal("Input was not valid UTF-16"); - if (allow_invalid_code_units == AllowInvalidCodeUnits::No) { + if (allow_lonely_surrogates == AllowLonelySurrogates::No) { String result; auto utf8_length = simdutf::utf8_length_from_utf16(m_string, length_in_code_units()); @@ -120,9 +120,9 @@ ErrorOr Utf16View::to_utf8(AllowInvalidCodeUnits allow_invalid_code_unit return builder.to_string(); } -ErrorOr Utf16View::to_byte_string(AllowInvalidCodeUnits allow_invalid_code_units) const +ErrorOr Utf16View::to_byte_string(AllowLonelySurrogates allow_lonely_surrogates) const { - return TRY(to_utf8(allow_invalid_code_units)).to_byte_string(); + return TRY(to_utf8(allow_lonely_surrogates)).to_byte_string(); } bool Utf16View::is_ascii() const @@ -130,7 +130,7 @@ bool Utf16View::is_ascii() const return simdutf::validate_ascii(reinterpret_cast(m_string), length_in_code_units() * sizeof(char16_t)); } -bool Utf16View::validate(size_t& valid_code_units, AllowInvalidCodeUnits allow_invalid_code_units) const +bool Utf16View::validate(size_t& valid_code_units, AllowLonelySurrogates allow_lonely_surrogates) const { auto view = *this; valid_code_units = 0; @@ -141,7 +141,7 @@ bool Utf16View::validate(size_t& valid_code_units, AllowInvalidCodeUnits allow_i if (result.error == simdutf::SUCCESS) return true; - if (allow_invalid_code_units == AllowInvalidCodeUnits::No || result.error != simdutf::SURROGATE) + if (allow_lonely_surrogates == AllowLonelySurrogates::No || result.error != simdutf::SURROGATE) return false; view = view.substring_view(result.count + 1); @@ -219,10 +219,8 @@ Utf16View Utf16View::unicode_substring_view(size_t code_point_offset, size_t cod size_t Utf16View::calculate_length_in_code_points() const { - // FIXME: simdutf's code point length method assumes valid UTF-16, whereas Utf16View uses U+FFFD as a replacement - // for invalid code points. If we change Utf16View to only accept valid encodings as an invariant, we can - // remove this branch. - if (validate()) [[likely]] + // simdutf's code point length method assumes valid UTF-16, whereas we allow lonely surrogates. + if (validate(AllowLonelySurrogates::No)) [[likely]] return simdutf::count_utf16(m_string, length_in_code_units()); size_t code_points = 0; diff --git a/AK/Utf16View.h b/AK/Utf16View.h index 2eb04632a1e..0c396983ac2 100644 --- a/AK/Utf16View.h +++ b/AK/Utf16View.h @@ -102,11 +102,6 @@ class Utf16View { public: using Iterator = Utf16CodePointIterator; - enum class AllowInvalidCodeUnits { - No, - Yes, - }; - Utf16View() = default; ~Utf16View() = default; @@ -130,8 +125,8 @@ public: { } - ErrorOr to_utf8(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const; - ErrorOr to_byte_string(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const; + ErrorOr to_utf8(AllowLonelySurrogates = AllowLonelySurrogates::Yes) const; + ErrorOr to_byte_string(AllowLonelySurrogates = AllowLonelySurrogates::Yes) const; [[nodiscard]] constexpr ReadonlySpan span() const { @@ -187,13 +182,13 @@ public: [[nodiscard]] constexpr bool is_empty() const { return length_in_code_units() == 0; } [[nodiscard]] bool is_ascii() const; - [[nodiscard]] ALWAYS_INLINE bool validate(AllowInvalidCodeUnits allow_invalid_code_units = AllowInvalidCodeUnits::No) const + [[nodiscard]] ALWAYS_INLINE bool validate(AllowLonelySurrogates allow_lonely_surrogates = AllowLonelySurrogates::Yes) const { size_t valid_code_units = 0; - return validate(valid_code_units, allow_invalid_code_units); + return validate(valid_code_units, allow_lonely_surrogates); } - [[nodiscard]] bool validate(size_t& valid_code_units, AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const; + [[nodiscard]] bool validate(size_t& valid_code_units, AllowLonelySurrogates = AllowLonelySurrogates::Yes) const; [[nodiscard]] constexpr size_t length_in_code_units() const { return m_length_in_code_units; } diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp index 648516c281f..18535cefd34 100644 --- a/AK/Utf8View.cpp +++ b/AK/Utf8View.cpp @@ -185,16 +185,16 @@ Utf8View Utf8View::trim(Utf8View const& characters, TrimMode mode) const return substring_view(substring_start, substring_length); } -bool Utf8View::validate(size_t& valid_bytes, AllowSurrogates allow_surrogates) const +bool Utf8View::validate(size_t& valid_bytes, AllowLonelySurrogates allow_lonely_surrogates) const { auto result = simdutf::validate_utf8_with_errors(m_string.characters_without_null_termination(), m_string.length()); valid_bytes = result.count; - if (result.error == simdutf::SURROGATE && allow_surrogates == AllowSurrogates::Yes) { + if (result.error == simdutf::SURROGATE && allow_lonely_surrogates == AllowLonelySurrogates::Yes) { valid_bytes += 3; // All surrogates have a UTF-8 byte length of 3. size_t substring_valid_bytes = 0; - auto is_valid = substring_view(valid_bytes).validate(substring_valid_bytes, allow_surrogates); + auto is_valid = substring_view(valid_bytes).validate(substring_valid_bytes, allow_lonely_surrogates); valid_bytes += substring_valid_bytes; return is_valid; diff --git a/AK/Utf8View.h b/AK/Utf8View.h index 81c4a7ce506..3b8d0cd63a9 100644 --- a/AK/Utf8View.h +++ b/AK/Utf8View.h @@ -13,6 +13,7 @@ #include #include #include +#include namespace AK { @@ -77,12 +78,6 @@ public: } explicit Utf8View(ByteString&&) = delete; - - enum class AllowSurrogates { - Yes, - No, - }; - ~Utf8View() = default; StringView as_string() const { return m_string; } @@ -135,13 +130,13 @@ public: return m_length; } - bool validate(AllowSurrogates allow_surrogates = AllowSurrogates::Yes) const + bool validate(AllowLonelySurrogates allow_lonely_surrogates = AllowLonelySurrogates::Yes) const { size_t valid_bytes = 0; - return validate(valid_bytes, allow_surrogates); + return validate(valid_bytes, allow_lonely_surrogates); } - bool validate(size_t& valid_bytes, AllowSurrogates allow_surrogates = AllowSurrogates::Yes) const; + bool validate(size_t& valid_bytes, AllowLonelySurrogates allow_lonely_surrogates = AllowLonelySurrogates::Yes) const; template auto for_each_split_view(Function splitter, SplitBehavior split_behavior, Callback callback) const diff --git a/Libraries/LibJS/Runtime/AbstractOperations.cpp b/Libraries/LibJS/Runtime/AbstractOperations.cpp index 80b90c65a94..0908bd6e42b 100644 --- a/Libraries/LibJS/Runtime/AbstractOperations.cpp +++ b/Libraries/LibJS/Runtime/AbstractOperations.cpp @@ -1393,7 +1393,7 @@ ThrowCompletionOr get_substitution(VM& vm, Utf16View const& matched, Utf // 2. Let groupName be the substring of templateRemainder from 2 to gtPos. auto group_name_view = template_remainder.substring_view(2, *greater_than_position - 2); - auto group_name = MUST(group_name_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)); + auto group_name = MUST(group_name_view.to_utf8()); // 3. Assert: namedCaptures is an Object. VERIFY(named_captures.is_object()); @@ -1435,7 +1435,7 @@ ThrowCompletionOr get_substitution(VM& vm, Utf16View const& matched, Utf } // 6. Return result. - return MUST(Utf16View { result }.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)); + return MUST(Utf16View { result }.to_utf8()); } void DisposeCapability::visit_edges(GC::Cell::Visitor& visitor) const diff --git a/Libraries/LibJS/Runtime/StringPrototype.cpp b/Libraries/LibJS/Runtime/StringPrototype.cpp index f027b26cdd8..6ec4f3c905a 100644 --- a/Libraries/LibJS/Runtime/StringPrototype.cpp +++ b/Libraries/LibJS/Runtime/StringPrototype.cpp @@ -99,7 +99,7 @@ Optional string_index_of(Utf16View const& string, Utf16View const& searc static bool is_string_well_formed_unicode(Utf16View string) { // OPTIMIZATION: simdutf can do this much faster. - return string.validate(); + return string.validate(AllowLonelySurrogates::No); } // 11.1.4 CodePointAt ( string, position ), https://tc39.es/ecma262/#sec-codepointat diff --git a/Libraries/LibJS/Runtime/Utf16String.cpp b/Libraries/LibJS/Runtime/Utf16String.cpp index 461590b53ea..9acb9937fa0 100644 --- a/Libraries/LibJS/Runtime/Utf16String.cpp +++ b/Libraries/LibJS/Runtime/Utf16String.cpp @@ -125,12 +125,12 @@ Utf16View Utf16String::substring_view(size_t code_unit_offset) const String Utf16String::to_utf8() const { - return MUST(view().to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)); + return MUST(view().to_utf8()); } ByteString Utf16String::to_byte_string() const { - return MUST(view().to_byte_string(Utf16View::AllowInvalidCodeUnits::Yes)); + return MUST(view().to_byte_string()); } u16 Utf16String::code_unit_at(size_t index) const diff --git a/Libraries/LibRegex/RegexMatch.h b/Libraries/LibRegex/RegexMatch.h index f1860ae13c3..b5ed2d5ca80 100644 --- a/Libraries/LibRegex/RegexMatch.h +++ b/Libraries/LibRegex/RegexMatch.h @@ -181,7 +181,7 @@ public: { return m_view.visit( [](StringView view) { return view.to_byte_string(); }, - [](Utf16View view) { return view.to_byte_string(Utf16View::AllowInvalidCodeUnits::Yes).release_value_but_fixme_should_propagate_errors(); }, + [](Utf16View view) { return view.to_byte_string().release_value_but_fixme_should_propagate_errors(); }, [](auto& view) { StringBuilder builder; for (auto it = view.begin(); it != view.end(); ++it) @@ -194,7 +194,7 @@ public: { return m_view.visit( [](StringView view) { return String::from_utf8(view); }, - [](Utf16View view) { return view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes); }, + [](Utf16View view) { return view.to_utf8(); }, [](auto& view) -> ErrorOr { StringBuilder builder; for (auto it = view.begin(); it != view.end(); ++it) diff --git a/Libraries/LibWasm/Parser/Parser.cpp b/Libraries/LibWasm/Parser/Parser.cpp index e61784dce47..62c07d5b136 100644 --- a/Libraries/LibWasm/Parser/Parser.cpp +++ b/Libraries/LibWasm/Parser/Parser.cpp @@ -93,7 +93,7 @@ static ParseResult parse_name(Stream& stream) ScopeLogger logger; auto data = TRY(parse_vector(stream)); auto string = ByteString::copy(data); - if (!Utf8View(string).validate(Utf8View::AllowSurrogates::No)) + if (!Utf8View(string).validate(AllowLonelySurrogates::No)) return ParseError::InvalidUtf8; return string; } diff --git a/Libraries/LibWeb/DOM/CharacterData.cpp b/Libraries/LibWeb/DOM/CharacterData.cpp index 5125ab2b009..a2419de9a10 100644 --- a/Libraries/LibWeb/DOM/CharacterData.cpp +++ b/Libraries/LibWeb/DOM/CharacterData.cpp @@ -57,10 +57,10 @@ WebIDL::ExceptionOr CharacterData::substring_data(size_t offset, size_t // 3. If offset plus count is greater than length, return a string whose value is the code units from the offsetth code unit // to the end of node’s data, and then return. if (offset + count > length) - return MUST(utf16_view.substring_view(offset).to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)); + return MUST(utf16_view.substring_view(offset).to_utf8()); // 4. Return a string whose value is the code units from the offsetth code unit to the offset+countth code unit in node’s data. - return MUST(utf16_view.substring_view(offset, count).to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)); + return MUST(utf16_view.substring_view(offset, count).to_utf8()); } // https://dom.spec.whatwg.org/#concept-cd-replace @@ -99,7 +99,7 @@ WebIDL::ExceptionOr CharacterData::replace_data(size_t offset, size_t coun // OPTIMIZATION: Skip UTF-8 encoding if the characters are the same. if (!characters_are_the_same) { - m_data = MUST(full_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)); + m_data = MUST(full_view.to_utf8()); } // 4. Queue a mutation record of "characterData" for node with null, null, node’s data, « », « », null, and null. diff --git a/Libraries/LibWeb/FileAPI/FileReader.cpp b/Libraries/LibWeb/FileAPI/FileReader.cpp index 6d6ec1baac6..067519e2575 100644 --- a/Libraries/LibWeb/FileAPI/FileReader.cpp +++ b/Libraries/LibWeb/FileAPI/FileReader.cpp @@ -110,7 +110,7 @@ WebIDL::ExceptionOr FileReader::blob_package_data(JS::Realm& builder.ensure_capacity(bytes.size()); for (auto byte : bytes.bytes()) builder.unchecked_append(byte); - return MUST(Utf16View { builder }.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)); + return MUST(Utf16View { builder }.to_utf8()); } VERIFY_NOT_REACHED(); } diff --git a/Tests/AK/TestUtf16View.cpp b/Tests/AK/TestUtf16View.cpp index ba9cfe63aae..95df2be7a9f 100644 --- a/Tests/AK/TestUtf16View.cpp +++ b/Tests/AK/TestUtf16View.cpp @@ -56,13 +56,13 @@ TEST_CASE(encode_utf8) auto utf8_string = "Привет, мир! 😀 γειά σου κόσμος こんにちは世界"_string; auto string = MUST(AK::utf8_to_utf16(utf8_string)); Utf16View view { string }; - EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), utf8_string); - EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), utf8_string); + EXPECT_EQ(MUST(view.to_utf8(AllowLonelySurrogates::Yes)), utf8_string); + EXPECT_EQ(MUST(view.to_utf8(AllowLonelySurrogates::No)), utf8_string); } { Utf16View view { u"\xd83d"sv }; - EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv); - EXPECT(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No).is_error()); + EXPECT_EQ(MUST(view.to_utf8(AllowLonelySurrogates::Yes)), "\xed\xa0\xbd"sv); + EXPECT(view.to_utf8(AllowLonelySurrogates::No).is_error()); } } @@ -99,8 +99,8 @@ TEST_CASE(null_view) EXPECT(view.validate()); EXPECT_EQ(view.length_in_code_units(), 0zu); EXPECT_EQ(view.length_in_code_points(), 0zu); - EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No)), ""sv); - EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), ""sv); + EXPECT_EQ(MUST(view.to_utf8(AllowLonelySurrogates::No)), ""sv); + EXPECT_EQ(MUST(view.to_utf8(AllowLonelySurrogates::Yes)), ""sv); for ([[maybe_unused]] auto it : view) FAIL("Iterating a null UTF-16 string should not produce any values"); @@ -187,81 +187,81 @@ TEST_CASE(validate_invalid_utf16) { // Lonely high surrogate. invalid = u"\xd800"sv; - EXPECT(!invalid.validate(valid_code_units)); + EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No)); EXPECT_EQ(valid_code_units, 0uz); - EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes)); + EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes)); EXPECT_EQ(valid_code_units, 1uz); invalid = u"\xdbff"sv; - EXPECT(!invalid.validate(valid_code_units)); + EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No)); EXPECT_EQ(valid_code_units, 0uz); - EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes)); + EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes)); EXPECT_EQ(valid_code_units, 1uz); } { // Lonely low surrogate. invalid = u"\xdc00"sv; - EXPECT(!invalid.validate(valid_code_units)); + EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No)); EXPECT_EQ(valid_code_units, 0uz); - EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes)); + EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes)); EXPECT_EQ(valid_code_units, 1uz); invalid = u"\xdfff"sv; - EXPECT(!invalid.validate(valid_code_units)); + EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No)); EXPECT_EQ(valid_code_units, 0uz); - EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes)); + EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes)); EXPECT_EQ(valid_code_units, 1uz); } { // High surrogate followed by non-surrogate. invalid = u"\xd800\x0000"sv; - EXPECT(!invalid.validate(valid_code_units)); + EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No)); EXPECT_EQ(valid_code_units, 0uz); - EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes)); + EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes)); EXPECT_EQ(valid_code_units, 2uz); invalid = u"\xd800\xe000"sv; - EXPECT(!invalid.validate(valid_code_units)); + EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No)); EXPECT_EQ(valid_code_units, 0uz); - EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes)); + EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes)); EXPECT_EQ(valid_code_units, 2uz); } { // High surrogate followed by high surrogate. invalid = u"\xd800\xd800"sv; - EXPECT(!invalid.validate(valid_code_units)); + EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No)); EXPECT_EQ(valid_code_units, 0uz); - EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes)); + EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes)); EXPECT_EQ(valid_code_units, 2uz); invalid = u"\xd800\xdbff"sv; - EXPECT(!invalid.validate(valid_code_units)); + EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No)); EXPECT_EQ(valid_code_units, 0uz); - EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes)); + EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes)); EXPECT_EQ(valid_code_units, 2uz); } { // Valid UTF-16 followed by invalid code units. invalid = u"\x0041\x0041\xd800"sv; - EXPECT(!invalid.validate(valid_code_units)); + EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No)); EXPECT_EQ(valid_code_units, 2uz); - EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes)); + EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes)); EXPECT_EQ(valid_code_units, 3uz); invalid = u"\x0041\x0041\xd800"sv; - EXPECT(!invalid.validate(valid_code_units)); + EXPECT(!invalid.validate(valid_code_units, AllowLonelySurrogates::No)); EXPECT_EQ(valid_code_units, 2uz); - EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes)); + EXPECT(invalid.validate(valid_code_units, AllowLonelySurrogates::Yes)); EXPECT_EQ(valid_code_units, 3uz); } } @@ -368,8 +368,8 @@ TEST_CASE(substring_view) view = view.substring_view(7, 1); EXPECT(view.length_in_code_units() == 1); - EXPECT_EQ(MUST(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes)), "\xed\xa0\xbd"sv); - EXPECT(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No).is_error()); + EXPECT_EQ(MUST(view.to_utf8(AllowLonelySurrogates::Yes)), "\xed\xa0\xbd"sv); + EXPECT(view.to_utf8(AllowLonelySurrogates::No).is_error()); } } diff --git a/Tests/AK/TestUtf8View.cpp b/Tests/AK/TestUtf8View.cpp index 3c71659c9ff..10775410611 100644 --- a/Tests/AK/TestUtf8View.cpp +++ b/Tests/AK/TestUtf8View.cpp @@ -51,8 +51,8 @@ TEST_CASE(decode_utf8) TEST_CASE(null_view) { Utf8View view; - EXPECT(view.validate(Utf8View::AllowSurrogates::No)); - EXPECT(view.validate(Utf8View::AllowSurrogates::Yes)); + EXPECT(view.validate(AllowLonelySurrogates::No)); + EXPECT(view.validate(AllowLonelySurrogates::Yes)); EXPECT_EQ(view.byte_length(), 0zu); EXPECT_EQ(view.length(), 0zu); @@ -95,7 +95,7 @@ TEST_CASE(validate_invalid_ut8) char invalid_utf8_7[] = { (char)0xed, (char)0xa0, (char)0x80 }; // U+d800 Utf8View utf8_7 { StringView { invalid_utf8_7, 3 } }; - EXPECT(!utf8_7.validate(valid_bytes, Utf8View::AllowSurrogates::No)); + EXPECT(!utf8_7.validate(valid_bytes, AllowLonelySurrogates::No)); EXPECT(valid_bytes == 0); }