From 319e7aa03bbeb03fbb3f3bb14cf0e24a7c178aba Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Sat, 2 Aug 2025 19:08:20 -0400 Subject: [PATCH] AK: Do not replace lonely surrogates with U+FFFD while iterating Utf8View doesn't do this either. The wobbly format is expected by JS. --- AK/Utf16View.h | 5 ----- Tests/AK/TestUtf16View.cpp | 8 ++++---- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/AK/Utf16View.h b/AK/Utf16View.h index d752e602db3..715a66582e3 100644 --- a/AK/Utf16View.h +++ b/AK/Utf16View.h @@ -78,13 +78,8 @@ public: if (UnicodeUtils::is_utf16_low_surrogate(next_code_unit)) return UnicodeUtils::decode_utf16_surrogate_pair(code_unit, next_code_unit); } - - return UnicodeUtils::REPLACEMENT_CODE_POINT; } - if (UnicodeUtils::is_utf16_low_surrogate(code_unit)) - return UnicodeUtils::REPLACEMENT_CODE_POINT; - return static_cast(code_unit); } diff --git a/Tests/AK/TestUtf16View.cpp b/Tests/AK/TestUtf16View.cpp index 7db9edad5bd..00137a8ea49 100644 --- a/Tests/AK/TestUtf16View.cpp +++ b/Tests/AK/TestUtf16View.cpp @@ -331,7 +331,7 @@ TEST_CASE(decode_invalid_utf16) Utf16View view { u"AB\xd800"sv }; EXPECT_EQ(view.length_in_code_units(), 3uz); - auto expected = Array { (u32)0x41, 0x42, 0xfffd }; + auto expected = Array { (u32)0x41, 0x42, 0xd800 }; EXPECT_EQ(expected.size(), view.length_in_code_points()); size_t i = 0; @@ -345,7 +345,7 @@ TEST_CASE(decode_invalid_utf16) Utf16View view { u"AB\xdc00"sv }; EXPECT_EQ(view.length_in_code_units(), 3uz); - auto expected = Array { (u32)0x41, 0x42, 0xfffd }; + auto expected = Array { (u32)0x41, 0x42, 0xdc00 }; EXPECT_EQ(expected.size(), view.length_in_code_points()); size_t i = 0; @@ -359,7 +359,7 @@ TEST_CASE(decode_invalid_utf16) Utf16View view { u"AB\xd800\x0000"sv }; EXPECT_EQ(view.length_in_code_units(), 4uz); - auto expected = Array { (u32)0x41, 0x42, 0xfffd, 0 }; + auto expected = Array { (u32)0x41, 0x42, 0xd800, 0 }; EXPECT_EQ(expected.size(), view.length_in_code_points()); size_t i = 0; @@ -373,7 
+373,7 @@ TEST_CASE(decode_invalid_utf16) Utf16View view { u"AB\xd800\xd800"sv }; EXPECT_EQ(view.length_in_code_units(), 4uz); - auto expected = Array { (u32)0x41, 0x42, 0xfffd, 0xfffd }; + auto expected = Array { (u32)0x41, 0x42, 0xd800, 0xd800 }; EXPECT_EQ(expected.size(), view.length_in_code_points()); size_t i = 0;