AK: Do not replace lonely surrogates with U+FFFD while iterating

Utf8View doesn't do this either. The wobbly format is expected by JS.
This commit is contained in:
Timothy Flynn 2025-08-02 19:08:20 -04:00 committed by Tim Flynn
commit 319e7aa03b
Notes: github-actions[bot] 2025-08-05 11:09:04 +00:00
2 changed files with 4 additions and 9 deletions

View file

@ -78,13 +78,8 @@ public:
if (UnicodeUtils::is_utf16_low_surrogate(next_code_unit))
return UnicodeUtils::decode_utf16_surrogate_pair(code_unit, next_code_unit);
}
return UnicodeUtils::REPLACEMENT_CODE_POINT;
}
if (UnicodeUtils::is_utf16_low_surrogate(code_unit))
return UnicodeUtils::REPLACEMENT_CODE_POINT;
return static_cast<u32>(code_unit);
}

View file

@ -331,7 +331,7 @@ TEST_CASE(decode_invalid_utf16)
Utf16View view { u"AB\xd800"sv };
EXPECT_EQ(view.length_in_code_units(), 3uz);
auto expected = Array { (u32)0x41, 0x42, 0xfffd };
auto expected = Array { (u32)0x41, 0x42, 0xd800 };
EXPECT_EQ(expected.size(), view.length_in_code_points());
size_t i = 0;
@ -345,7 +345,7 @@ TEST_CASE(decode_invalid_utf16)
Utf16View view { u"AB\xdc00"sv };
EXPECT_EQ(view.length_in_code_units(), 3uz);
auto expected = Array { (u32)0x41, 0x42, 0xfffd };
auto expected = Array { (u32)0x41, 0x42, 0xdc00 };
EXPECT_EQ(expected.size(), view.length_in_code_points());
size_t i = 0;
@ -359,7 +359,7 @@ TEST_CASE(decode_invalid_utf16)
Utf16View view { u"AB\xd800\x0000"sv };
EXPECT_EQ(view.length_in_code_units(), 4uz);
auto expected = Array { (u32)0x41, 0x42, 0xfffd, 0 };
auto expected = Array { (u32)0x41, 0x42, 0xd800, 0 };
EXPECT_EQ(expected.size(), view.length_in_code_points());
size_t i = 0;
@ -373,7 +373,7 @@ TEST_CASE(decode_invalid_utf16)
Utf16View view { u"AB\xd800\xd800"sv };
EXPECT_EQ(view.length_in_code_units(), 4uz);
auto expected = Array { (u32)0x41, 0x42, 0xfffd, 0xfffd };
auto expected = Array { (u32)0x41, 0x42, 0xd800, 0xd800 };
EXPECT_EQ(expected.size(), view.length_in_code_points());
size_t i = 0;