mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-05 15:49:11 +00:00
AK: Allow treating UTF-16 views with lonely surrogates as valid
Much of the web requires us to allow lonely surrogates in UTF-16 data. The default behavior of disallowing such code units is not changed here; that will happen in an upcoming commit.
This commit is contained in:
parent
d978a582a0
commit
2abc955ca9
Notes:
github-actions[bot]
2025-07-03 13:53:41 +00:00
Author: https://github.com/trflynn89
Commit: 2abc955ca9
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5228
Reviewed-by: https://github.com/ADKaster ✅
Reviewed-by: https://github.com/shannonbooth
3 changed files with 84 additions and 38 deletions
|
@ -187,55 +187,86 @@ TEST_CASE(iterate_utf16)
|
|||
TEST_CASE(validate_invalid_utf16)
|
||||
{
|
||||
size_t valid_code_units = 0;
|
||||
Utf16View invalid;
|
||||
{
|
||||
// Lonely high surrogate.
|
||||
auto invalid = Array { (u16)0xd800 };
|
||||
EXPECT(!Utf16View(invalid).validate(valid_code_units));
|
||||
EXPECT(valid_code_units == 0);
|
||||
invalid = u"\xd800";
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
invalid = Array { (u16)0xdbff };
|
||||
EXPECT(!Utf16View(invalid).validate(valid_code_units));
|
||||
EXPECT(valid_code_units == 0);
|
||||
EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes));
|
||||
EXPECT_EQ(valid_code_units, 1uz);
|
||||
|
||||
invalid = u"\xdbff";
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes));
|
||||
EXPECT_EQ(valid_code_units, 1uz);
|
||||
}
|
||||
{
|
||||
// Lonely low surrogate.
|
||||
auto invalid = Array { (u16)0xdc00 };
|
||||
EXPECT(!Utf16View(invalid).validate(valid_code_units));
|
||||
EXPECT(valid_code_units == 0);
|
||||
invalid = u"\xdc00";
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
invalid = Array { (u16)0xdfff };
|
||||
EXPECT(!Utf16View(invalid).validate(valid_code_units));
|
||||
EXPECT(valid_code_units == 0);
|
||||
EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes));
|
||||
EXPECT_EQ(valid_code_units, 1uz);
|
||||
|
||||
invalid = u"\xdfff";
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes));
|
||||
EXPECT_EQ(valid_code_units, 1uz);
|
||||
}
|
||||
{
|
||||
// High surrogate followed by non-surrogate.
|
||||
auto invalid = Array { (u16)0xd800, 0 };
|
||||
EXPECT(!Utf16View(invalid).validate(valid_code_units));
|
||||
EXPECT(valid_code_units == 0);
|
||||
invalid = u"\xd800\x0000";
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
invalid = Array { (u16)0xd800, 0xe000 };
|
||||
EXPECT(!Utf16View(invalid).validate(valid_code_units));
|
||||
EXPECT(valid_code_units == 0);
|
||||
EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
|
||||
invalid = u"\xd800\xe000";
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
}
|
||||
{
|
||||
// High surrogate followed by high surrogate.
|
||||
auto invalid = Array { (u16)0xd800, 0xd800 };
|
||||
EXPECT(!Utf16View(invalid).validate(valid_code_units));
|
||||
EXPECT(valid_code_units == 0);
|
||||
invalid = u"\xd800\xd800";
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
invalid = Array { (u16)0xd800, 0xdbff };
|
||||
EXPECT(!Utf16View(invalid).validate(valid_code_units));
|
||||
EXPECT(valid_code_units == 0);
|
||||
EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
|
||||
invalid = u"\xd800\xdbff";
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 0uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
}
|
||||
{
|
||||
// Valid UTF-16 followed by invalid code units.
|
||||
auto invalid = Array { (u16)0x41, 0x41, 0xd800 };
|
||||
EXPECT(!Utf16View(invalid).validate(valid_code_units));
|
||||
EXPECT(valid_code_units == 2);
|
||||
invalid = u"\x0041\x0041\xd800";
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
|
||||
invalid = Array { (u16)0x41, 0x41, 0xd800 };
|
||||
EXPECT(!Utf16View(invalid).validate(valid_code_units));
|
||||
EXPECT(valid_code_units == 2);
|
||||
EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes));
|
||||
EXPECT_EQ(valid_code_units, 3uz);
|
||||
|
||||
invalid = u"\x0041\x0041\xd800";
|
||||
EXPECT(!invalid.validate(valid_code_units));
|
||||
EXPECT_EQ(valid_code_units, 2uz);
|
||||
|
||||
EXPECT(invalid.validate(valid_code_units, Utf16View::AllowInvalidCodeUnits::Yes));
|
||||
EXPECT_EQ(valid_code_units, 3uz);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue