mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-09-06 09:36:08 +00:00
AK: Do not fall back to simdutf for UTF-16 ASCII validation
Falling back to simdutf's byte-wise ASCII validator for UTF-16 data was a mistake. Consider U+201C (LEFT DOUBLE QUOTATION MARK). This code point is encoded as the bytes 0x1c 0x20 in UTF-16LE. Both of these bytes are ASCII if interpreted as UTF-8, so a byte-wise check accepts them. But the string itself is most certainly not ASCII.
This commit is contained in:
parent
4ee8110449
commit
8fbb80fffc
Notes:
github-actions[bot]
2025-07-18 16:47:36 +00:00
Author: https://github.com/trflynn89
Commit: 8fbb80fffc
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5388
Reviewed-by: https://github.com/shannonbooth ✅
2 changed files with 4 additions and 1 deletion
|
@@ -127,7 +127,8 @@ ErrorOr<ByteString> Utf16View::to_byte_string(AllowLonelySurrogates allow_lonely
|
|||
|
||||
bool Utf16View::is_ascii() const
|
||||
{
|
||||
return simdutf::validate_ascii(reinterpret_cast<char const*>(m_string), length_in_code_units() * sizeof(char16_t));
|
||||
// FIXME: Petition simdutf to implement an ASCII validator for UTF-16.
|
||||
return all_of(span(), AK::is_ascii);
|
||||
}
|
||||
|
||||
bool Utf16View::validate(size_t& valid_code_units, AllowLonelySurrogates allow_lonely_surrogates) const
|
||||
|
|
|
@@ -332,10 +332,12 @@ TEST_CASE(is_ascii)
|
|||
EXPECT(u"a"sv.is_ascii());
|
||||
EXPECT(u"foo"sv.is_ascii());
|
||||
EXPECT(u"foo\t\n\rbar\v\b123"sv.is_ascii());
|
||||
EXPECT(u"The quick (\"brown\") fox can't jump 32.3 feet, right?"sv.is_ascii());
|
||||
|
||||
EXPECT(!u"😀"sv.is_ascii());
|
||||
EXPECT(!u"foo 😀"sv.is_ascii());
|
||||
EXPECT(!u"😀 foo"sv.is_ascii());
|
||||
EXPECT(!u"The quick (“brown”) fox can’t jump 32.3 feet, right?"sv.is_ascii());
|
||||
}
|
||||
|
||||
TEST_CASE(equals_ignoring_case)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue