AK: Do not fall back to simdutf for UTF-16 ASCII validation

This was a mistake. Consider U+201C (LEFT DOUBLE QUOTATION MARK). This
code point is encoded as the bytes 0x1c 0x20 in UTF-16LE. Both of these
bytes are ASCII if interpreted as UTF-8. But the string itself is most
certainly not ASCII.
This commit is contained in:
Timothy Flynn 2025-07-09 13:54:56 -04:00 committed by Tim Flynn
commit 8fbb80fffc
Notes: github-actions[bot] 2025-07-18 16:47:36 +00:00
2 changed files with 4 additions and 1 deletions

View file

@ -127,7 +127,8 @@ ErrorOr<ByteString> Utf16View::to_byte_string(AllowLonelySurrogates allow_lonely
// Returns true only if AK::is_ascii holds for every element of span()
// (presumably this view's UTF-16 code units; confirm against the class definition).
//
// The check has to be made on whole 16-bit code units. Running a byte-wise ASCII
// validator over the underlying storage is incorrect: U+201C (LEFT DOUBLE QUOTATION
// MARK) is stored as the bytes 0x1c 0x20 in UTF-16LE, and both bytes are ASCII when
// looked at individually, even though the string itself is not ASCII.
bool Utf16View::is_ascii() const
{
    // FIXME: Petition simdutf to implement an ASCII validator for UTF-16.
    return all_of(span(), AK::is_ascii);
}
bool Utf16View::validate(size_t& valid_code_units, AllowLonelySurrogates allow_lonely_surrogates) const

View file

@ -332,10 +332,12 @@ TEST_CASE(is_ascii)
EXPECT(u"a"sv.is_ascii());
EXPECT(u"foo"sv.is_ascii());
EXPECT(u"foo\t\n\rbar\v\b123"sv.is_ascii());
EXPECT(u"The quick (\"brown\") fox can't jump 32.3 feet, right?"sv.is_ascii());
EXPECT(!u"😀"sv.is_ascii());
EXPECT(!u"foo 😀"sv.is_ascii());
EXPECT(!u"😀 foo"sv.is_ascii());
EXPECT(!u"The quick (“brown”) fox cant jump 32.3 feet, right?"sv.is_ascii());
}
TEST_CASE(equals_ignoring_case)