From 8fbb80fffc0aacd7918dbf9b1d3c1f3ec4f1ec22 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Wed, 9 Jul 2025 13:54:56 -0400 Subject: [PATCH] AK: Do not fall back to simdutf for UTF-16 ASCII validation This was a mistake. Consider U+201C (LEFT DOUBLE QUOTATION MARK). This code point is encoded as the bytes 0x1c 0x20 in UTF-16LE. Both of these bytes are ASCII if interpreted as UTF-8. But the string itself is most certainly not ASCII. --- AK/Utf16View.cpp | 3 ++- Tests/AK/TestUtf16View.cpp | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/AK/Utf16View.cpp b/AK/Utf16View.cpp index d9fd20c50ef..1ab94e296be 100644 --- a/AK/Utf16View.cpp +++ b/AK/Utf16View.cpp @@ -127,7 +127,8 @@ ErrorOr<ByteString> Utf16View::to_byte_string(AllowLonelySurrogates allow_lonely bool Utf16View::is_ascii() const { - return simdutf::validate_ascii(reinterpret_cast<char const*>(m_string), length_in_code_units() * sizeof(char16_t)); + // FIXME: Petition simdutf to implement an ASCII validator for UTF-16. + return all_of(span(), AK::is_ascii); } bool Utf16View::validate(size_t& valid_code_units, AllowLonelySurrogates allow_lonely_surrogates) const diff --git a/Tests/AK/TestUtf16View.cpp b/Tests/AK/TestUtf16View.cpp index 95df2be7a9f..818ef7b197e 100644 --- a/Tests/AK/TestUtf16View.cpp +++ b/Tests/AK/TestUtf16View.cpp @@ -332,10 +332,12 @@ TEST_CASE(is_ascii) EXPECT(u"a"sv.is_ascii()); EXPECT(u"foo"sv.is_ascii()); EXPECT(u"foo\t\n\rbar\v\b123"sv.is_ascii()); + EXPECT(u"The quick (\"brown\") fox can't jump 32.3 feet, right?"sv.is_ascii()); EXPECT(!u"😀"sv.is_ascii()); EXPECT(!u"foo 😀"sv.is_ascii()); EXPECT(!u"😀 foo"sv.is_ascii()); + EXPECT(!u"The quick (“brown”) fox can’t jump 32.3 feet, right?"sv.is_ascii()); } TEST_CASE(equals_ignoring_case)