From 6c73dff120ad9fd7a37d81f5900cd74e84ebe0e9 Mon Sep 17 00:00:00 2001
From: Timothy Flynn
Date: Thu, 24 Jul 2025 11:44:19 -0400
Subject: [PATCH] AK: Implement a UTF-16 method to check if a string is ASCII whitespace

---
Reviewer notes (by git convention, text between the "---" marker and the
first "diff --git" line is commentary, not commit content):

* Utf16View::is_ascii_whitespace() returns all_of(..., AK::is_ascii_space)
  over ascii_span() when has_ascii_storage() is true, and over utf16_span()
  otherwise.
* Utf16StringBase::is_ascii_whitespace() only forwards to
  utf16_view().is_ascii_whitespace().
* The new test expects a default-constructed Utf16View to report true,
  expects strings made of ' ', '\t', '\r', '\n' and '\v' to report true,
  and expects "a", an emoji, U+00A0 and U+FEFF to report false.
* NOTE(review): the line layout below was restored from a
  whitespace-collapsed copy of this patch. Blank context lines were placed
  so that each hunk matches its "@@" line counts; the indentation of
  context/added lines and the exact spacing inside string literals are
  assumed - verify against the tree before applying.

 AK/Utf16StringBase.h       |  1 +
 AK/Utf16View.h             |  8 ++++++++
 Tests/AK/TestUtf16View.cpp | 16 ++++++++++++++++
 3 files changed, 25 insertions(+)

diff --git a/AK/Utf16StringBase.h b/AK/Utf16StringBase.h
index f245c99ecf3..120e056e48e 100644
--- a/AK/Utf16StringBase.h
+++ b/AK/Utf16StringBase.h
@@ -156,6 +156,7 @@ public:
 
     [[nodiscard]] ALWAYS_INLINE bool is_empty() const { return length_in_code_units() == 0uz; }
     [[nodiscard]] ALWAYS_INLINE bool is_ascii() const { return utf16_view().is_ascii(); }
+    [[nodiscard]] ALWAYS_INLINE bool is_ascii_whitespace() const { return utf16_view().is_ascii_whitespace(); }
 
     [[nodiscard]] ALWAYS_INLINE size_t length_in_code_units() const
     {
diff --git a/AK/Utf16View.h b/AK/Utf16View.h
index 22d9c0a0dea..566821b6128 100644
--- a/AK/Utf16View.h
+++ b/AK/Utf16View.h
@@ -268,8 +268,16 @@ public:
     }
 
     [[nodiscard]] constexpr bool is_empty() const { return length_in_code_units() == 0; }
+
     [[nodiscard]] bool is_ascii() const;
 
+    [[nodiscard]] constexpr bool is_ascii_whitespace() const
+    {
+        if (has_ascii_storage())
+            return all_of(ascii_span(), AK::is_ascii_space);
+        return all_of(utf16_span(), AK::is_ascii_space);
+    }
+
     [[nodiscard]] ALWAYS_INLINE bool validate(AllowLonelySurrogates allow_lonely_surrogates = AllowLonelySurrogates::Yes) const
     {
         size_t valid_code_units = 0;
diff --git a/Tests/AK/TestUtf16View.cpp b/Tests/AK/TestUtf16View.cpp
index 3a3bd759f9c..1313ad99f48 100644
--- a/Tests/AK/TestUtf16View.cpp
+++ b/Tests/AK/TestUtf16View.cpp
@@ -341,6 +341,22 @@ TEST_CASE(is_ascii)
     EXPECT(!u"The quick (“brown”) fox can’t jump 32.3 feet, right?"sv.is_ascii());
 }
 
+TEST_CASE(is_ascii_whitespace)
+{
+    EXPECT(Utf16View {}.is_ascii_whitespace());
+    EXPECT(u" "sv.is_ascii_whitespace());
+    EXPECT(u"\t"sv.is_ascii_whitespace());
+    EXPECT(u"\r"sv.is_ascii_whitespace());
+    EXPECT(u"\n"sv.is_ascii_whitespace());
+    EXPECT(u" \t\r\n\v "sv.is_ascii_whitespace());
+
+    EXPECT(!u"a"sv.is_ascii_whitespace());
+    EXPECT(!u"😀"sv.is_ascii_whitespace());
+    EXPECT(!u"\u00a0"sv.is_ascii_whitespace());
+    EXPECT(!u"\ufeff"sv.is_ascii_whitespace());
+    EXPECT(!u" \t \u00a0 \ufeff "sv.is_ascii_whitespace());
+}
+
 TEST_CASE(to_ascii_lowercase)
 {
     EXPECT_EQ(u""sv.to_ascii_lowercase(), u""sv);