diff --git a/AK/String.h b/AK/String.h index 9d5d3b5edf3..72224caff41 100644 --- a/AK/String.h +++ b/AK/String.h @@ -112,6 +112,7 @@ public: [[nodiscard]] String to_ascii_lowercase() const; [[nodiscard]] String to_ascii_uppercase() const; + [[nodiscard]] bool is_ascii() const { return bytes_as_string_view().is_ascii(); } // Compare this String against another string with caseless matching. Using this method requires linking LibUnicode into your application. [[nodiscard]] bool equals_ignoring_case(String const&) const; diff --git a/AK/StringView.cpp b/AK/StringView.cpp index 9db439acdb6..92b1cd30655 100644 --- a/AK/StringView.cpp +++ b/AK/StringView.cpp @@ -15,6 +15,8 @@ #include #include +#include <simdutf.h> + namespace AK { StringView::StringView(String const& string) @@ -195,6 +197,13 @@ bool StringView::equals_ignoring_ascii_case(StringView other) const return StringUtils::equals_ignoring_ascii_case(*this, other); } +bool StringView::is_ascii() const +{ + if (is_empty()) + return true; + return simdutf::validate_ascii(characters_without_null_termination(), length()); +} + ByteString StringView::to_lowercase_string() const { return StringImpl::create_lowercased(characters_without_null_termination(), length()).release_nonnull(); diff --git a/AK/StringView.h b/AK/StringView.h index f58827fdd19..7338acdba65 100644 --- a/AK/StringView.h +++ b/AK/StringView.h @@ -100,6 +100,7 @@ public: [[nodiscard]] bool contains(u32) const; [[nodiscard]] bool contains(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const; [[nodiscard]] bool equals_ignoring_ascii_case(StringView) const; + [[nodiscard]] bool is_ascii() const; [[nodiscard]] StringView trim(StringView characters, TrimMode mode = TrimMode::Both) const { return StringUtils::trim(*this, characters, mode); } [[nodiscard]] StringView trim_whitespace(TrimMode mode = TrimMode::Both) const { return StringUtils::trim_whitespace(*this, mode); } diff --git a/Libraries/LibURL/Host.cpp b/Libraries/LibURL/Host.cpp index 
e86446e2f27..589004ac855 100644 --- a/Libraries/LibURL/Host.cpp +++ b/Libraries/LibURL/Host.cpp @@ -196,7 +196,7 @@ Optional Host::public_suffix() const auto public_suffix = get_public_suffix(host_string.bytes_as_string_view()).value_or("*"_string); // 4. Assert: publicSuffix is an ASCII string that does not end with ".". - VERIFY(all_of(public_suffix.code_points(), is_ascii)); + VERIFY(public_suffix.is_ascii()); VERIFY(!public_suffix.ends_with('.')); // 5. Return publicSuffix and trailingDot concatenated. @@ -223,7 +223,7 @@ Optional Host::registrable_domain() const auto registrable_domain = get_registrable_domain(host_string).value_or("*"_string); // 4. Assert: registrableDomain is an ASCII string that does not end with ".". - VERIFY(all_of(registrable_domain.code_points(), is_ascii)); + VERIFY(registrable_domain.is_ascii()); VERIFY(!registrable_domain.ends_with('.')); // 5. Return registrableDomain and trailingDot concatenated. diff --git a/Libraries/LibURL/Parser.cpp b/Libraries/LibURL/Parser.cpp index 13c14819765..c72249cad33 100644 --- a/Libraries/LibURL/Parser.cpp +++ b/Libraries/LibURL/Parser.cpp @@ -514,7 +514,7 @@ static ErrorOr domain_to_ascii(StringView domain, bool be_strict) // OPTIMIZATION: If beStrict is false, domain is an ASCII string, and strictly splitting domain on U+002E (.) // does not produce any item that starts with an ASCII case-insensitive match for "xn--", this // step is equivalent to ASCII lowercasing domain. - if (!be_strict && all_of(domain, is_ascii)) { + if (!be_strict && domain.is_ascii()) { // 3. If result is the empty string, domain-to-ASCII validation error, return failure. 
if (domain.is_empty()) return Error::from_string_literal("Empty domain"); diff --git a/Libraries/LibURL/Pattern/String.cpp b/Libraries/LibURL/Pattern/String.cpp index f797c6afacf..d22c288ce59 100644 --- a/Libraries/LibURL/Pattern/String.cpp +++ b/Libraries/LibURL/Pattern/String.cpp @@ -14,7 +14,7 @@ namespace URL::Pattern { String escape_a_pattern_string(String const& input) { // 1. Assert: input is an ASCII string. - VERIFY(all_of(input.code_points(), is_ascii)); + VERIFY(input.is_ascii()); // 2. Let result be the empty string. StringBuilder result; @@ -51,7 +51,7 @@ String escape_a_pattern_string(String const& input) String escape_a_regexp_string(String const& input) { // 1. Assert: input is an ASCII string. - VERIFY(all_of(input.code_points(), is_ascii)); + VERIFY(input.is_ascii()); // 2. Let result be the empty string. StringBuilder builder; diff --git a/Libraries/LibWeb/ContentSecurityPolicy/Policy.cpp b/Libraries/LibWeb/ContentSecurityPolicy/Policy.cpp index 16a2e50c61b..b5b55b6c27a 100644 --- a/Libraries/LibWeb/ContentSecurityPolicy/Policy.cpp +++ b/Libraries/LibWeb/ContentSecurityPolicy/Policy.cpp @@ -47,7 +47,7 @@ GC::Ref Policy::parse_a_serialized_csp(JS::Realm& realm, Variant> parse_multipart_form_data(JS header.content_type = "text/plain"_string; // 2. If contentType is not an ASCII string, set contentType to the empty string. - if (!all_of(header.content_type->code_points(), is_ascii)) { + if (!header.content_type->is_ascii()) { header.content_type = ""_string; } diff --git a/Libraries/LibWebView/CookieJar.cpp b/Libraries/LibWebView/CookieJar.cpp index 9003709f957..0ec4bbcdeaf 100644 --- a/Libraries/LibWebView/CookieJar.cpp +++ b/Libraries/LibWebView/CookieJar.cpp @@ -333,10 +333,8 @@ void CookieJar::store_cookie(Web::Cookie::ParsedCookie const& parsed_cookie, con // 8. If the domain-attribute contains a character that is not in the range of [USASCII] characters, abort these // steps and ignore the cookie entirely. 
- for (auto code_point : domain_attribute.code_points()) { - if (!is_ascii(code_point)) - return; - } + if (!domain_attribute.is_ascii()) + return; // 9. If the user agent is configured to reject "public suffixes" and the domain-attribute is a public suffix: if (URL::is_public_suffix(domain_attribute)) { diff --git a/Tests/AK/TestString.cpp b/Tests/AK/TestString.cpp index 59b89b59af1..d94dfd80efb 100644 --- a/Tests/AK/TestString.cpp +++ b/Tests/AK/TestString.cpp @@ -1516,3 +1516,15 @@ TEST_CASE(to_ascii_uppercase) auto uppercased = long_string.to_ascii_uppercase(); EXPECT_EQ(long_string.bytes().data(), uppercased.bytes().data()); } + +TEST_CASE(is_ascii) +{ + EXPECT(String {}.is_ascii()); + EXPECT(" "_string.is_ascii()); + EXPECT("abc"_string.is_ascii()); + EXPECT("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()"_string.is_ascii()); + + EXPECT(!"€"_string.is_ascii()); + EXPECT(!"😀"_string.is_ascii()); + EXPECT(!"abcdefghijklmnopqrstuvwxyz😀ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789😀!@#$%^&*()"_string.is_ascii()); +}