mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-21 12:05:15 +00:00
AK+LibURL+LibWeb: Use simdutf to validate ASCII strings
simdutf provides a vectorized ASCII validator, so let's use that instead of looping over strings manually.
This commit is contained in:
parent
212095e1c2
commit
ed3a677f08
10 changed files with 32 additions and 11 deletions
|
@ -112,6 +112,7 @@ public:
|
|||
|
||||
[[nodiscard]] String to_ascii_lowercase() const;
|
||||
[[nodiscard]] String to_ascii_uppercase() const;
|
||||
// Returns the result of StringView::is_ascii() applied to this string's bytes
// (as exposed by bytes_as_string_view()).
[[nodiscard]] bool is_ascii() const { return bytes_as_string_view().is_ascii(); }
|
||||
|
||||
// Compare this String against another string with caseless matching. Using this method requires linking LibUnicode into your application.
|
||||
[[nodiscard]] bool equals_ignoring_case(String const&) const;
|
||||
|
|
|
@ -15,6 +15,8 @@
|
|||
#include <AK/StringView.h>
|
||||
#include <AK/Vector.h>
|
||||
|
||||
#include <simdutf.h>
|
||||
|
||||
namespace AK {
|
||||
|
||||
StringView::StringView(String const& string)
|
||||
|
@ -195,6 +197,13 @@ bool StringView::equals_ignoring_ascii_case(StringView other) const
|
|||
return StringUtils::equals_ignoring_ascii_case(*this, other);
|
||||
}
|
||||
|
||||
// Reports whether this view is an ASCII string.
// An empty view is reported as ASCII; any other view is checked by handing its
// character pointer and length to simdutf's ASCII validator.
bool StringView::is_ascii() const
|
||||
{
|
||||
// Answer for the empty view directly, without calling into simdutf.
// NOTE(review): presumably this also avoids passing a null data pointer to
// simdutf for a default-constructed view - confirm.
if (is_empty())
|
||||
return true;
|
||||
return simdutf::validate_ascii(characters_without_null_termination(), length());
|
||||
}
|
||||
|
||||
ByteString StringView::to_lowercase_string() const
|
||||
{
|
||||
return StringImpl::create_lowercased(characters_without_null_termination(), length()).release_nonnull();
|
||||
|
|
|
@ -100,6 +100,7 @@ public:
|
|||
[[nodiscard]] bool contains(u32) const;
|
||||
[[nodiscard]] bool contains(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
|
||||
[[nodiscard]] bool equals_ignoring_ascii_case(StringView) const;
|
||||
[[nodiscard]] bool is_ascii() const;
|
||||
|
||||
// Forwards to StringUtils::trim() with this view, the given characters and the
// trim mode (TrimMode::Both when the caller does not specify one).
[[nodiscard]] StringView trim(StringView characters, TrimMode mode = TrimMode::Both) const { return StringUtils::trim(*this, characters, mode); }
|
||||
// Forwards to StringUtils::trim_whitespace() with this view and the trim mode
// (TrimMode::Both when the caller does not specify one).
[[nodiscard]] StringView trim_whitespace(TrimMode mode = TrimMode::Both) const { return StringUtils::trim_whitespace(*this, mode); }
|
||||
|
|
|
@ -196,7 +196,7 @@ Optional<String> Host::public_suffix() const
|
|||
auto public_suffix = get_public_suffix(host_string.bytes_as_string_view()).value_or("*"_string);
|
||||
|
||||
// 4. Assert: publicSuffix is an ASCII string that does not end with ".".
|
||||
VERIFY(all_of(public_suffix.code_points(), is_ascii));
|
||||
VERIFY(public_suffix.is_ascii());
|
||||
VERIFY(!public_suffix.ends_with('.'));
|
||||
|
||||
// 5. Return publicSuffix and trailingDot concatenated.
|
||||
|
@ -223,7 +223,7 @@ Optional<String> Host::registrable_domain() const
|
|||
auto registrable_domain = get_registrable_domain(host_string).value_or("*"_string);
|
||||
|
||||
// 4. Assert: registrableDomain is an ASCII string that does not end with ".".
|
||||
VERIFY(all_of(registrable_domain.code_points(), is_ascii));
|
||||
VERIFY(registrable_domain.is_ascii());
|
||||
VERIFY(!registrable_domain.ends_with('.'));
|
||||
|
||||
// 5. Return registrableDomain and trailingDot concatenated.
|
||||
|
|
|
@ -514,7 +514,7 @@ static ErrorOr<String> domain_to_ascii(StringView domain, bool be_strict)
|
|||
// OPTIMIZATION: If beStrict is false, domain is an ASCII string, and strictly splitting domain on U+002E (.)
|
||||
// does not produce any item that starts with an ASCII case-insensitive match for "xn--", this
|
||||
// step is equivalent to ASCII lowercasing domain.
|
||||
if (!be_strict && all_of(domain, is_ascii)) {
|
||||
if (!be_strict && domain.is_ascii()) {
|
||||
// 3. If result is the empty string, domain-to-ASCII validation error, return failure.
|
||||
if (domain.is_empty())
|
||||
return Error::from_string_literal("Empty domain");
|
||||
|
|
|
@ -14,7 +14,7 @@ namespace URL::Pattern {
|
|||
String escape_a_pattern_string(String const& input)
|
||||
{
|
||||
// 1. Assert: input is an ASCII string.
|
||||
VERIFY(all_of(input.code_points(), is_ascii));
|
||||
VERIFY(input.is_ascii());
|
||||
|
||||
// 2. Let result be the empty string.
|
||||
StringBuilder result;
|
||||
|
@ -51,7 +51,7 @@ String escape_a_pattern_string(String const& input)
|
|||
String escape_a_regexp_string(String const& input)
|
||||
{
|
||||
// 1. Assert: input is an ASCII string.
|
||||
VERIFY(all_of(input.code_points(), is_ascii));
|
||||
VERIFY(input.is_ascii());
|
||||
|
||||
// 2. Let result be the empty string.
|
||||
StringBuilder builder;
|
||||
|
|
|
@ -47,7 +47,7 @@ GC::Ref<Policy> Policy::parse_a_serialized_csp(JS::Realm& realm, Variant<ByteBuf
|
|||
auto stripped_token_view = stripped_token.bytes_as_string_view();
|
||||
|
||||
// 2. If token is an empty string, or if token is not an ASCII string, continue.
|
||||
if (stripped_token.is_empty() || !all_of(stripped_token_view, is_ascii))
|
||||
if (stripped_token.is_empty() || !stripped_token_view.is_ascii())
|
||||
continue;
|
||||
|
||||
// 3. Let directive name be the result of collecting a sequence of code points from token which are not
|
||||
|
|
|
@ -436,7 +436,7 @@ MultipartParsingErrorOr<Vector<XHR::FormDataEntry>> parse_multipart_form_data(JS
|
|||
header.content_type = "text/plain"_string;
|
||||
|
||||
// 2. If contentType is not an ASCII string, set contentType to the empty string.
|
||||
if (!all_of(header.content_type->code_points(), is_ascii)) {
|
||||
if (!header.content_type->is_ascii()) {
|
||||
header.content_type = ""_string;
|
||||
}
|
||||
|
||||
|
|
|
@ -333,10 +333,8 @@ void CookieJar::store_cookie(Web::Cookie::ParsedCookie const& parsed_cookie, con
|
|||
|
||||
// 8. If the domain-attribute contains a character that is not in the range of [USASCII] characters, abort these
|
||||
// steps and ignore the cookie entirely.
|
||||
for (auto code_point : domain_attribute.code_points()) {
|
||||
if (!is_ascii(code_point))
|
||||
return;
|
||||
}
|
||||
if (!domain_attribute.is_ascii())
|
||||
return;
|
||||
|
||||
// 9. If the user agent is configured to reject "public suffixes" and the domain-attribute is a public suffix:
|
||||
if (URL::is_public_suffix(domain_attribute)) {
|
||||
|
|
|
@ -1516,3 +1516,15 @@ TEST_CASE(to_ascii_uppercase)
|
|||
auto uppercased = long_string.to_ascii_uppercase();
|
||||
EXPECT_EQ(long_string.bytes().data(), uppercased.bytes().data());
|
||||
}
|
||||
|
||||
// Exercises String::is_ascii() on strings that must be accepted (empty, short,
// and long ASCII-only) and on strings that must be rejected (containing
// non-ASCII code points).
TEST_CASE(is_ascii)
|
||||
{
|
||||
// Positive cases: the default-constructed string and ASCII-only strings of increasing length.
EXPECT(String {}.is_ascii());
|
||||
EXPECT(" "_string.is_ascii());
|
||||
EXPECT("abc"_string.is_ascii());
|
||||
EXPECT("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!@#$%^&*()"_string.is_ascii());
|
||||
|
||||
// Negative cases: a lone non-ASCII character, and non-ASCII characters embedded in a longer ASCII run.
EXPECT(!"€"_string.is_ascii());
|
||||
EXPECT(!"😀"_string.is_ascii());
|
||||
EXPECT(!"abcdefghijklmnopqrstuvwxyz😀ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789😀!@#$%^&*()"_string.is_ascii());
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue