From 1bc80848fb8f5fa15bd856863ce2d7040f29c4a4 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Wed, 6 Aug 2025 07:43:11 -0400 Subject: [PATCH] AK+LibWeb: Add a UTF-16 starts/ends with wrapper for a single code unit --- AK/Utf16StringBase.h | 3 +++ AK/Utf16View.h | 14 ++++++++++++++ Libraries/LibWeb/Editing/Commands.cpp | 8 ++++---- Libraries/LibWeb/Editing/Internal/Algorithms.cpp | 2 +- Libraries/LibWeb/HTML/Dates.cpp | 2 +- Libraries/LibWeb/HTML/HTMLInputElement.cpp | 2 +- Libraries/LibWeb/HTML/Scripting/ImportMap.cpp | 2 +- Libraries/LibWeb/Infra/Strings.cpp | 2 +- 8 files changed, 26 insertions(+), 9 deletions(-) diff --git a/AK/Utf16StringBase.h b/AK/Utf16StringBase.h index 48bb14f25c8..e3d74c2502c 100644 --- a/AK/Utf16StringBase.h +++ b/AK/Utf16StringBase.h @@ -254,7 +254,10 @@ public: [[nodiscard]] ALWAYS_INLINE size_t count(Utf16View const& needle) const { return utf16_view().count(needle); } + [[nodiscard]] ALWAYS_INLINE bool starts_with(char16_t needle) const { return utf16_view().starts_with(needle); } [[nodiscard]] ALWAYS_INLINE bool starts_with(Utf16View const& needle) const { return utf16_view().starts_with(needle); } + + [[nodiscard]] ALWAYS_INLINE bool ends_with(char16_t needle) const { return utf16_view().ends_with(needle); } [[nodiscard]] ALWAYS_INLINE bool ends_with(Utf16View const& needle) const { return utf16_view().ends_with(needle); } [[nodiscard]] ALWAYS_INLINE Vector split_view(char16_t needle, SplitBehavior split_behavior) const { return utf16_view().split_view(needle, split_behavior); } diff --git a/AK/Utf16View.h b/AK/Utf16View.h index 4304a24bdb6..646f6d64a6b 100644 --- a/AK/Utf16View.h +++ b/AK/Utf16View.h @@ -579,6 +579,13 @@ public: return count; } + [[nodiscard]] constexpr bool starts_with(char16_t needle) const + { + if (is_empty()) + return false; + return code_unit_at(0) == needle; + } + [[nodiscard]] constexpr bool starts_with(Utf16View const& needle) const { auto needle_length = needle.length_in_code_units(); @@ -590,6 +597,13 @@ public: return substring_view(0, needle_length) == needle; } + [[nodiscard]] constexpr bool ends_with(char16_t needle) const + { + if (is_empty()) + return false; + return code_unit_at(length_in_code_units() - 1) == needle; + } + [[nodiscard]] constexpr bool ends_with(Utf16View const& needle) const { auto needle_length = needle.length_in_code_units(); diff --git a/Libraries/LibWeb/Editing/Commands.cpp b/Libraries/LibWeb/Editing/Commands.cpp index 2c521022452..cc2eebd1c51 100644 --- a/Libraries/LibWeb/Editing/Commands.cpp +++ b/Libraries/LibWeb/Editing/Commands.cpp @@ -517,20 +517,20 @@ bool command_font_size_action(DOM::Document& document, Utf16String const& value) // 2. If value is not a valid floating point number, and would not be a valid floating point number if a single // leading "+" character were stripped, return false. if (!HTML::is_valid_floating_point_number(resulting_value.to_utf8_but_should_be_ported_to_utf16())) { - if (!resulting_value.starts_with("+"sv) + if (!resulting_value.starts_with('+') || !HTML::is_valid_floating_point_number(resulting_value.substring_view(1).to_utf8_but_should_be_ported_to_utf16())) return false; } // 3. If the first character of value is "+", delete the character and let mode be "relative-plus". auto mode = FontSizeMode::Absolute; - if (resulting_value.starts_with("+"sv)) { + if (resulting_value.starts_with('+')) { resulting_value = resulting_value.substring_view(1); mode = FontSizeMode::RelativePlus; } // 4. Otherwise, if the first character of value is "-", delete the character and let mode be "relative-minus". - else if (resulting_value.starts_with("-"sv)) { + else if (resulting_value.starts_with('-')) { resulting_value = resulting_value.substring_view(1); mode = FontSizeMode::RelativeMinus; } @@ -623,7 +623,7 @@ bool command_format_block_action(DOM::Document& document, Utf16String const& val // 1. If value begins with a "<" character and ends with a ">" character, remove the first and last characters from // it. auto resulting_value = Utf16String::from_utf16_without_validation( - value.starts_with("<"sv) && value.ends_with(">"sv) + value.starts_with('<') && value.ends_with('>') ? value.substring_view(1, value.length_in_code_units() - 2) : value); diff --git a/Libraries/LibWeb/Editing/Internal/Algorithms.cpp b/Libraries/LibWeb/Editing/Internal/Algorithms.cpp index 532a448ff74..e4d09cb1b40 100644 --- a/Libraries/LibWeb/Editing/Internal/Algorithms.cpp +++ b/Libraries/LibWeb/Editing/Internal/Algorithms.cpp @@ -498,7 +498,7 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa // AD-HOC: Use the white-space-collapse longhand instead of "white-space" shorthand: https://github.com/w3c/editing/issues/486. if (is(*end_node) && end_offset == end_node->length() && precedes_a_line_break(end_node)) { auto parent_white_space_collapse = resolved_keyword(*end_node->parent(), CSS::PropertyID::WhiteSpaceCollapse); - if (parent_white_space_collapse != CSS::Keyword::Preserve && end_node->text_content()->ends_with(" "sv)) { + if (parent_white_space_collapse != CSS::Keyword::Preserve && end_node->text_content()->ends_with(' ')) { // 1. Subtract one from end offset. --end_offset; diff --git a/Libraries/LibWeb/HTML/Dates.cpp b/Libraries/LibWeb/HTML/Dates.cpp index 308f2123347..575a712fa18 100644 --- a/Libraries/LibWeb/HTML/Dates.cpp +++ b/Libraries/LibWeb/HTML/Dates.cpp @@ -55,7 +55,7 @@ bool is_valid_week_string(Utf16View const& value) if (!is_ascii_digit(digit)) return false; - if (!parts[1].starts_with("W"sv)) + if (!parts[1].starts_with('W')) return false; if (!is_ascii_digit(parts[1].code_unit_at(1))) return false; diff --git a/Libraries/LibWeb/HTML/HTMLInputElement.cpp b/Libraries/LibWeb/HTML/HTMLInputElement.cpp index ba7534bf644..2127d3e7d56 100644 --- a/Libraries/LibWeb/HTML/HTMLInputElement.cpp +++ b/Libraries/LibWeb/HTML/HTMLInputElement.cpp @@ -1615,7 +1615,7 @@ static bool is_valid_simple_color(Utf16View const& value) if (value.length_in_code_units() != 7) return false; // and the first character is a U+0023 NUMBER SIGN character (#), - if (!value.starts_with("#"sv)) + if (!value.starts_with('#')) return false; // and the remaining six characters are all ASCII hex digits for (size_t i = 1; i < value.length_in_code_units(); i++) diff --git a/Libraries/LibWeb/HTML/Scripting/ImportMap.cpp b/Libraries/LibWeb/HTML/Scripting/ImportMap.cpp index ca869a70317..44d3065bd41 100644 --- a/Libraries/LibWeb/HTML/Scripting/ImportMap.cpp +++ b/Libraries/LibWeb/HTML/Scripting/ImportMap.cpp @@ -163,7 +163,7 @@ WebIDL::ExceptionOr sort_and_normalise_module_specifier_map( } // 6. If specifierKey ends with U+002F (/), and the serialization of addressURL does not end with U+002F (/), then: - if (specifier_key.as_string().view().ends_with("/"sv) && !address_url->serialize().ends_with('/')) { + if (specifier_key.as_string().view().ends_with('/') && !address_url->serialize().ends_with('/')) { // 1. The user agent may report a warning to the console indicating that an invalid address was given for the specifier key specifierKey; since specifierKey ends with a slash, the address needs to as well. auto& console = realm.intrinsics().console_object()->console(); console.output_debug_message(JS::Console::LogLevel::Warn, diff --git a/Libraries/LibWeb/Infra/Strings.cpp b/Libraries/LibWeb/Infra/Strings.cpp index 9ff26f1d47c..0618c846810 100644 --- a/Libraries/LibWeb/Infra/Strings.cpp +++ b/Libraries/LibWeb/Infra/Strings.cpp @@ -96,7 +96,7 @@ Utf16String strip_and_collapse_whitespace(Utf16String const& string) for (auto code_point : string) { if (Infra::is_ascii_whitespace(code_point)) { - if (!builder.utf16_string_view().ends_with(" "sv)) + if (!builder.utf16_string_view().ends_with(' ')) builder.append(' '); continue; }