mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-09-05 09:06:08 +00:00
AK: Add trim methods to Utf16String that skip allocation when not needed
If the string does not begin or end (as selected by the trim mode) with any of the provided code units, we do not need to create a new string.
This commit is contained in:
parent
0efa98a57a
commit
2dc0a3b3ce
Notes:
github-actions[bot]
2025-08-05 13:15:06 +00:00
Author: https://github.com/trflynn89
Commit: 2dc0a3b3ce
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5720
Reviewed-by: https://github.com/gmta ✅
5 changed files with 84 additions and 5 deletions
|
@ -199,6 +199,29 @@ public:
|
||||||
return view.replace(needle, replacement, replace_mode);
|
return view.replace(needle, replacement, replace_mode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns this string with any of `code_units` stripped from the end(s) selected by `mode`.
//
// Only the first and/or last code unit is inspected up front. If neither relevant end holds one of
// `code_units`, a copy of *this is returned instead of building a string from the trimmed view.
// An empty string always yields a default-constructed Utf16String.
ALWAYS_INLINE Utf16String trim(Utf16View const& code_units, TrimMode mode = TrimMode::Both) const
{
    if (is_empty())
        return {};

    auto const inspects_front = mode == TrimMode::Left || mode == TrimMode::Both;
    auto const inspects_back = mode == TrimMode::Right || mode == TrimMode::Both;

    // Each side is only consulted when the mode asks for it.
    auto const front_is_trimmable = inspects_front && code_units.contains(code_unit_at(0));
    auto const back_is_trimmable = inspects_back && code_units.contains(code_unit_at(length_in_code_units() - 1));

    if (front_is_trimmable || back_is_trimmable)
        return Utf16String::from_utf16_without_validation(utf16_view().trim(code_units, mode));

    // Nothing to strip on the requested side(s); hand back this string unchanged.
    return *this;
}
|
||||||
|
|
||||||
|
// Convenience wrapper around trim() that strips space, \n, \t, \v, \f and \r from the end(s) selected by `mode`.
ALWAYS_INLINE Utf16String trim_ascii_whitespace(TrimMode mode = TrimMode::Both) const
{
    auto const ascii_whitespace = " \n\t\v\f\r"sv;
    return trim(ascii_whitespace, mode);
}
|
||||||
|
|
||||||
ALWAYS_INLINE Utf16String escape_html_entities() const { return utf16_view().escape_html_entities(); }
|
ALWAYS_INLINE Utf16String escape_html_entities() const { return utf16_view().escape_html_entities(); }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
|
@ -477,8 +477,7 @@ public:
|
||||||
|
|
||||||
[[nodiscard]] constexpr Utf16View trim_ascii_whitespace(TrimMode mode = TrimMode::Both) const
|
[[nodiscard]] constexpr Utf16View trim_ascii_whitespace(TrimMode mode = TrimMode::Both) const
|
||||||
{
|
{
|
||||||
static constexpr Utf16View white_space { u" \n\t\v\f\r", 6uz };
|
return trim(" \n\t\v\f\r"sv, mode);
|
||||||
return trim(white_space, mode);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr Optional<size_t> find_code_unit_offset(char16_t needle, size_t start_offset = 0) const
|
constexpr Optional<size_t> find_code_unit_offset(char16_t needle, size_t start_offset = 0) const
|
||||||
|
|
|
@ -1644,8 +1644,7 @@ Utf16String HTMLInputElement::value_sanitization_algorithm(Utf16String const& va
|
||||||
};
|
};
|
||||||
|
|
||||||
auto strip_newlines_and_trim = [&]() {
|
auto strip_newlines_and_trim = [&]() {
|
||||||
auto value_without_newlines = strip_newlines();
|
return strip_newlines().trim(Infra::ASCII_WHITESPACE);
|
||||||
return Utf16String::from_utf16_without_validation(value_without_newlines.utf16_view().trim(Infra::ASCII_WHITESPACE));
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// https://html.spec.whatwg.org/multipage/input.html#text-(type=text)-state-and-search-state-(type=search):value-sanitization-algorithm
|
// https://html.spec.whatwg.org/multipage/input.html#text-(type=text)-state-and-search-state-(type=search):value-sanitization-algorithm
|
||||||
|
|
|
@ -105,7 +105,7 @@ Utf16String strip_and_collapse_whitespace(Utf16String const& string)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ...and then remove any leading and trailing ASCII whitespace from that string.
|
// ...and then remove any leading and trailing ASCII whitespace from that string.
|
||||||
return Utf16String::from_utf16(builder.utf16_string_view().trim(Infra::ASCII_WHITESPACE));
|
return builder.to_utf16_string().trim(Infra::ASCII_WHITESPACE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://infra.spec.whatwg.org/#code-unit-prefix
|
// https://infra.spec.whatwg.org/#code-unit-prefix
|
||||||
|
|
|
@ -913,6 +913,64 @@ TEST_CASE(to_casefold)
|
||||||
EXPECT_EQ(result, u"\u03B1\u0342\u03B9"sv);
|
EXPECT_EQ(result, u"\u03B1\u0342\u03B9"sv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Exercises Utf16String::trim() for every TrimMode. Where nothing should be trimmed, the result must not only
// compare equal to the input but also expose the same underlying data pointer.
TEST_CASE(trim)
{
    // Checks that `result` equals `string`, uses the same storage kind, and points at the same data.
    auto expect_shared_storage = [](Utf16String const& string, Utf16String const& result) {
        EXPECT_EQ(string, result);

        auto const stored_as_ascii = string.has_ascii_storage();
        VERIFY(stored_as_ascii == result.has_ascii_storage());

        auto original_view = string.utf16_view();
        auto trimmed_view = result.utf16_view();

        if (stored_as_ascii) {
            EXPECT_EQ(original_view.ascii_span().data(), trimmed_view.ascii_span().data());
        } else {
            EXPECT_EQ(original_view.utf16_span().data(), trimmed_view.utf16_span().data());
        }
    };

    Utf16View space { u" "sv };

    // No whitespace on either end: every mode returns the input untouched.
    // NOTE(review): the inputs are presumably long enough to avoid inline (short-string) storage, which would
    // make the pointer comparison meaningless - confirm.
    {
        auto input = u"looooong word"_utf16;
        expect_shared_storage(input, input.trim(space, TrimMode::Both));
        expect_shared_storage(input, input.trim(space, TrimMode::Left));
        expect_shared_storage(input, input.trim(space, TrimMode::Right));
    }

    // Leading whitespace only: trimming on the right is a no-op.
    {
        auto input = u" looooong word"_utf16;
        EXPECT_EQ(input.trim(space, TrimMode::Both), u"looooong word"sv);
        EXPECT_EQ(input.trim(space, TrimMode::Left), u"looooong word"sv);
        expect_shared_storage(input, input.trim(space, TrimMode::Right));
    }

    // Trailing whitespace only: trimming on the left is a no-op.
    {
        auto input = u"looooong word "_utf16;
        EXPECT_EQ(input.trim(space, TrimMode::Both), u"looooong word"sv);
        expect_shared_storage(input, input.trim(space, TrimMode::Left));
        EXPECT_EQ(input.trim(space, TrimMode::Right), u"looooong word"sv);
    }

    // Whitespace on both ends.
    {
        auto input = u" looooong word "_utf16;
        EXPECT_EQ(input.trim(space, TrimMode::Both), u"looooong word"sv);
        EXPECT_EQ(input.trim(space, TrimMode::Left), u"looooong word "sv);
        EXPECT_EQ(input.trim(space, TrimMode::Right), u" looooong word"sv);
    }

    // A non-ASCII code unit (U+180E) surrounded by spaces is left in place.
    {
        auto input = u" \u180E "_utf16;
        EXPECT_EQ(input.trim(space, TrimMode::Both), u"\u180E"sv);
        EXPECT_EQ(input.trim(space, TrimMode::Left), u"\u180E "sv);
        EXPECT_EQ(input.trim(space, TrimMode::Right), u" \u180E"sv);
    }

    // Trimming a character outside the BMP, then confirming that trimming spaces leaves the string alone.
    {
        auto input = u"😀wfh😀"_utf16;
        auto emoji = u"😀"sv;

        EXPECT_EQ(input.trim(emoji, TrimMode::Both), u"wfh"sv);
        EXPECT_EQ(input.trim(emoji, TrimMode::Left), u"wfh😀"sv);
        EXPECT_EQ(input.trim(emoji, TrimMode::Right), u"😀wfh"sv);

        expect_shared_storage(input, input.trim(space, TrimMode::Both));
        expect_shared_storage(input, input.trim(space, TrimMode::Left));
        expect_shared_storage(input, input.trim(space, TrimMode::Right));
    }
}
|
||||||
|
|
||||||
TEST_CASE(copy_operations)
|
TEST_CASE(copy_operations)
|
||||||
{
|
{
|
||||||
auto test = [](Utf16String const& string1) {
|
auto test = [](Utf16String const& string1) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue