mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-28 21:26:22 +00:00
AK: Implement a method to split a UTF-16 string
This commit is contained in:
parent
48a3b2c28e
commit
baddac5155
Notes:
github-actions[bot]
2025-07-28 10:27:13 +00:00
Author: https://github.com/trflynn89
Commit: baddac5155
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5610
4 changed files with 115 additions and 0 deletions
|
@ -225,6 +225,21 @@ public:
|
|||
// Forwards to Utf16View::starts_with on this string's UTF-16 view and returns its result.
[[nodiscard]] ALWAYS_INLINE bool starts_with(Utf16View const& needle) const { return utf16_view().starts_with(needle); }
|
||||
// Forwards to Utf16View::ends_with on this string's UTF-16 view and returns its result.
[[nodiscard]] ALWAYS_INLINE bool ends_with(Utf16View const& needle) const { return utf16_view().ends_with(needle); }
|
||||
|
||||
// Splits this string on a single code unit by forwarding to Utf16View::split_view on this string's UTF-16 view.
[[nodiscard]] ALWAYS_INLINE Vector<Utf16View> split_view(char16_t needle, SplitBehavior split_behavior) const { return utf16_view().split_view(needle, split_behavior); }
|
||||
// Splits this string on a (possibly multi-code-unit) separator view by forwarding to Utf16View::split_view.
[[nodiscard]] ALWAYS_INLINE Vector<Utf16View> split_view(Utf16View const& needle, SplitBehavior split_behavior) const { return utf16_view().split_view(needle, split_behavior); }
|
||||
|
||||
template<typename Callback>
|
||||
ALWAYS_INLINE void for_each_split_view(char16_t separator, SplitBehavior split_behavior, Callback&& callback) const
|
||||
{
|
||||
utf16_view().for_each_split_view(separator, split_behavior, forward<Callback>(callback));
|
||||
}
|
||||
|
||||
template<typename Callback>
|
||||
ALWAYS_INLINE void for_each_split_view(Utf16View const& separator, SplitBehavior split_behavior, Callback&& callback) const
|
||||
{
|
||||
utf16_view().for_each_split_view(separator, split_behavior, forward<Callback>(callback));
|
||||
}
|
||||
|
||||
// This is primarily interesting to unit tests.
|
||||
[[nodiscard]] constexpr bool has_short_ascii_storage() const
|
||||
{
|
||||
|
|
|
@ -254,6 +254,24 @@ Utf16View Utf16View::unicode_substring_view(size_t code_point_offset, size_t cod
|
|||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
// Splits on a single code unit: the code unit is wrapped in a one-unit Utf16View and the
// work is done by the Utf16View-separator overload. The temporary only has to live for the
// duration of the call.
Vector<Utf16View> Utf16View::split_view(char16_t separator, SplitBehavior split_behavior) const
{
    return split_view(Utf16View { &separator, 1 }, split_behavior);
}
|
||||
|
||||
// Gathers every part reported by for_each_split_view() into a Vector, in the order reported.
// The collector never asks the iteration to stop, so all parts are returned.
Vector<Utf16View> Utf16View::split_view(Utf16View const& separator, SplitBehavior split_behavior) const
{
    Vector<Utf16View> pieces;

    auto collect_piece = [&pieces](Utf16View const& piece) {
        pieces.append(piece);
        return IterationDecision::Continue;
    };
    for_each_split_view(separator, split_behavior, collect_piece);

    return pieces;
}
|
||||
|
||||
size_t Utf16View::calculate_length_in_code_points() const
|
||||
{
|
||||
ASSERT(!has_ascii_storage());
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include <AK/Error.h>
|
||||
#include <AK/Format.h>
|
||||
#include <AK/Forward.h>
|
||||
#include <AK/IterationDecision.h>
|
||||
#include <AK/MemMem.h>
|
||||
#include <AK/Optional.h>
|
||||
#include <AK/Span.h>
|
||||
|
@ -515,6 +516,46 @@ public:
|
|||
return substring_view(length_in_code_units() - needle_length, needle_length) == needle;
|
||||
}
|
||||
|
||||
// Splits this view on a single code unit and returns the resulting parts.
[[nodiscard]] Vector<Utf16View> split_view(char16_t, SplitBehavior) const;
|
||||
// Splits this view on a separator view and returns the resulting parts.
[[nodiscard]] Vector<Utf16View> split_view(Utf16View const&, SplitBehavior) const;
|
||||
|
||||
template<typename Callback>
|
||||
constexpr void for_each_split_view(char16_t separator, SplitBehavior split_behavior, Callback&& callback) const
|
||||
{
|
||||
Utf16View seperator_view { &separator, 1 };
|
||||
for_each_split_view(seperator_view, split_behavior, forward<Callback>(callback));
|
||||
}
|
||||
|
||||
template<typename Callback>
|
||||
constexpr void for_each_split_view(Utf16View const& separator, SplitBehavior split_behavior, Callback&& callback) const
|
||||
{
|
||||
VERIFY(!separator.is_empty());
|
||||
|
||||
if (is_empty())
|
||||
return;
|
||||
|
||||
bool keep_empty = has_flag(split_behavior, SplitBehavior::KeepEmpty);
|
||||
bool keep_separator = has_flag(split_behavior, SplitBehavior::KeepTrailingSeparator);
|
||||
|
||||
auto view { *this };
|
||||
|
||||
for (auto index = view.find_code_unit_offset(separator); index.has_value(); index = view.find_code_unit_offset(separator)) {
|
||||
if (keep_empty || *index > 0) {
|
||||
auto part = keep_separator
|
||||
? view.substring_view(0, *index + separator.length_in_code_units())
|
||||
: view.substring_view(0, *index);
|
||||
|
||||
if (callback(part) == IterationDecision::Break)
|
||||
return;
|
||||
}
|
||||
|
||||
view = view.substring_view(*index + separator.length_in_code_units());
|
||||
}
|
||||
|
||||
if (keep_empty || !view.is_empty())
|
||||
callback(view);
|
||||
}
|
||||
|
||||
// https://infra.spec.whatwg.org/#code-unit-less-than
|
||||
[[nodiscard]] constexpr bool is_code_unit_less_than(Utf16View const& other) const
|
||||
{
|
||||
|
|
|
@ -634,6 +634,47 @@ TEST_CASE(ends_with)
|
|||
EXPECT(!emoji.ends_with(u"😀"sv));
|
||||
}
|
||||
|
||||
// Exercises Utf16View::split_view with single-code-unit and view separators under the
// Nothing, KeepEmpty and KeepTrailingSeparator behaviors.
TEST_CASE(split_view)
|
||||
{
|
||||
// Adjacent separators in the middle: the empty part between them appears only with KeepEmpty.
// NOTE(review): "x"sv has no u prefix — presumably this exercises a separator view built from
// a non-UTF-16 literal; confirm.
{
|
||||
auto test = u"axxbxcxd"sv;
|
||||
|
||||
EXPECT_EQ(test.split_view('x', SplitBehavior::Nothing), Vector({ u"a"sv, u"b"sv, u"c"sv, u"d"sv }));
|
||||
EXPECT_EQ(test.split_view("x"sv, SplitBehavior::Nothing), Vector({ u"a"sv, u"b"sv, u"c"sv, u"d"sv }));
|
||||
|
||||
EXPECT_EQ(test.split_view('x', SplitBehavior::KeepEmpty), Vector({ u"a"sv, u""sv, u"b"sv, u"c"sv, u"d"sv }));
|
||||
EXPECT_EQ(test.split_view("x"sv, SplitBehavior::KeepEmpty), Vector({ u"a"sv, u""sv, u"b"sv, u"c"sv, u"d"sv }));
|
||||
}
|
||||
// Input ending in a separator: the trailing empty part appears only with KeepEmpty.
{
|
||||
auto test = u"axxbx"sv;
|
||||
|
||||
EXPECT_EQ(test.split_view('x', SplitBehavior::Nothing), Vector({ u"a"sv, u"b"sv }));
|
||||
EXPECT_EQ(test.split_view("x"sv, SplitBehavior::Nothing), Vector({ u"a"sv, u"b"sv }));
|
||||
|
||||
EXPECT_EQ(test.split_view('x', SplitBehavior::KeepEmpty), Vector({ u"a"sv, u""sv, u"b"sv, u""sv }));
|
||||
EXPECT_EQ(test.split_view("x"sv, SplitBehavior::KeepEmpty), Vector({ u"a"sv, u""sv, u"b"sv, u""sv }));
|
||||
}
|
||||
// Separator that is two code units long.
{
|
||||
auto test = u"axxbcxxdxx"sv;
|
||||
EXPECT_EQ(test.split_view(u"xx"sv, SplitBehavior::Nothing), Vector({ u"a"sv, u"bc"sv, u"d"sv }));
|
||||
EXPECT_EQ(test.split_view(u"xx"sv, SplitBehavior::KeepEmpty), Vector({ u"a"sv, u"bc"sv, u"d"sv, u""sv }));
|
||||
}
|
||||
// KeepTrailingSeparator: separator-terminated parts keep their separator; without KeepEmpty
// the parts that would otherwise be empty are dropped entirely.
{
|
||||
auto test = u"a,,,b"sv;
|
||||
EXPECT_EQ(test.split_view(u","sv, SplitBehavior::KeepEmpty), Vector({ u"a"sv, u""sv, u""sv, u"b"sv }));
|
||||
EXPECT_EQ(test.split_view(u","sv, SplitBehavior::KeepTrailingSeparator), Vector({ u"a,"sv, u"b"sv }));
|
||||
EXPECT_EQ(test.split_view(u","sv, SplitBehavior::KeepTrailingSeparator | SplitBehavior::KeepEmpty), Vector({ u"a,"sv, u","sv, u","sv, u"b"sv }));
|
||||
}
|
||||
// Plain whitespace-separated words.
{
|
||||
auto test = u"foo bar baz"sv;
|
||||
EXPECT_EQ(test.split_view(u" "sv, SplitBehavior::Nothing), Vector({ u"foo"sv, u"bar"sv, u"baz"sv }));
|
||||
}
|
||||
// Non-ASCII input split on the code unit 0x03A3 (Σ).
{
|
||||
auto test = u"ωΣ2ωΣω"sv;
|
||||
EXPECT_EQ(test.split_view(0x03A3u, SplitBehavior::Nothing), Vector({ u"ω"sv, u"2ω"sv, u"ω"sv }));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(find_code_unit_offset)
|
||||
{
|
||||
auto conversion_result = Utf16String::from_utf8("😀foo😀bar"sv);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue