mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-29 21:57:18 +00:00
AK: Implement a method to split a UTF-16 string
This commit is contained in:
parent
48a3b2c28e
commit
baddac5155
Notes:
github-actions[bot]
2025-07-28 10:27:13 +00:00
Author: https://github.com/trflynn89
Commit: baddac5155
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5610
4 changed files with 115 additions and 0 deletions
|
@ -225,6 +225,21 @@ public:
|
||||||
// Reports whether this string begins with `needle`; the comparison itself is performed by the
// Utf16View returned from utf16_view().
[[nodiscard]] ALWAYS_INLINE bool starts_with(Utf16View const& needle) const
{
    auto const view = utf16_view();
    return view.starts_with(needle);
}
|
||||||
// Reports whether this string ends with `needle`; the comparison itself is performed by the
// Utf16View returned from utf16_view().
[[nodiscard]] ALWAYS_INLINE bool ends_with(Utf16View const& needle) const
{
    auto const view = utf16_view();
    return view.ends_with(needle);
}
|
||||||
|
|
||||||
|
// Splits this string on a single UTF-16 code unit by delegating to Utf16View::split_view.
// `needle` is the separator code unit; `split_behavior` is passed through unchanged.
[[nodiscard]] ALWAYS_INLINE Vector<Utf16View> split_view(char16_t needle, SplitBehavior split_behavior) const
{
    auto const view = utf16_view();
    return view.split_view(needle, split_behavior);
}
|
||||||
|
// Splits this string on a (possibly multi-code-unit) separator by delegating to
// Utf16View::split_view. `split_behavior` is passed through unchanged.
[[nodiscard]] ALWAYS_INLINE Vector<Utf16View> split_view(Utf16View const& needle, SplitBehavior split_behavior) const
{
    auto const view = utf16_view();
    return view.split_view(needle, split_behavior);
}
|
||||||
|
|
||||||
|
template<typename Callback>
|
||||||
|
ALWAYS_INLINE void for_each_split_view(char16_t separator, SplitBehavior split_behavior, Callback&& callback) const
|
||||||
|
{
|
||||||
|
utf16_view().for_each_split_view(separator, split_behavior, forward<Callback>(callback));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Callback>
|
||||||
|
ALWAYS_INLINE void for_each_split_view(Utf16View const& separator, SplitBehavior split_behavior, Callback&& callback) const
|
||||||
|
{
|
||||||
|
utf16_view().for_each_split_view(separator, split_behavior, forward<Callback>(callback));
|
||||||
|
}
|
||||||
|
|
||||||
// This is primarily interesting to unit tests.
|
// This is primarily interesting to unit tests.
|
||||||
[[nodiscard]] constexpr bool has_short_ascii_storage() const
|
[[nodiscard]] constexpr bool has_short_ascii_storage() const
|
||||||
{
|
{
|
||||||
|
|
|
@ -254,6 +254,24 @@ Utf16View Utf16View::unicode_substring_view(size_t code_point_offset, size_t cod
|
||||||
VERIFY_NOT_REACHED();
|
VERIFY_NOT_REACHED();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Splits this view on a single UTF-16 code unit.
//
// The code unit is wrapped in a one-element Utf16View that points at the by-value parameter, so
// the wrapper is only valid for the duration of this call. The actual splitting is done by the
// Utf16View overload of split_view.
Vector<Utf16View> Utf16View::split_view(char16_t separator, SplitBehavior split_behavior) const
{
    return split_view(Utf16View { &separator, 1 }, split_behavior);
}
|
||||||
|
|
||||||
|
// Splits this view on `separator` and returns every part as a Utf16View, in order.
//
// The parts are exactly those reported by for_each_split_view for the same arguments; this
// function never stops the iteration early.
Vector<Utf16View> Utf16View::split_view(Utf16View const& separator, SplitBehavior split_behavior) const
{
    Vector<Utf16View> result;

    auto collect = [&result](Utf16View const& piece) {
        result.append(piece);
        return IterationDecision::Continue;
    };
    for_each_split_view(separator, split_behavior, collect);

    return result;
}
|
||||||
|
|
||||||
size_t Utf16View::calculate_length_in_code_points() const
|
size_t Utf16View::calculate_length_in_code_points() const
|
||||||
{
|
{
|
||||||
ASSERT(!has_ascii_storage());
|
ASSERT(!has_ascii_storage());
|
||||||
|
|
|
@ -10,6 +10,7 @@
|
||||||
#include <AK/Error.h>
|
#include <AK/Error.h>
|
||||||
#include <AK/Format.h>
|
#include <AK/Format.h>
|
||||||
#include <AK/Forward.h>
|
#include <AK/Forward.h>
|
||||||
|
#include <AK/IterationDecision.h>
|
||||||
#include <AK/MemMem.h>
|
#include <AK/MemMem.h>
|
||||||
#include <AK/Optional.h>
|
#include <AK/Optional.h>
|
||||||
#include <AK/Span.h>
|
#include <AK/Span.h>
|
||||||
|
@ -515,6 +516,46 @@ public:
|
||||||
return substring_view(length_in_code_units() - needle_length, needle_length) == needle;
|
return substring_view(length_in_code_units() - needle_length, needle_length) == needle;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Splits this view on a single UTF-16 code unit and returns the resulting parts.
[[nodiscard]] Vector<Utf16View> split_view(char16_t, SplitBehavior) const;
|
||||||
|
// Splits this view on a separator view (which may span several code units) and returns the resulting parts.
[[nodiscard]] Vector<Utf16View> split_view(Utf16View const&, SplitBehavior) const;
|
||||||
|
|
||||||
|
template<typename Callback>
|
||||||
|
constexpr void for_each_split_view(char16_t separator, SplitBehavior split_behavior, Callback&& callback) const
|
||||||
|
{
|
||||||
|
Utf16View seperator_view { &separator, 1 };
|
||||||
|
for_each_split_view(seperator_view, split_behavior, forward<Callback>(callback));
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename Callback>
|
||||||
|
constexpr void for_each_split_view(Utf16View const& separator, SplitBehavior split_behavior, Callback&& callback) const
|
||||||
|
{
|
||||||
|
VERIFY(!separator.is_empty());
|
||||||
|
|
||||||
|
if (is_empty())
|
||||||
|
return;
|
||||||
|
|
||||||
|
bool keep_empty = has_flag(split_behavior, SplitBehavior::KeepEmpty);
|
||||||
|
bool keep_separator = has_flag(split_behavior, SplitBehavior::KeepTrailingSeparator);
|
||||||
|
|
||||||
|
auto view { *this };
|
||||||
|
|
||||||
|
for (auto index = view.find_code_unit_offset(separator); index.has_value(); index = view.find_code_unit_offset(separator)) {
|
||||||
|
if (keep_empty || *index > 0) {
|
||||||
|
auto part = keep_separator
|
||||||
|
? view.substring_view(0, *index + separator.length_in_code_units())
|
||||||
|
: view.substring_view(0, *index);
|
||||||
|
|
||||||
|
if (callback(part) == IterationDecision::Break)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
view = view.substring_view(*index + separator.length_in_code_units());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (keep_empty || !view.is_empty())
|
||||||
|
callback(view);
|
||||||
|
}
|
||||||
|
|
||||||
// https://infra.spec.whatwg.org/#code-unit-less-than
|
// https://infra.spec.whatwg.org/#code-unit-less-than
|
||||||
[[nodiscard]] constexpr bool is_code_unit_less_than(Utf16View const& other) const
|
[[nodiscard]] constexpr bool is_code_unit_less_than(Utf16View const& other) const
|
||||||
{
|
{
|
||||||
|
|
|
@ -634,6 +634,47 @@ TEST_CASE(ends_with)
|
||||||
EXPECT(!emoji.ends_with(u"😀"sv));
|
EXPECT(!emoji.ends_with(u"😀"sv));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE(split_view)
{
    // Single-code-unit separator, given both as a character and as a one-character view.
    {
        auto const haystack = u"axxbxcxd"sv;

        EXPECT_EQ(haystack.split_view('x', SplitBehavior::Nothing), Vector({ u"a"sv, u"b"sv, u"c"sv, u"d"sv }));
        EXPECT_EQ(haystack.split_view("x"sv, SplitBehavior::Nothing), Vector({ u"a"sv, u"b"sv, u"c"sv, u"d"sv }));

        EXPECT_EQ(haystack.split_view('x', SplitBehavior::KeepEmpty), Vector({ u"a"sv, u""sv, u"b"sv, u"c"sv, u"d"sv }));
        EXPECT_EQ(haystack.split_view("x"sv, SplitBehavior::KeepEmpty), Vector({ u"a"sv, u""sv, u"b"sv, u"c"sv, u"d"sv }));
    }

    // Input that ends with the separator: the trailing empty part only shows up with KeepEmpty.
    {
        auto const haystack = u"axxbx"sv;

        EXPECT_EQ(haystack.split_view('x', SplitBehavior::Nothing), Vector({ u"a"sv, u"b"sv }));
        EXPECT_EQ(haystack.split_view("x"sv, SplitBehavior::Nothing), Vector({ u"a"sv, u"b"sv }));

        EXPECT_EQ(haystack.split_view('x', SplitBehavior::KeepEmpty), Vector({ u"a"sv, u""sv, u"b"sv, u""sv }));
        EXPECT_EQ(haystack.split_view("x"sv, SplitBehavior::KeepEmpty), Vector({ u"a"sv, u""sv, u"b"sv, u""sv }));
    }

    // Separator spanning more than one code unit.
    {
        auto const haystack = u"axxbcxxdxx"sv;
        EXPECT_EQ(haystack.split_view(u"xx"sv, SplitBehavior::Nothing), Vector({ u"a"sv, u"bc"sv, u"d"sv }));
        EXPECT_EQ(haystack.split_view(u"xx"sv, SplitBehavior::KeepEmpty), Vector({ u"a"sv, u"bc"sv, u"d"sv, u""sv }));
    }

    // Consecutive separators, combined with KeepTrailingSeparator.
    {
        auto const haystack = u"a,,,b"sv;
        EXPECT_EQ(haystack.split_view(u","sv, SplitBehavior::KeepEmpty), Vector({ u"a"sv, u""sv, u""sv, u"b"sv }));
        EXPECT_EQ(haystack.split_view(u","sv, SplitBehavior::KeepTrailingSeparator), Vector({ u"a,"sv, u"b"sv }));
        EXPECT_EQ(haystack.split_view(u","sv, SplitBehavior::KeepTrailingSeparator | SplitBehavior::KeepEmpty), Vector({ u"a,"sv, u","sv, u","sv, u"b"sv }));
    }

    // Plain whitespace-separated words.
    {
        auto const haystack = u"foo bar baz"sv;
        EXPECT_EQ(haystack.split_view(u" "sv, SplitBehavior::Nothing), Vector({ u"foo"sv, u"bar"sv, u"baz"sv }));
    }

    // Non-ASCII separator passed as a raw code unit value (U+03A3, 'Σ').
    {
        auto const haystack = u"ωΣ2ωΣω"sv;
        EXPECT_EQ(haystack.split_view(0x03A3u, SplitBehavior::Nothing), Vector({ u"ω"sv, u"2ω"sv, u"ω"sv }));
    }
}
|
||||||
|
|
||||||
TEST_CASE(find_code_unit_offset)
|
TEST_CASE(find_code_unit_offset)
|
||||||
{
|
{
|
||||||
auto conversion_result = Utf16String::from_utf8("😀foo😀bar"sv);
|
auto conversion_result = Utf16String::from_utf8("😀foo😀bar"sv);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue