From 48a3b2c28e0a4183e6420171ab2a47bbef13a366 Mon Sep 17 00:00:00 2001
From: Timothy Flynn
Date: Sat, 26 Jul 2025 09:59:47 -0400
Subject: [PATCH] AK: Implement a method to count instances of a needle in a UTF-16 string

---
 AK/Utf16StringBase.h       |  2 ++
 AK/Utf16View.h             | 24 ++++++++++++++++++++++++
 Tests/AK/TestUtf16View.cpp | 30 ++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+)

diff --git a/AK/Utf16StringBase.h b/AK/Utf16StringBase.h
index 120e056e48e..3242d725e8d 100644
--- a/AK/Utf16StringBase.h
+++ b/AK/Utf16StringBase.h
@@ -220,6 +220,8 @@ public:
     [[nodiscard]] ALWAYS_INLINE bool contains(char16_t needle) const { return find_code_unit_offset(needle).has_value(); }
     [[nodiscard]] ALWAYS_INLINE bool contains(Utf16View const& needle) const { return find_code_unit_offset(needle).has_value(); }
 
+    [[nodiscard]] ALWAYS_INLINE size_t count(Utf16View const& needle) const { return utf16_view().count(needle); }
+
     [[nodiscard]] ALWAYS_INLINE bool starts_with(Utf16View const& needle) const { return utf16_view().starts_with(needle); }
     [[nodiscard]] ALWAYS_INLINE bool ends_with(Utf16View const& needle) const { return utf16_view().ends_with(needle); }
 
diff --git a/AK/Utf16View.h b/AK/Utf16View.h
index fb8bc1ce643..df33da86d12 100644
--- a/AK/Utf16View.h
+++ b/AK/Utf16View.h
@@ -479,6 +479,30 @@ public:
     [[nodiscard]] constexpr bool contains(char16_t needle) const { return find_code_unit_offset(needle).has_value(); }
     [[nodiscard]] constexpr bool contains(Utf16View const& needle) const { return find_code_unit_offset(needle).has_value(); }
 
+    // Counts the (possibly overlapping) occurrences of `needle`, probing every code unit offset of this view. An
+    // empty needle yields the length of this view in code units.
+    [[nodiscard]] constexpr size_t count(Utf16View const& needle) const
+    {
+        auto haystack_length = length_in_code_units();
+        auto needle_length = needle.length_in_code_units();
+
+        if (needle.is_empty())
+            return haystack_length;
+
+        // NOTE: Bail out before computing the last start offset below. The subtraction is unsigned, so it would
+        //       wrap around to a huge loop bound for a needle that is longer than the haystack.
+        if (needle_length > haystack_length)
+            return 0;
+
+        size_t count = 0;
+        for (size_t i = 0; i <= haystack_length - needle_length; ++i) {
+            if (substring_view(i).starts_with(needle))
+                ++count;
+        }
+
+        return count;
+    }
+
     [[nodiscard]] constexpr bool starts_with(Utf16View const& needle) const
     {
         auto needle_length = needle.length_in_code_units();
diff --git a/Tests/AK/TestUtf16View.cpp b/Tests/AK/TestUtf16View.cpp
index 1313ad99f48..8c17e4f5961 100644
--- a/Tests/AK/TestUtf16View.cpp
+++ b/Tests/AK/TestUtf16View.cpp
@@ -558,6 +558,36 @@ TEST_CASE(contains)
     EXPECT(u"ab😀"sv.contains(u"😀"sv));
 }
 
+TEST_CASE(count)
+{
+    EXPECT_EQ(u""sv.count({}), 0uz);
+    EXPECT_EQ(u"abc"sv.count({}), 3uz);
+
+    EXPECT_EQ(u""sv.count(u"a"sv), 0uz);
+    EXPECT_EQ(u"abc"sv.count(u"a"sv), 1uz);
+    EXPECT_EQ(u"abc"sv.count(u"b"sv), 1uz);
+    EXPECT_EQ(u"abc"sv.count(u"c"sv), 1uz);
+    EXPECT_EQ(u"abc"sv.count(u"ab"sv), 1uz);
+    EXPECT_EQ(u"abc"sv.count(u"bc"sv), 1uz);
+    EXPECT_EQ(u"abc"sv.count(u"abc"sv), 1uz);
+    EXPECT_EQ(u"abc"sv.count(u"d"sv), 0uz);
+
+    // A needle longer than the haystack can never match.
+    EXPECT_EQ(u""sv.count(u"ab"sv), 0uz);
+    EXPECT_EQ(u"a"sv.count(u"abc"sv), 0uz);
+    EXPECT_EQ(u"abc"sv.count(u"abcd"sv), 0uz);
+
+    EXPECT_EQ(u"aaaa"sv.count(u"aa"sv), 3uz);
+
+    EXPECT_EQ(u"😀"sv.count({}), 2uz);
+    EXPECT_EQ(u""sv.count(u"😀"sv), 0uz);
+    EXPECT_EQ(u"😀"sv.count(u"\xd83d"sv), 1uz);
+    EXPECT_EQ(u"😀"sv.count(u"\xde00"sv), 1uz);
+    EXPECT_EQ(u"😀"sv.count(u"😀"sv), 1uz);
+    EXPECT_EQ(u"😀😀😀"sv.count(u"😀"sv), 3uz);
+    EXPECT_EQ(u"😀😀😀"sv.count(u"😀😀"sv), 2uz);
+}
+
 TEST_CASE(starts_with)
 {
     EXPECT(Utf16View {}.starts_with(u""sv));