diff --git a/AK/Utf16View.cpp b/AK/Utf16View.cpp
index 1503b9e50ae..0bc49aba50d 100644
--- a/AK/Utf16View.cpp
+++ b/AK/Utf16View.cpp
@@ -281,6 +281,41 @@ Utf16View Utf16View::unicode_substring_view(size_t code_point_offset, size_t cod
     VERIFY_NOT_REACHED();
 }
 
+// Searches the underlying code units for `needle`, beginning at code unit `start_offset`.
+// Returns the code unit offset of the first match, or an empty Optional if there is none.
+Optional<size_t> Utf16View::find_code_unit_offset(Utf16View const& needle, size_t start_offset) const
+{
+    return m_code_units.index_of(needle.m_code_units, start_offset);
+}
+
+// Like find_code_unit_offset(), but candidates are compared with equals_ignoring_case().
+// Returns the code unit offset of the first match at or after `start_offset`, or an empty Optional.
+Optional<size_t> Utf16View::find_code_unit_offset_ignoring_case(Utf16View const& needle, size_t start_offset) const
+{
+    // Bail out if the needle cannot fit between start_offset and the end of this view. Checked<> guards the
+    // addition against wrap-around, and this check also keeps the subtraction in the loop below from underflowing.
+    Checked<size_t> maximum_offset { start_offset };
+    maximum_offset += needle.length_in_code_units();
+    if (maximum_offset.has_overflow() || maximum_offset.value() > length_in_code_units())
+        return {};
+
+    // An empty needle matches right at start_offset, which the check above has confirmed lies within the view.
+    if (needle.is_empty())
+        return start_offset;
+
+    size_t index = start_offset;
+    while (index <= length_in_code_units() - needle.length_in_code_units()) {
+        Utf16View const slice { m_code_units.slice(index, needle.length_in_code_units()) };
+        if (slice.equals_ignoring_case(needle))
+            return index;
+        // Advance by the full length of the code point at the front of the slice (presumably so that the
+        // next candidate never starts in the middle of a surrogate pair).
+        index += slice.begin().length_in_code_units();
+    }
+
+    return {};
+}
+
 bool Utf16View::starts_with(Utf16View const& needle) const
 {
     if (needle.is_empty())
diff --git a/AK/Utf16View.h b/AK/Utf16View.h
index 26d80ee4098..ed2dee466c2 100644
--- a/AK/Utf16View.h
+++ b/AK/Utf16View.h
@@ -134,6 +134,11 @@ public:
     Utf16View unicode_substring_view(size_t code_point_offset, size_t code_point_length) const;
     Utf16View unicode_substring_view(size_t code_point_offset) const { return unicode_substring_view(code_point_offset, length_in_code_points() - code_point_offset); }
 
+    // Both return the code unit offset of the first match of `needle` at or after `start_offset`
+    // (also measured in code units), or an empty Optional when there is no match.
+    Optional<size_t> find_code_unit_offset(Utf16View const& needle, size_t start_offset = 0) const;
+    Optional<size_t> find_code_unit_offset_ignoring_case(Utf16View const& needle, size_t start_offset = 0) const;
+
     bool starts_with(Utf16View const&) const;
 
     bool is_code_unit_less_than(Utf16View const& other) const;
diff --git a/Tests/AK/TestUtf16.cpp b/Tests/AK/TestUtf16.cpp
index 585dff22061..6250d4f5684 100644
--- a/Tests/AK/TestUtf16.cpp
+++ b/Tests/AK/TestUtf16.cpp
@@ -367,3 +367,39 @@ TEST_CASE(starts_with)
     EXPECT(!emoji.starts_with(u"a"));
     EXPECT(!emoji.starts_with(u"🙃"));
 }
+
+TEST_CASE(find_code_unit_offset)
+{
+    // "😀" occupies two UTF-16 code units, so the view is 10 code units long:
+    // "😀" at 0, "foo" at 2, "😀" at 5 and "bar" at 7. Offset 16 is past the end.
+    auto conversion_result = MUST(AK::utf8_to_utf16("😀foo😀bar"sv));
+    Utf16View const view { conversion_result };
+
+    EXPECT_EQ(0u, view.find_code_unit_offset(u"").value());
+    EXPECT_EQ(4u, view.find_code_unit_offset(u"", 4).value());
+    EXPECT(!view.find_code_unit_offset(u"", 16).has_value());
+
+    EXPECT_EQ(0u, view.find_code_unit_offset(u"😀").value());
+    EXPECT_EQ(5u, view.find_code_unit_offset(u"😀", 1).value());
+    EXPECT_EQ(2u, view.find_code_unit_offset(u"foo").value());
+    EXPECT_EQ(7u, view.find_code_unit_offset(u"bar").value());
+
+    EXPECT(!view.find_code_unit_offset(u"baz").has_value());
+}
+
+TEST_CASE(find_code_unit_offset_ignoring_case)
+{
+    // Same code unit layout as in find_code_unit_offset; the matching ASCII needles differ from the view only in letter case.
+    auto conversion_result = MUST(AK::utf8_to_utf16("😀Foo😀Bar"sv));
+    Utf16View const view { conversion_result };
+
+    EXPECT_EQ(0u, view.find_code_unit_offset_ignoring_case(u"").value());
+    EXPECT_EQ(4u, view.find_code_unit_offset_ignoring_case(u"", 4).value());
+    EXPECT(!view.find_code_unit_offset_ignoring_case(u"", 16).has_value());
+
+    EXPECT_EQ(0u, view.find_code_unit_offset_ignoring_case(u"😀").value());
+    EXPECT_EQ(5u, view.find_code_unit_offset_ignoring_case(u"😀", 1).value());
+    EXPECT_EQ(2u, view.find_code_unit_offset_ignoring_case(u"foO").value());
+    EXPECT_EQ(7u, view.find_code_unit_offset_ignoring_case(u"baR").value());
+    EXPECT(!view.find_code_unit_offset_ignoring_case(u"baz").has_value());
+}