AK: Add Utf16View::find_code_unit_offset(_ignoring_case)

This commit is contained in:
Jelle Raaijmakers 2025-06-10 16:04:10 +02:00 committed by Jelle Raaijmakers
commit cc0a28ee7d
Notes: github-actions[bot] 2025-06-13 13:10:51 +00:00
3 changed files with 62 additions and 0 deletions

View file

@ -281,6 +281,32 @@ Utf16View Utf16View::unicode_substring_view(size_t code_point_offset, size_t cod
VERIFY_NOT_REACHED(); VERIFY_NOT_REACHED();
} }
// Searches this view's code units for `needle`, beginning at `start_offset`.
//
// The work is delegated to index_of() on m_code_units, and whatever it yields is returned
// unchanged: a code unit offset on success, or an empty Optional when nothing is found.
Optional<size_t> Utf16View::find_code_unit_offset(Utf16View const& needle, size_t start_offset) const
{
    auto const& needle_code_units = needle.m_code_units;
    return m_code_units.index_of(needle_code_units, start_offset);
}
// Case-insensitive search for `needle`, beginning at code unit offset `start_offset`.
//
// Returns the code unit offset of the first candidate window that equals_ignoring_case() the
// needle, or an empty Optional when no window matches or when `start_offset` plus the needle's
// length does not fit inside this view. An empty needle matches immediately at `start_offset`
// (provided that offset is within bounds).
Optional<size_t> Utf16View::find_code_unit_offset_ignoring_case(Utf16View const& needle, size_t start_offset) const
{
    auto const haystack_length = length_in_code_units();
    auto const needle_length = needle.length_in_code_units();

    // The first candidate window spans [start_offset, start_offset + needle_length). Bail out
    // if computing its end overflows or if it would run past the end of this view.
    Checked<size_t> first_window_end { start_offset };
    first_window_end += needle_length;
    if (first_window_end.has_overflow())
        return {};
    if (first_window_end.value() > haystack_length)
        return {};

    if (needle.is_empty())
        return start_offset;

    // Safe from underflow: the guard above guarantees needle_length <= haystack_length.
    auto const last_candidate = haystack_length - needle_length;

    for (size_t candidate = start_offset; candidate <= last_candidate;) {
        Utf16View const window { m_code_units.slice(candidate, needle_length) };
        if (window.equals_ignoring_case(needle))
            return candidate;

        // Step over the code point that starts the current window, using the width in code
        // units reported by the window's first iterator position.
        candidate += window.begin().length_in_code_units();
    }

    return {};
}
bool Utf16View::starts_with(Utf16View const& needle) const bool Utf16View::starts_with(Utf16View const& needle) const
{ {
if (needle.is_empty()) if (needle.is_empty())

View file

@ -134,6 +134,9 @@ public:
Utf16View unicode_substring_view(size_t code_point_offset, size_t code_point_length) const; Utf16View unicode_substring_view(size_t code_point_offset, size_t code_point_length) const;
Utf16View unicode_substring_view(size_t code_point_offset) const { return unicode_substring_view(code_point_offset, length_in_code_points() - code_point_offset); } Utf16View unicode_substring_view(size_t code_point_offset) const { return unicode_substring_view(code_point_offset, length_in_code_points() - code_point_offset); }
// Searches the code units of this view for `needle`, starting at code unit offset `start_offset`.
// Yields the code unit offset of the match, or an empty Optional if there is none.
Optional<size_t> find_code_unit_offset(Utf16View const& needle, size_t start_offset = 0) const;
// Like find_code_unit_offset(), but candidates are compared with equals_ignoring_case().
// Yields an empty Optional if `start_offset` plus the needle's length exceeds this view's length.
Optional<size_t> find_code_unit_offset_ignoring_case(Utf16View const& needle, size_t start_offset = 0) const;
bool starts_with(Utf16View const&) const; bool starts_with(Utf16View const&) const;
bool is_code_unit_less_than(Utf16View const& other) const; bool is_code_unit_less_than(Utf16View const& other) const;

View file

@ -367,3 +367,36 @@ TEST_CASE(starts_with)
EXPECT(!emoji.starts_with(u"a")); EXPECT(!emoji.starts_with(u"a"));
EXPECT(!emoji.starts_with(u"🙃")); EXPECT(!emoji.starts_with(u"🙃"));
} }
// Exercises Utf16View::find_code_unit_offset() on a string mixing surrogate pairs and ASCII.
TEST_CASE(find_code_unit_offset)
{
// Code unit layout (10 units total): 😀 = 0-1, "foo" = 2-4, 😀 = 5-6, "bar" = 7-9.
auto conversion_result = MUST(AK::utf8_to_utf16("😀foo😀bar"sv));
Utf16View const view { conversion_result };
// An empty needle matches at the start offset, unless that offset lies beyond the view.
EXPECT_EQ(0u, view.find_code_unit_offset(u"").value());
EXPECT_EQ(4u, view.find_code_unit_offset(u"", 4).value());
EXPECT(!view.find_code_unit_offset(u"", 16).has_value());
// Starting at offset 1 (inside the first emoji) skips ahead to the second emoji at offset 5.
EXPECT_EQ(0u, view.find_code_unit_offset(u"😀").value());
EXPECT_EQ(5u, view.find_code_unit_offset(u"😀", 1).value());
// Plain ASCII needles: offsets are counted in code units, so each emoji accounts for two.
EXPECT_EQ(2u, view.find_code_unit_offset(u"foo").value());
EXPECT_EQ(7u, view.find_code_unit_offset(u"bar").value());
// A needle that does not occur yields an empty Optional.
EXPECT(!view.find_code_unit_offset(u"baz").has_value());
}
// Exercises Utf16View::find_code_unit_offset_ignoring_case() with needles whose letter case
// differs from the haystack.
TEST_CASE(find_code_unit_offset_ignoring_case)
{
// Code unit layout (10 units total): 😀 = 0-1, "Foo" = 2-4, 😀 = 5-6, "Bar" = 7-9.
auto conversion_result = MUST(AK::utf8_to_utf16("😀Foo😀Bar"sv));
Utf16View const view { conversion_result };
// An empty needle matches at the start offset, unless that offset lies beyond the view.
EXPECT_EQ(0u, view.find_code_unit_offset_ignoring_case(u"").value());
EXPECT_EQ(4u, view.find_code_unit_offset_ignoring_case(u"", 4).value());
EXPECT(!view.find_code_unit_offset_ignoring_case(u"", 16).has_value());
// Starting at offset 1 (inside the first emoji) skips ahead to the second emoji at offset 5.
EXPECT_EQ(0u, view.find_code_unit_offset_ignoring_case(u"😀").value());
EXPECT_EQ(5u, view.find_code_unit_offset_ignoring_case(u"😀", 1).value());
// "foO" and "baR" match "Foo" and "Bar" despite the differing letter case.
EXPECT_EQ(2u, view.find_code_unit_offset_ignoring_case(u"foO").value());
EXPECT_EQ(7u, view.find_code_unit_offset_ignoring_case(u"baR").value());
// A needle that does not occur under any casing yields an empty Optional.
EXPECT(!view.find_code_unit_offset_ignoring_case(u"baz").has_value());
}