AK: Add Utf8View::code_point_offset_of()

This commit is contained in:
Jelle Raaijmakers 2025-06-11 16:56:42 +02:00 committed by Jelle Raaijmakers
commit 6f926e6977
Notes: github-actions[bot] 2025-06-13 13:10:13 +00:00
3 changed files with 27 additions and 0 deletions

View file

@ -30,6 +30,18 @@ Utf8CodePointIterator Utf8View::iterator_at_byte_offset_without_validation(size_
return Utf8CodePointIterator { reinterpret_cast<u8 const*>(m_string.characters_without_null_termination()) + byte_offset, m_string.length() - byte_offset };
}
// Maps a byte offset into this view to the zero-based index of the code point
// that the byte belongs to: the last code point whose iterator position is at
// or before `byte_offset`.
//
// `byte_offset` must be strictly less than byte_length(); this is VERIFY'd.
size_t Utf8View::code_point_offset_of(size_t byte_offset) const
{
    VERIFY(byte_offset < byte_length());

    auto const* target = begin_ptr() + byte_offset;

    // Count every code point that starts at or before the target byte.
    size_t code_points_seen = 0;
    auto it = begin();
    while (!it.done() && it.m_ptr <= target) {
        ++code_points_seen;
        ++it;
    }

    // The VERIFY above implies the view is non-empty, so the first code point
    // (presumably positioned at begin_ptr()) has been counted and the
    // subtraction cannot wrap. The count is one past the index we want.
    return code_points_seen - 1;
}
size_t Utf8View::byte_offset_of(size_t code_point_offset) const
{
size_t byte_offset = 0;

View file

@ -96,6 +96,8 @@ public:
// Pointer to the start of the viewed bytes; simply forwards begin_ptr().
unsigned char const* bytes() const
{
    return begin_ptr();
}
// Length of the view as reported by the underlying m_string.length().
size_t byte_length() const
{
    return m_string.length();
}
// Returns the zero-based index of the code point that contains the byte at
// `byte_offset`. `byte_offset` must be less than byte_length(); the
// definition VERIFYs this. The parameter name matches the out-of-line
// definition, which also calls it `byte_offset`.
size_t code_point_offset_of(size_t byte_offset) const;
[[nodiscard]] size_t byte_offset_of(Utf8CodePointIterator const& it) const
{
VERIFY(it.m_ptr >= begin_ptr());

View file

@ -333,3 +333,16 @@ TEST_CASE(for_each_split_view)
EXPECT_EQ(gather(SplitBehavior::KeepEmpty | SplitBehavior::KeepTrailingSeparator),
Vector({ "."sv, "."sv, "."sv, "Well."sv, "."sv, "hello."sv, "friends!."sv, "."sv, "."sv, ""sv }));
}
TEST_CASE(code_point_offset_of)
{
    // Per the expectations below, byte offsets 0-3 all map to the first code
    // point (the emoji), and each following ASCII letter takes one byte.
    Utf8View view { "😭foo"sv };

    struct Expectation {
        size_t byte_offset;
        size_t code_point_offset;
    };
    Expectation const expectations[] = {
        { 0, 0 },
        { 1, 0 },
        { 2, 0 },
        { 3, 0 },
        { 4, 1 },
        { 5, 2 },
        { 6, 3 },
    };

    for (auto const& expectation : expectations)
        EXPECT_EQ(expectation.code_point_offset, view.code_point_offset_of(expectation.byte_offset));
}