diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp
index 4c4394be150..a40b6817c9b 100644
--- a/AK/Utf8View.cpp
+++ b/AK/Utf8View.cpp
@@ -30,6 +30,24 @@ Utf8CodePointIterator Utf8View::iterator_at_byte_offset_without_validation(size_
     return Utf8CodePointIterator { reinterpret_cast(m_string.characters_without_null_termination()) + byte_offset, m_string.length() - byte_offset };
 }
 
+// Maps a byte offset into this view to the index of the code point whose encoding contains that byte.
+// An offset in the middle of a multi-byte sequence resolves to the code point that sequence encodes.
+size_t Utf8View::code_point_offset_of(size_t byte_offset) const
+{
+    VERIFY(byte_offset < byte_length());
+
+    auto const* target = begin_ptr() + byte_offset;
+
+    // Count the code points that start at or before the target byte. Iteration is expected to begin at
+    // byte 0, which keeps the count at least 1 (so the subtraction below cannot wrap) once the VERIFY holds.
+    size_t code_points_seen = 0;
+    for (auto it = begin(); !it.done() && it.m_ptr <= target; ++it)
+        ++code_points_seen;
+
+    // The last code point counted is the one containing `byte_offset`; turn the count into an index.
+    return code_points_seen - 1;
+}
+
 size_t Utf8View::byte_offset_of(size_t code_point_offset) const
 {
     size_t byte_offset = 0;
diff --git a/AK/Utf8View.h b/AK/Utf8View.h
index 25ea5f304be..81c4a7ce506 100644
--- a/AK/Utf8View.h
+++ b/AK/Utf8View.h
@@ -96,6 +96,9 @@ public:
     unsigned char const* bytes() const { return begin_ptr(); }
     size_t byte_length() const { return m_string.length(); }
 
+    // Returns the index of the code point whose encoding contains the byte at `byte_offset`.
+    [[nodiscard]] size_t code_point_offset_of(size_t byte_offset) const;
+
     [[nodiscard]] size_t byte_offset_of(Utf8CodePointIterator const& it) const
     {
         VERIFY(it.m_ptr >= begin_ptr());
diff --git a/Tests/AK/TestUtf8.cpp b/Tests/AK/TestUtf8.cpp
index 927481a24cf..3c71659c9ff 100644
--- a/Tests/AK/TestUtf8.cpp
+++ b/Tests/AK/TestUtf8.cpp
@@ -333,3 +333,28 @@ TEST_CASE(for_each_split_view)
 
     EXPECT_EQ(gather(SplitBehavior::KeepEmpty | SplitBehavior::KeepTrailingSeparator), Vector({ "."sv, "."sv, "."sv, "Well."sv, "."sv, "hello."sv, "friends!."sv, "."sv, "."sv, ""sv }));
 }
+
+TEST_CASE(code_point_offset_of)
+{
+    // "😭" is encoded as 4 bytes, so byte offsets 0-3 all map to code point 0.
+    Utf8View view { "😭foo"sv };
+
+    EXPECT_EQ(0u, view.code_point_offset_of(0));
+    EXPECT_EQ(0u, view.code_point_offset_of(1));
+    EXPECT_EQ(0u, view.code_point_offset_of(2));
+    EXPECT_EQ(0u, view.code_point_offset_of(3));
+    EXPECT_EQ(1u, view.code_point_offset_of(4));
+    EXPECT_EQ(2u, view.code_point_offset_of(5));
+    EXPECT_EQ(3u, view.code_point_offset_of(6));
+
+    // One code point of each encoded width: "a" (1 byte), "ß" (2 bytes), "€" (3 bytes), "😭" (4 bytes).
+    Utf8View mixed_widths { "aß€😭"sv };
+
+    EXPECT_EQ(0u, mixed_widths.code_point_offset_of(0));
+    EXPECT_EQ(1u, mixed_widths.code_point_offset_of(1));
+    EXPECT_EQ(1u, mixed_widths.code_point_offset_of(2));
+    EXPECT_EQ(2u, mixed_widths.code_point_offset_of(3));
+    EXPECT_EQ(2u, mixed_widths.code_point_offset_of(5));
+    EXPECT_EQ(3u, mixed_widths.code_point_offset_of(6));
+    EXPECT_EQ(3u, mixed_widths.code_point_offset_of(9));
+}