diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp index 18535cefd34..2caa17909bf 100644 --- a/AK/Utf8View.cpp +++ b/AK/Utf8View.cpp @@ -32,24 +32,25 @@ Utf8CodePointIterator Utf8View::iterator_at_byte_offset_without_validation(size_ size_t Utf8View::code_point_offset_of(size_t byte_offset) const { - VERIFY(byte_offset < byte_length()); + VERIFY(byte_offset <= byte_length()); // Fast path: each code point is represented by a single byte. if (length() == byte_length()) return byte_offset; size_t code_point_offset = 0; - for (auto it = begin(); !it.done(); ++it) { - if (it.m_ptr > begin_ptr() + byte_offset) + auto it = begin(); + while (!it.done()) { + if ((++it).m_ptr > begin_ptr() + byte_offset) break; ++code_point_offset; } - return code_point_offset - 1; + return code_point_offset; } size_t Utf8View::byte_offset_of(size_t code_point_offset) const { - VERIFY(code_point_offset < length()); + VERIFY(code_point_offset <= length()); // Fast path: each code point is represented by a single byte. if (length() == byte_length()) diff --git a/Tests/AK/TestUtf8View.cpp b/Tests/AK/TestUtf8View.cpp index 10775410611..839f50f271d 100644 --- a/Tests/AK/TestUtf8View.cpp +++ b/Tests/AK/TestUtf8View.cpp @@ -345,4 +345,16 @@ TEST_CASE(code_point_offset_of) EXPECT_EQ(1u, view.code_point_offset_of(4)); EXPECT_EQ(2u, view.code_point_offset_of(5)); EXPECT_EQ(3u, view.code_point_offset_of(6)); + EXPECT_EQ(4u, view.code_point_offset_of(7)); +} + +TEST_CASE(byte_offset_of) +{ + Utf8View view { "😂foo"sv }; + + EXPECT_EQ(0u, view.byte_offset_of(0)); + EXPECT_EQ(4u, view.byte_offset_of(1)); + EXPECT_EQ(5u, view.byte_offset_of(2)); + EXPECT_EQ(6u, view.byte_offset_of(3)); + EXPECT_EQ(7u, view.byte_offset_of(4)); }