AK: Compute Utf16View::code_point_offset_of correctly

There were a couple of issues here, including that the following computation
could actually underflow and wrap around to NumericLimits<size_t>::max():

    code_unit_offset -= it.length_in_code_units();
This commit is contained in:
Timothy Flynn 2025-07-22 07:38:26 -04:00 committed by Jelle Raaijmakers
commit ad7ac679fd
Notes: github-actions[bot] 2025-07-22 15:18:55 +00:00
3 changed files with 26 additions and 5 deletions

View file

@ -208,11 +208,10 @@ size_t Utf16View::code_point_offset_of(size_t code_unit_offset) const
size_t code_point_offset = 0; size_t code_point_offset = 0;
for (auto it = begin(); it != end(); ++it) { for (auto it = begin(); it != end();) {
if (code_unit_offset == 0) // We know the view is using UTF-16 storage because ASCII storage would have returned early above.
return code_point_offset; if ((++it).m_iterator.utf16 > m_string.utf16 + code_unit_offset)
break;
code_unit_offset -= it.length_in_code_units();
++code_point_offset; ++code_point_offset;
} }

View file

@ -106,6 +106,8 @@ public:
} }
private: private:
friend Utf16View;
constexpr Utf16CodePointIterator(char const* iterator, size_t length) constexpr Utf16CodePointIterator(char const* iterator, size_t length)
: m_iterator { .ascii = iterator } : m_iterator { .ascii = iterator }
, m_remaining_code_units(length) , m_remaining_code_units(length)

View file

@ -402,6 +402,26 @@ TEST_CASE(code_unit_offset_of)
EXPECT_EQ(view.code_unit_offset_of(11), 13uz); EXPECT_EQ(view.code_unit_offset_of(11), 13uz);
} }
// Checks Utf16View::code_point_offset_of for every code unit offset from 0
// through 13 of a string that mixes surrogate pairs with single-unit characters.
TEST_CASE(code_point_offset_of)
{
// Both emoji lie outside the Basic Multilingual Plane, so each one is encoded
// as a surrogate pair (two UTF-16 code units); every other character here
// takes a single code unit. That gives 13 code units and 11 code points.
Utf16View view { u"😂 foo 😀 bar"sv };
// Offsets 0 and 1 are the two halves of the first surrogate pair; both are
// expected to map to code point 0.
EXPECT_EQ(view.code_point_offset_of(0), 0uz);
EXPECT_EQ(view.code_point_offset_of(1), 0uz);
// " foo " -- one code unit per code point.
EXPECT_EQ(view.code_point_offset_of(2), 1uz);
EXPECT_EQ(view.code_point_offset_of(3), 2uz);
EXPECT_EQ(view.code_point_offset_of(4), 3uz);
EXPECT_EQ(view.code_point_offset_of(5), 4uz);
EXPECT_EQ(view.code_point_offset_of(6), 5uz);
// Offsets 7 and 8 are the two halves of the second surrogate pair; both are
// expected to map to code point 6.
EXPECT_EQ(view.code_point_offset_of(7), 6uz);
EXPECT_EQ(view.code_point_offset_of(8), 6uz);
// " bar" -- one code unit per code point.
EXPECT_EQ(view.code_point_offset_of(9), 7uz);
EXPECT_EQ(view.code_point_offset_of(10), 8uz);
EXPECT_EQ(view.code_point_offset_of(11), 9uz);
EXPECT_EQ(view.code_point_offset_of(12), 10uz);
// Offset 13 is one past the last code unit and is expected to yield the total
// number of code points.
EXPECT_EQ(view.code_point_offset_of(13), 11uz);
}
TEST_CASE(replace) TEST_CASE(replace)
{ {
auto result = u""sv.replace({}, {}, ReplaceMode::FirstOnly); auto result = u""sv.replace({}, {}, ReplaceMode::FirstOnly);