From 67723ef83c22a682faa8a54c8a2485eaae363b3f Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Mon, 28 Jul 2025 10:42:05 -0400 Subject: [PATCH] AK: Add a method to peek ahead of a UTF-16 iterator --- AK/Utf16View.h | 19 +++++++++ Tests/AK/TestUtf16View.cpp | 79 ++++++++++++++++++++++++++++++++------ 2 files changed, 87 insertions(+), 11 deletions(-) diff --git a/AK/Utf16View.h b/AK/Utf16View.h index 6aa5581af95..3ef49181280 100644 --- a/AK/Utf16View.h +++ b/AK/Utf16View.h @@ -88,6 +88,25 @@ public: return static_cast<u32>(code_unit); } + constexpr Optional<u32> peek(size_t code_point_offset) const + { + if (code_point_offset == 0) { + if (remaining_code_units() == 0) + return {}; + return this->operator*(); + } + + auto it = *this; + + for (size_t index = 0; index < code_point_offset; ++index) { + ++it; + if (it.remaining_code_units() == 0) + return {}; + } + + return *it; + } + [[nodiscard]] constexpr bool operator==(Utf16CodePointIterator const& other) const { // Note that this also protects against iterators with different underlying storage. 
diff --git a/Tests/AK/TestUtf16View.cpp b/Tests/AK/TestUtf16View.cpp index 2ec2890ec79..b53fbdc4a5a 100644 --- a/Tests/AK/TestUtf16View.cpp +++ b/Tests/AK/TestUtf16View.cpp @@ -139,45 +139,102 @@ TEST_CASE(utf16_literal) TEST_CASE(iterate_utf16) { - auto string = Utf16String::from_utf8("Привет 😀"sv); - Utf16View view { string }; + Utf16View view { u"Привет 😀🙃"sv }; auto iterator = view.begin(); - EXPECT(*iterator == 1055); + EXPECT_EQ(*iterator, 0x041fu); EXPECT(iterator.length_in_code_units() == 1); + EXPECT_EQ(iterator.peek(0), 0x041fu); + EXPECT_EQ(iterator.peek(1), 0x0440u); + EXPECT_EQ(iterator.peek(2), 0x0438u); + EXPECT_EQ(iterator.peek(3), 0x0432u); + EXPECT_EQ(iterator.peek(4), 0x0435u); + EXPECT_EQ(iterator.peek(5), 0x0442u); + EXPECT_EQ(iterator.peek(6), 0x0020u); + EXPECT_EQ(iterator.peek(7), 0x1f600u); + EXPECT_EQ(iterator.peek(8), 0x1f643u); + EXPECT(!iterator.peek(9).has_value()); EXPECT(++iterator != view.end()); - EXPECT(*iterator == 1088); + EXPECT_EQ(*iterator, 0x0440u); EXPECT(iterator.length_in_code_units() == 1); + EXPECT_EQ(iterator.peek(0), 0x0440u); + EXPECT_EQ(iterator.peek(1), 0x0438u); + EXPECT_EQ(iterator.peek(2), 0x0432u); + EXPECT_EQ(iterator.peek(3), 0x0435u); + EXPECT_EQ(iterator.peek(4), 0x0442u); + EXPECT_EQ(iterator.peek(5), 0x0020u); + EXPECT_EQ(iterator.peek(6), 0x1f600u); + EXPECT_EQ(iterator.peek(7), 0x1f643u); + EXPECT(!iterator.peek(8).has_value()); EXPECT(++iterator != view.end()); - EXPECT(*iterator == 1080); + EXPECT_EQ(*iterator, 0x0438u); EXPECT(iterator.length_in_code_units() == 1); + EXPECT_EQ(iterator.peek(0), 0x0438u); + EXPECT_EQ(iterator.peek(1), 0x0432u); + EXPECT_EQ(iterator.peek(2), 0x0435u); + EXPECT_EQ(iterator.peek(3), 0x0442u); + EXPECT_EQ(iterator.peek(4), 0x0020u); + EXPECT_EQ(iterator.peek(5), 0x1f600u); + EXPECT_EQ(iterator.peek(6), 0x1f643u); + EXPECT(!iterator.peek(7).has_value()); EXPECT(++iterator != view.end()); - EXPECT(*iterator == 1074); + EXPECT_EQ(*iterator, 0x0432u); 
EXPECT(iterator.length_in_code_units() == 1); + EXPECT_EQ(iterator.peek(0), 0x0432u); + EXPECT_EQ(iterator.peek(1), 0x0435u); + EXPECT_EQ(iterator.peek(2), 0x0442u); + EXPECT_EQ(iterator.peek(3), 0x0020u); + EXPECT_EQ(iterator.peek(4), 0x1f600u); + EXPECT_EQ(iterator.peek(5), 0x1f643u); + EXPECT(!iterator.peek(6).has_value()); EXPECT(++iterator != view.end()); - EXPECT(*iterator == 1077); + EXPECT_EQ(*iterator, 0x0435u); EXPECT(iterator.length_in_code_units() == 1); + EXPECT_EQ(iterator.peek(0), 0x0435u); + EXPECT_EQ(iterator.peek(1), 0x0442u); + EXPECT_EQ(iterator.peek(2), 0x0020u); + EXPECT_EQ(iterator.peek(3), 0x1f600u); + EXPECT_EQ(iterator.peek(4), 0x1f643u); + EXPECT(!iterator.peek(5).has_value()); EXPECT(++iterator != view.end()); - EXPECT(*iterator == 1090); + EXPECT_EQ(*iterator, 0x0442u); EXPECT(iterator.length_in_code_units() == 1); + EXPECT_EQ(iterator.peek(0), 0x0442u); + EXPECT_EQ(iterator.peek(1), 0x0020u); + EXPECT_EQ(iterator.peek(2), 0x1f600u); + EXPECT_EQ(iterator.peek(3), 0x1f643u); + EXPECT(!iterator.peek(4).has_value()); EXPECT(++iterator != view.end()); - EXPECT(*iterator == 32); + EXPECT(*iterator == ' '); EXPECT(iterator.length_in_code_units() == 1); + EXPECT_EQ(iterator.peek(0), 0x0020u); + EXPECT_EQ(iterator.peek(1), 0x1f600u); + EXPECT_EQ(iterator.peek(2), 0x1f643u); + EXPECT(!iterator.peek(3).has_value()); EXPECT(++iterator != view.end()); - EXPECT(*iterator == 128512); + EXPECT_EQ(*iterator, 0x1f600u); EXPECT(iterator.length_in_code_units() == 2); + EXPECT_EQ(iterator.peek(0), 0x1f600u); + EXPECT_EQ(iterator.peek(1), 0x1f643u); + EXPECT(!iterator.peek(2).has_value()); + + EXPECT(++iterator != view.end()); + EXPECT_EQ(*iterator, 0x1f643u); + EXPECT(iterator.length_in_code_units() == 2); + EXPECT_EQ(iterator.peek(0), 0x1f643u); + EXPECT(!iterator.peek(1).has_value()); EXPECT(++iterator == view.end()); + EXPECT(!iterator.peek(0).has_value()); EXPECT_DEATH("Dereferencing Utf16CodePointIterator which is at its end.", *iterator); - 
EXPECT_DEATH("Incrementing Utf16CodePointIterator which is at its end.", ++iterator); }