# Patch summary (commentary only; everything from the first "diff --git" header onward is unchanged).
#
# Purpose: make Segmenter boundary lookups report "no boundary" for an out-of-range index.
#
# Libraries/LibUnicode/Segmenter.cpp
#   * align_boundary(size_t boundary) used to return i32 and now returns an Optional.
#     - String text: returns an empty Optional when boundary >= text.byte_count();
#       otherwise runs U8_SET_CP_START on icu_boundary and returns it.
#     - icu::UnicodeString text: returns an empty Optional when icu_boundary >= text.length();
#       otherwise returns text.getChar32Start(icu_boundary).
#     - Empty: still VERIFY_NOT_REACHED().
#   * previous_boundary() and next_boundary() return {} as soon as align_boundary() yields no
#     value, and dereference the Optional (*icu_boundary) for isBoundary(), preceding() and
#     following().
#
# Tests/LibUnicode/TestSegmenter.cpp
#   * Adds one #include and TEST_CASE(out_of_bounds). With Word granularity it checks that
#     previous_boundary() and next_boundary() return no value when asked about
#     text.byte_count() (String input) and text.size() (Utf16View input) for the text "foo".
#
# NOTE(review): this copy of the patch looks damaged by extraction -- line breaks are collapsed
#   and angle-bracket spans seem to be missing (bare "#include", "static_cast(" with no target
#   type, "Optional" with no argument). It presumably will not apply as-is; confirm against the
#   original commit before using it.
# NOTE(review): the String branch compares the size_t parameter `boundary`, while the
#   UnicodeString branch compares `icu_boundary`, i.e. the value after the narrowing cast
#   (presumably to i32, judging by the removed "i32 align_boundary" signature). A boundary that
#   does not fit the narrower type could pass the second check but not the first -- confirm
#   whether both branches should compare `boundary`.
# NOTE(review): because the checks use ">=", a boundary equal to the text length yields no value
#   even for previous_boundary(); the new test pins that behaviour -- confirm it is intended.
#
diff --git a/Libraries/LibUnicode/Segmenter.cpp b/Libraries/LibUnicode/Segmenter.cpp index ec73eb1e7b8..db66f9e5516 100644 --- a/Libraries/LibUnicode/Segmenter.cpp +++ b/Libraries/LibUnicode/Segmenter.cpp @@ -87,13 +87,15 @@ public: virtual Optional previous_boundary(size_t boundary, Inclusive inclusive) override { auto icu_boundary = align_boundary(boundary); + if (!icu_boundary.has_value()) + return {}; if (inclusive == Inclusive::Yes) { - if (static_cast(m_segmenter->isBoundary(icu_boundary))) - return static_cast(icu_boundary); + if (static_cast(m_segmenter->isBoundary(*icu_boundary))) + return static_cast(*icu_boundary); } - if (auto index = m_segmenter->preceding(icu_boundary); index != icu::BreakIterator::DONE) + if (auto index = m_segmenter->preceding(*icu_boundary); index != icu::BreakIterator::DONE) return static_cast(index); return {}; @@ -102,13 +104,15 @@ public: virtual Optional next_boundary(size_t boundary, Inclusive inclusive) override { auto icu_boundary = align_boundary(boundary); + if (!icu_boundary.has_value()) + return {}; if (inclusive == Inclusive::Yes) { - if (static_cast(m_segmenter->isBoundary(icu_boundary))) - return static_cast(icu_boundary); + if (static_cast(m_segmenter->isBoundary(*icu_boundary))) + return static_cast(*icu_boundary); } - if (auto index = m_segmenter->following(icu_boundary); index != icu::BreakIterator::DONE) + if (auto index = m_segmenter->following(*icu_boundary); index != icu::BreakIterator::DONE) return static_cast(index); return {}; @@ -173,19 +177,25 @@ public: } private: - i32 align_boundary(size_t boundary) + Optional align_boundary(size_t boundary) { auto icu_boundary = static_cast(boundary); return m_segmented_text.visit( - [&](String const& text) { + [&](String const& text) -> Optional { + if (boundary >= text.byte_count()) + return {}; + U8_SET_CP_START(text.bytes().data(), 0, icu_boundary); return icu_boundary; }, - [&](icu::UnicodeString const& text) { + [&](icu::UnicodeString const& text) -> Optional { 
+ if (icu_boundary >= text.length()) + return {}; + return text.getChar32Start(icu_boundary); }, - [](Empty) -> i32 { VERIFY_NOT_REACHED(); }); + [](Empty) -> Optional { VERIFY_NOT_REACHED(); }); } void for_each_boundary(SegmentationCallback callback) diff --git a/Tests/LibUnicode/TestSegmenter.cpp b/Tests/LibUnicode/TestSegmenter.cpp index 403c428f66b..d40ea82cebf 100644 --- a/Tests/LibUnicode/TestSegmenter.cpp +++ b/Tests/LibUnicode/TestSegmenter.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -126,3 +127,31 @@ TEST_CASE(word_segmentation) "The quick (“brown”) fox can’t jump 32.3 feet, right?"sv, { 0u, 3u, 4u, 9u, 10u, 11u, 14u, 19u, 22u, 23u, 24u, 27u, 28u, 35u, 36u, 40u, 41u, 45u, 46u, 50u, 51u, 52u, 57u, 58u }); } + +TEST_CASE(out_of_bounds) +{ + { + auto text = "foo"_string; + + auto segmenter = Unicode::Segmenter::create(Unicode::SegmenterGranularity::Word); + segmenter->set_segmented_text(text); + + auto result = segmenter->previous_boundary(text.byte_count()); + EXPECT(!result.has_value()); + + result = segmenter->next_boundary(text.byte_count()); + EXPECT(!result.has_value()); + } + { + auto text = MUST(AK::utf8_to_utf16("foo"sv)); + + auto segmenter = Unicode::Segmenter::create(Unicode::SegmenterGranularity::Word); + segmenter->set_segmented_text(Utf16View { text }); + + auto result = segmenter->previous_boundary(text.size()); + EXPECT(!result.has_value()); + + result = segmenter->next_boundary(text.size()); + EXPECT(!result.has_value()); + } +}