mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-28 11:49:44 +00:00
LibUnicode: Consistently reject out-of-bounds segmenter indices
In the UTF-8 implementation, this prevents out-of-bounds access of the underlying text data, as the ICU macro would essentially do something akin to `text[text.length()]`. The UTF-16 implementation already checked for out-of-bounds indices, but previously returned 0 in that case. Both implementations now return an empty Optional instead. This doesn't affect LibJS (the user of the UTF-16 implementation), as it already does bounds checking before invoking LibUnicode APIs.
This commit is contained in:
parent
0c09a099a5
commit
e6b7c8cde2
Notes:
github-actions[bot]
2025-01-16 22:23:49 +00:00
Author: https://github.com/trflynn89
Commit: e6b7c8cde2
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/3278
Reviewed-by: https://github.com/gmta ✅
2 changed files with 49 additions and 10 deletions
|
@ -87,13 +87,15 @@ public:
|
|||
virtual Optional<size_t> previous_boundary(size_t boundary, Inclusive inclusive) override
|
||||
{
|
||||
auto icu_boundary = align_boundary(boundary);
|
||||
if (!icu_boundary.has_value())
|
||||
return {};
|
||||
|
||||
if (inclusive == Inclusive::Yes) {
|
||||
if (static_cast<bool>(m_segmenter->isBoundary(icu_boundary)))
|
||||
return static_cast<size_t>(icu_boundary);
|
||||
if (static_cast<bool>(m_segmenter->isBoundary(*icu_boundary)))
|
||||
return static_cast<size_t>(*icu_boundary);
|
||||
}
|
||||
|
||||
if (auto index = m_segmenter->preceding(icu_boundary); index != icu::BreakIterator::DONE)
|
||||
if (auto index = m_segmenter->preceding(*icu_boundary); index != icu::BreakIterator::DONE)
|
||||
return static_cast<size_t>(index);
|
||||
|
||||
return {};
|
||||
|
@ -102,13 +104,15 @@ public:
|
|||
virtual Optional<size_t> next_boundary(size_t boundary, Inclusive inclusive) override
|
||||
{
|
||||
auto icu_boundary = align_boundary(boundary);
|
||||
if (!icu_boundary.has_value())
|
||||
return {};
|
||||
|
||||
if (inclusive == Inclusive::Yes) {
|
||||
if (static_cast<bool>(m_segmenter->isBoundary(icu_boundary)))
|
||||
return static_cast<size_t>(icu_boundary);
|
||||
if (static_cast<bool>(m_segmenter->isBoundary(*icu_boundary)))
|
||||
return static_cast<size_t>(*icu_boundary);
|
||||
}
|
||||
|
||||
if (auto index = m_segmenter->following(icu_boundary); index != icu::BreakIterator::DONE)
|
||||
if (auto index = m_segmenter->following(*icu_boundary); index != icu::BreakIterator::DONE)
|
||||
return static_cast<size_t>(index);
|
||||
|
||||
return {};
|
||||
|
@ -173,19 +177,25 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
i32 align_boundary(size_t boundary)
|
||||
Optional<i32> align_boundary(size_t boundary)
|
||||
{
|
||||
auto icu_boundary = static_cast<i32>(boundary);
|
||||
|
||||
return m_segmented_text.visit(
|
||||
[&](String const& text) {
|
||||
[&](String const& text) -> Optional<i32> {
|
||||
if (boundary >= text.byte_count())
|
||||
return {};
|
||||
|
||||
U8_SET_CP_START(text.bytes().data(), 0, icu_boundary);
|
||||
return icu_boundary;
|
||||
},
|
||||
[&](icu::UnicodeString const& text) {
|
||||
[&](icu::UnicodeString const& text) -> Optional<i32> {
|
||||
if (icu_boundary >= text.length())
|
||||
return {};
|
||||
|
||||
return text.getChar32Start(icu_boundary);
|
||||
},
|
||||
[](Empty) -> i32 { VERIFY_NOT_REACHED(); });
|
||||
[](Empty) -> Optional<i32> { VERIFY_NOT_REACHED(); });
|
||||
}
|
||||
|
||||
void for_each_boundary(SegmentationCallback callback)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue