mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-09 01:29:17 +00:00
LibUnicode: Consistently reject out-of-bounds segmenter indices
In the UTF-8 implementation, this prevents out-of-bounds access of the underlying text data, as the ICU macro would essentially do something akin to `text[text.length()]`. The UTF-16 implementation already checks for out-of-bounds indices, but previously returned 0. We now return an empty Optional in both implementations. This doesn't affect LibJS (the user of the UTF-16 implementation), as it already does bounds checking before invoking LibUnicode APIs.
This commit is contained in:
parent
0c09a099a5
commit
e6b7c8cde2
Notes:
github-actions[bot]
2025-01-16 22:23:49 +00:00
Author: https://github.com/trflynn89
Commit: e6b7c8cde2
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/3278
Reviewed-by: https://github.com/gmta ✅
2 changed files with 49 additions and 10 deletions
|
@ -9,6 +9,7 @@
|
|||
#include <AK/Array.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Utf16View.h>
|
||||
#include <AK/Vector.h>
|
||||
#include <LibUnicode/Segmenter.h>
|
||||
|
||||
|
@ -126,3 +127,31 @@ TEST_CASE(word_segmentation)
|
|||
"The quick (“brown”) fox can’t jump 32.3 feet, right?"sv,
|
||||
{ 0u, 3u, 4u, 9u, 10u, 11u, 14u, 19u, 22u, 23u, 24u, 27u, 28u, 35u, 36u, 40u, 41u, 45u, 46u, 50u, 51u, 52u, 57u, 58u });
|
||||
}
|
||||
|
||||
// Checks that a word segmenter yields no boundary (an empty Optional) when queried with an index equal to the
// length of the segmented text. The same check is run against text supplied as a String and as a Utf16View.
TEST_CASE(out_of_bounds)
{
    // Text supplied as a String; the query index is its byte count.
    {
        auto utf8_text = "foo"_string;
        auto const past_the_end = utf8_text.byte_count();

        auto utf8_segmenter = Unicode::Segmenter::create(Unicode::SegmenterGranularity::Word);
        utf8_segmenter->set_segmented_text(utf8_text);

        auto result = utf8_segmenter->previous_boundary(past_the_end);
        EXPECT(!result.has_value());

        result = utf8_segmenter->next_boundary(past_the_end);
        EXPECT(!result.has_value());
    }

    // Text supplied as a Utf16View; the query index is the size of the converted UTF-16 data.
    {
        auto utf16_text = MUST(AK::utf8_to_utf16("foo"sv));
        auto const past_the_end = utf16_text.size();

        auto utf16_segmenter = Unicode::Segmenter::create(Unicode::SegmenterGranularity::Word);
        utf16_segmenter->set_segmented_text(Utf16View { utf16_text });

        auto result = utf16_segmenter->previous_boundary(past_the_end);
        EXPECT(!result.has_value());

        result = utf16_segmenter->next_boundary(past_the_end);
        EXPECT(!result.has_value());
    }
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue