AK+LibJS: Enforce limits in Utf16View offset computations

RegExp was the only caller relying on being able to provide an offset
larger than the string length. So let's do a pre-check in RegExp and
then enforce that the offsets we receive in Utf16View are valid.
This commit is contained in:
Timothy Flynn 2025-07-22 09:46:15 -04:00 committed by Jelle Raaijmakers
commit 42b41431eb
Notes: github-actions[bot] 2025-07-22 15:18:49 +00:00
2 changed files with 8 additions and 1 deletions

View file

@@ -185,6 +185,8 @@ bool Utf16View::validate(size_t& valid_code_units, AllowLonelySurrogates allow_l
size_t Utf16View::code_unit_offset_of(size_t code_point_offset) const size_t Utf16View::code_unit_offset_of(size_t code_point_offset) const
{ {
VERIFY(code_point_offset <= length_in_code_points());
if (length_in_code_points() == length_in_code_units()) // Fast path: all code points are one code unit. if (length_in_code_points() == length_in_code_units()) // Fast path: all code points are one code unit.
return code_point_offset; return code_point_offset;
@@ -203,6 +205,8 @@ size_t Utf16View::code_unit_offset_of(size_t code_point_offset) const
size_t Utf16View::code_point_offset_of(size_t code_unit_offset) const size_t Utf16View::code_point_offset_of(size_t code_unit_offset) const
{ {
VERIFY(code_unit_offset <= length_in_code_units());
if (length_in_code_points() == length_in_code_units()) // Fast path: all code points are one code unit. if (length_in_code_points() == length_in_code_units()) // Fast path: all code points are one code unit.
return code_unit_offset; return code_unit_offset;

View file

@@ -221,7 +221,10 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
// ii. Set matchSucceeded to true. // ii. Set matchSucceeded to true.
// 13.b and 13.c // 13.b and 13.c
regex.start_offset = full_unicode ? string->utf16_string_view().code_point_offset_of(last_index) : last_index; regex.start_offset = full_unicode && last_index <= string->length_in_utf16_code_units()
? string->utf16_string_view().code_point_offset_of(last_index)
: last_index;
result = regex.match(string->utf16_string_view()); result = regex.match(string->utf16_string_view());
// 13.d and 13.a // 13.d and 13.a