LibRegex: Use code unit length in more places that apply
Some checks are pending
Run test262 and test-wasm / run_and_update_results (push) Waiting to run
Lint Code / lint (push) Waiting to run
Label PRs with merge conflicts / auto-labeler (push) Waiting to run
Push notes / build (push) Waiting to run
CI / macOS, arm64, Sanitizer, Clang (push) Waiting to run
CI / Linux, x86_64, Fuzzers, Clang (push) Waiting to run
CI / Linux, x86_64, Sanitizer, GNU (push) Waiting to run
CI / Linux, x86_64, Sanitizer, Clang (push) Waiting to run
Package the js repl as a binary artifact / Linux, arm64 (push) Waiting to run
Package the js repl as a binary artifact / macOS, arm64 (push) Waiting to run
Package the js repl as a binary artifact / Linux, x86_64 (push) Waiting to run

Finishes what 7f6b70fafb started.
Having one part use length and another use code unit length led to crashes;
the added test ensures we don't mess that up again.
This commit is contained in:
Ali Mohammad Pur 2025-07-24 20:24:55 +02:00 committed by Jelle Raaijmakers
commit c7ad6cd508
Notes: github-actions[bot] 2025-07-24 21:10:34 +00:00
2 changed files with 5 additions and 4 deletions

View file

@ -387,7 +387,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput c
if (start_position < match.column)
return ExecutionResult::Continue;
VERIFY(start_position + length <= input.view.length());
VERIFY(start_position + length <= input.view.length_in_code_units());
auto captured_text = input.view.substring_view(start_position, length);
@ -420,7 +420,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchIn
if (start_position < match.column)
return ExecutionResult::Continue;
VERIFY(start_position + length <= input.view.length());
VERIFY(start_position + length <= input.view.length_in_code_units());
auto view = input.view.substring_view(start_position, length);
@ -551,7 +551,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
break;
}
case CharacterCompareType::CharClass: {
if (input.view.length() <= state.string_position_in_code_units)
if (input.view.length_in_code_units() <= state.string_position_in_code_units)
return ExecutionResult::Failed_ExecuteLowPrioForks;
auto character_class = (CharClass)m_bytecode->at(offset++);

View file

@ -828,7 +828,8 @@ TEST_CASE(ECMA262_unicode_match)
"\\ud83c[\\udffb-\\udfff](?=\\ud83c[\\udffb-\\udfff])|(?:[^\\ud800-\\udfff][\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]?|[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|(?:\\ud83c[\\udde6-\\uddff]){2}|[\\ud800-\\udbff][\\udc00-\\udfff]|[\\ud800-\\udfff])[\\ufe0e\\ufe0f]?(?:[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|\\ud83c[\\udffb-\\udfff])?(?:\\u200d(?:[^\\ud800-\\udfff]|(?:\\ud83c[\\udde6-\\uddff]){2}|[\\ud800-\\udbff][\\udc00-\\udfff])[\\ufe0e\\ufe0f]?(?:[\\u0300-\\u036f\\ufe20-\\ufe2f\\u20d0-\\u20ff]|\\ud83c[\\udffb-\\udfff])?)*"sv,
"😀"sv,
true,
}
},
{ "(?<before>\\w*)\\s*(?<emoji>\\p{Emoji}+)\\s*(?<after>\\w*)"sv, "Hey 🎉 there! I love 🍕 pizza"sv, true, ECMAScriptFlags::Unicode },
};
for (auto& test : tests) {