mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-09-02 15:46:33 +00:00
LibRegex: Fix capture groups in quantified alternations
Some checks are pending
CI / macOS, arm64, Sanitizer, Clang (push) Waiting to run
CI / Linux, x86_64, Fuzzers, Clang (push) Waiting to run
CI / Linux, x86_64, Sanitizer, GNU (push) Waiting to run
CI / Linux, x86_64, Sanitizer, Clang (push) Waiting to run
Package the js repl as a binary artifact / Linux, arm64 (push) Waiting to run
Run test262 and test-wasm / run_and_update_results (push) Waiting to run
Package the js repl as a binary artifact / macOS, arm64 (push) Waiting to run
Package the js repl as a binary artifact / Linux, x86_64 (push) Waiting to run
Lint Code / lint (push) Waiting to run
Label PRs with merge conflicts / auto-labeler (push) Waiting to run
Push notes / build (push) Waiting to run
Some checks are pending
CI / macOS, arm64, Sanitizer, Clang (push) Waiting to run
CI / Linux, x86_64, Fuzzers, Clang (push) Waiting to run
CI / Linux, x86_64, Sanitizer, GNU (push) Waiting to run
CI / Linux, x86_64, Sanitizer, Clang (push) Waiting to run
Package the js repl as a binary artifact / Linux, arm64 (push) Waiting to run
Run test262 and test-wasm / run_and_update_results (push) Waiting to run
Package the js repl as a binary artifact / macOS, arm64 (push) Waiting to run
Package the js repl as a binary artifact / Linux, x86_64 (push) Waiting to run
Lint Code / lint (push) Waiting to run
Label PRs with merge conflicts / auto-labeler (push) Waiting to run
Push notes / build (push) Waiting to run
This prevents empty matches from overwriting non-empty captures in quantified alternations. Fixes patterns like (a|a?)+ where the optional branch would incorrectly overwrite meaningful captures with empty strings.
This commit is contained in:
parent
173bb67004
commit
e2f8f5a350
Notes:
github-actions[bot]
2025-07-24 11:20:26 +00:00
Author: https://github.com/aplefull
Commit: e2f8f5a350
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5571
Reviewed-by: https://github.com/alimpfard
2 changed files with 48 additions and 1 deletions
|
@ -389,7 +389,21 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput c
|
|||
|
||||
VERIFY(start_position + length <= input.view.length());
|
||||
|
||||
state.mutable_capture_group_matches(input.match_index).at(id() - 1) = { input.view.substring_view(start_position, length), input.line, start_position, input.global_offset + start_position };
|
||||
auto captured_text = input.view.substring_view(start_position, length);
|
||||
|
||||
// NOTE: Don't overwrite existing capture with empty match at the same position. The ECMA-262 RepeatMatcher
|
||||
// continuation chain effectively preserves captures when an empty match occurs at the position where the
|
||||
// existing capture ended.
|
||||
// See: https://tc39.es/ecma262/#step-repeatmatcher-done
|
||||
auto& existing_capture = state.mutable_capture_group_matches(input.match_index).at(id() - 1);
|
||||
if (length == 0 && !existing_capture.view.is_null() && existing_capture.view.length() > 0) {
|
||||
auto existing_end_position = existing_capture.global_offset - input.global_offset + existing_capture.view.length();
|
||||
if (existing_end_position == state.string_position) {
|
||||
return ExecutionResult::Continue;
|
||||
}
|
||||
}
|
||||
|
||||
state.mutable_capture_group_matches(input.match_index).at(id() - 1) = { captured_text, input.line, start_position, input.global_offset + start_position };
|
||||
|
||||
return ExecutionResult::Continue;
|
||||
}
|
||||
|
@ -410,6 +424,16 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchIn
|
|||
|
||||
auto view = input.view.substring_view(start_position, length);
|
||||
|
||||
// Same logic as in SaveRightCaptureGroup above.
|
||||
// https://tc39.es/ecma262/#step-repeatmatcher-done
|
||||
auto& existing_capture = state.mutable_capture_group_matches(input.match_index).at(id() - 1);
|
||||
if (length == 0 && !existing_capture.view.is_null() && existing_capture.view.length() > 0) {
|
||||
auto existing_end_position = existing_capture.global_offset - input.global_offset + existing_capture.view.length();
|
||||
if (existing_end_position == state.string_position) {
|
||||
return ExecutionResult::Continue;
|
||||
}
|
||||
}
|
||||
|
||||
state.mutable_capture_group_matches(input.match_index).at(id() - 1) = { view, name_string_table_index(), input.line, start_position, input.global_offset + start_position };
|
||||
|
||||
return ExecutionResult::Continue;
|
||||
|
|
|
@ -1330,6 +1330,29 @@ TEST_CASE(optimizer_repeat_offset)
|
|||
}
|
||||
}
|
||||
|
||||
TEST_CASE(quantified_alternation_capture_groups)
{
    // Ensure the group in (a|a?)+ reports the last meaningful (non-empty) match
    // rather than an empty string, for both a one- and a two-character subject.
    for (auto subject : { "a"sv, "aa"sv }) {
        // A fresh regex is built for every subject so the cases stay independent.
        Regex<ECMA262> pattern("^(a|a?)+$");
        auto match_result = pattern.match(subject);

        EXPECT_EQ(match_result.success, true);
        EXPECT_EQ(match_result.matches.size(), 1u);
        // The anchored pattern has to consume the whole subject.
        EXPECT_EQ(match_result.matches.first().view.to_byte_string(), subject);
        // The first capture group must hold "a", not "".
        EXPECT_EQ(match_result.capture_group_matches.first()[0].view.to_byte_string(), "a"sv);
    }
}
|
||||
|
||||
TEST_CASE(zero_width_backreference)
|
||||
{
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue