From cc1f0c3af2c2a237a317202ee844b97783de61ce Mon Sep 17 00:00:00 2001 From: Ali Mohammad Pur Date: Tue, 8 Oct 2024 19:22:33 +0200 Subject: [PATCH] LibRegex: Restore checkpoints when restoring the state post-fork Fixes the lockup/OOM in #968. --- Tests/LibRegex/Regex.cpp | 3 +++ Userland/Libraries/LibRegex/RegexByteCode.cpp | 10 +++++----- Userland/Libraries/LibRegex/RegexMatch.h | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index 7ad236110b4..24222821a6f 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -705,6 +705,8 @@ TEST_CASE(ECMA262_match) { "a$"sv, "a\r\n"sv, true, global_multiline.value() }, // $ should accept all LineTerminators in ECMA262 mode with Multiline. { "^a"sv, "\ra"sv, true, global_multiline.value() }, { "^(.*?):[ \\t]*([^\\r\\n]*)$"sv, "content-length: 488\r\ncontent-type: application/json; charset=utf-8\r\n"sv, true, global_multiline.value() }, + { "^\\?((&?category=[0-9]+)?(&?shippable=1)?(&?ad_type=demand)?(&?page=[0-9]+)?(&?locations=(r|d)_[0-9]+)?)+$"sv, + "?category=54&shippable=1&baby_age=p,0,1,3"sv, false }, // ladybird#968, ?+ should not loop forever. }; // clang-format on @@ -981,6 +983,7 @@ TEST_CASE(theoretically_infinite_loop) "(a*?)*"sv, // Infinitely matching empty substrings, the outer loop should short-circuit. "(a*)*?"sv, // Should match exactly nothing. "(?:)*?"sv, // Should not generate an infinite fork loop. + "(a?)+$"sv, // Infinitely matching empty strings, but with '+' instead of '*'. }; for (auto& pattern : patterns) { Regex re(pattern); diff --git a/Userland/Libraries/LibRegex/RegexByteCode.cpp b/Userland/Libraries/LibRegex/RegexByteCode.cpp index 36d55d502a0..f801a1e71ef 100644 --- a/Userland/Libraries/LibRegex/RegexByteCode.cpp +++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp @@ -1072,20 +1072,20 @@ ALWAYS_INLINE ExecutionResult OpCode_ResetRepeat::execute(MatchInput const&, Mat return ExecutionResult::Continue; } -ALWAYS_INLINE ExecutionResult OpCode_Checkpoint::execute(MatchInput const& input, MatchState& state) const +ALWAYS_INLINE ExecutionResult OpCode_Checkpoint::execute(MatchInput const&, MatchState& state) const { auto id = this->id(); - if (id >= input.checkpoints.size()) - input.checkpoints.resize(id + 1); + if (id >= state.checkpoints.size()) + state.checkpoints.resize(id + 1); - input.checkpoints[id] = state.string_position + 1; + state.checkpoints[id] = state.string_position + 1; return ExecutionResult::Continue; } ALWAYS_INLINE ExecutionResult OpCode_JumpNonEmpty::execute(MatchInput const& input, MatchState& state) const { u64 current_position = state.string_position; - auto checkpoint_position = input.checkpoints[checkpoint()]; + auto checkpoint_position = state.checkpoints[checkpoint()]; if (checkpoint_position != 0 && checkpoint_position != current_position + 1) { auto form = this->form(); diff --git a/Userland/Libraries/LibRegex/RegexMatch.h b/Userland/Libraries/LibRegex/RegexMatch.h index 46f7ef20de7..bdeb3baaaef 100644 --- a/Userland/Libraries/LibRegex/RegexMatch.h +++ b/Userland/Libraries/LibRegex/RegexMatch.h @@ -547,7 +547,6 @@ struct MatchInput { mutable Vector saved_positions; mutable Vector saved_code_unit_positions; mutable Vector saved_forks_since_last_save; - mutable Vector checkpoints; mutable Optional fork_to_replace; }; @@ -562,6 +561,7 @@ struct MatchState { COWVector matches; COWVector> capture_group_matches; COWVector repetition_marks; + Vector checkpoints; }; }