From 50733c564c3715cac570af6ee5b2907b4aa09b2a Mon Sep 17 00:00:00 2001 From: Ali Mohammad Pur Date: Mon, 23 Dec 2024 12:16:08 +0100 Subject: [PATCH] LibRegex: Use the *actually* correct repeat start offset for Repeat Fixes #2931 and various frequent crashes. --- Libraries/LibRegex/RegexOptimizer.cpp | 2 +- Tests/LibRegex/Regex.cpp | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/Libraries/LibRegex/RegexOptimizer.cpp b/Libraries/LibRegex/RegexOptimizer.cpp index b3d08d935c2..e3f4d1853da 100644 --- a/Libraries/LibRegex/RegexOptimizer.cpp +++ b/Libraries/LibRegex/RegexOptimizer.cpp @@ -98,7 +98,7 @@ typename Regex::BasicBlockList Regex::split_basic_blocks(ByteCod case OpCodeId::Repeat: { // Repeat produces two blocks, one containing its repeated expr, and one after that. auto& repeat = static_cast(opcode); - auto repeat_start = state.instruction_position - repeat.offset() - repeat.size(); + auto repeat_start = state.instruction_position - repeat.offset(); if (repeat_start > end_of_last_block) block_boundaries.append({ end_of_last_block, repeat_start, "Repeat"sv }); block_boundaries.append({ repeat_start, state.instruction_position, "Repeat after"sv }); diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index d32b5e820f1..c27fde606c6 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -710,6 +710,11 @@ TEST_CASE(ECMA262_match) "?category=54&shippable=1&baby_age=p,0,1,3"sv, false }, // ladybird#968, ?+ should not loop forever. { "([^\\s]+):\\s*([^;]+);"sv, "font-family: 'Inter';"sv, true }, // optimizer bug, blindly accepting inverted char classes [^x] as atomic rewrite opportunities. { "(a)(?=a*\\1)"sv, "aaaa"sv, true, global_multiline.value() }, // Optimizer bug, ignoring references that weren't bound in the current or past block, ladybird#2281 + { "[ a](b{2})"sv, "abb"sv, true }, // Optimizer bug, wrong Repeat basic block splits. + { "^ {0,3}(([\\`\\~])\\2{2,})\\s*([\\*_]*)\\s*([^\\*_\\s]*).*$"sv, ""sv, false }, // See above. + { "^(\\d{4}|[+-]\\d{6})(?:-?(\\d{2})(?:-?(\\d{2}))?)?(?:[ T]?(\\d{2}):?(\\d{2})(?::?(\\d{2})(?:[,.](\\d{1,}))?)?(?:(Z)|([+-])(\\d{2})(?::?(\\d{2}))?)?)?$"sv, + ""sv, + false, }, // See above, also ladybird#2931. }; // clang-format on