LibRegex: Don't repeat the same fork again

If a state has already been tried, skip over it, since trying it again
can never lead to a match.
This fixes performance/memory issues in cases like
/(a+)+b/.exec("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")
or
/(a|a?)+b/...

Fixes #2622.
This commit is contained in:
Ali Mohammad Pur 2025-01-15 15:22:01 +01:00 committed by Andreas Kling
commit cce000d57c
Notes: github-actions[bot] 2025-01-17 09:35:15 +00:00
3 changed files with 66 additions and 9 deletions

View file

@@ -1004,9 +1004,21 @@ static auto g_lots_of_a_s = ByteString::repeated('a', 10'000'000);
// Benchmarks regex patterns whose matching involves heavy forking.
// Each case lives in its own scope so that `re` and `result` are neither
// shared nor shadowed between cases, and every pattern is matched exactly once.
BENCHMARK_CASE(fork_performance)
{
    // Simple repeated group over the full 10'000'000-character all-'a' input;
    // the match is expected to succeed.
    {
        Regex<ECMA262> re("(?:aa)*");
        auto result = re.match(g_lots_of_a_s);
        EXPECT_EQ(result.success, true);
    }
    // Nested quantifiers over 100 'a's: the input contains no 'b', so the
    // match must fail after the repeated forks have been explored.
    {
        Regex<ECMA262> re("(a+)+b");
        auto result = re.match(g_lots_of_a_s.substring_view(0, 100));
        EXPECT_EQ(result.success, false);
    }
    // Anchored, ambiguous alternation over 100 'a's followed by a single 'b':
    // the trailing 'b' cannot be consumed by the group, so the match must fail.
    {
        Regex<ECMA262> re("^(a|a?)+$");
        auto result = re.match(ByteString::formatted("{}b", g_lots_of_a_s.substring_view(0, 100)));
        EXPECT_EQ(result.success, false);
    }
}
BENCHMARK_CASE(anchor_performance)