LibRegex: Only search start of line if pattern begins with ^

This commit is contained in:
Gingeh 2024-09-30 12:49:15 +10:00 committed by Ali Mohammad Pur
commit de588a97c0
Notes: github-actions[bot] 2024-09-30 10:29:20 +00:00
4 changed files with 33 additions and 1 deletion

View file

@ -998,6 +998,15 @@ BENCHMARK_CASE(fork_performance)
EXPECT_EQ(result.success, true);
}
BENCHMARK_CASE(anchor_performance)
{
    // Repeatedly runs a start-anchored pattern against g_lots_of_a_s and expects every
    // attempt to fail; this exercises how quickly a '^'-prefixed pattern is rejected.
    // NOTE(review): g_lots_of_a_s is presumably a long run of 'a' characters - confirm at its definition.
    Regex<ECMA262> anchored_pattern("^b");

    constexpr auto iteration_count = 100'000;
    for (auto iteration = 0; iteration < iteration_count; ++iteration) {
        auto const matched = anchored_pattern.match(g_lots_of_a_s).success;
        EXPECT_EQ(matched, false);
    }
}
TEST_CASE(optimizer_atomic_groups)
{
Array tests {
@ -1078,6 +1087,21 @@ TEST_CASE(optimizer_alternation)
}
}
TEST_CASE(start_anchor)
{
    // A leading circumflex is an anchor: the pattern may only match at the beginning of a
    // line, and the '^' itself is not treated as a literal character.
    Regex<PosixBasic> anchored_abc("^abc");

    // Runs the anchored pattern over `subject` with the given flags and reports whether it matched.
    auto matches = [&anchored_abc](StringView subject, PosixFlags flags) {
        return anchored_abc.match(subject, flags).success;
    };

    // Single-line subjects: only "abc" at the very start may match.
    EXPECT_EQ(matches("123abcdef"sv, PosixFlags::Global), false);
    EXPECT_EQ(matches("abc123"sv, PosixFlags::Global), true);
    EXPECT_EQ(matches("123^abcdef"sv, PosixFlags::Global), false);
    EXPECT_EQ(matches("^abc123"sv, PosixFlags::Global), false);

    // With Multiline set, "abc" at the start of a later line matches as well.
    EXPECT_EQ(matches("123\nabc"sv, PosixFlags::Multiline), true);
}
TEST_CASE(posix_basic_dollar_is_end_anchor)
{
// Ensure that a dollar sign at the end only matches the end of the line.

View file

@ -184,6 +184,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
continue_search = false;
auto single_match_only = input.regex_options.has_flag_set(AllFlags::SingleMatch);
auto only_start_of_line = m_pattern->parser_result.optimization_data.only_start_of_line && !input.regex_options.has_flag_set(AllFlags::Multiline);
for (auto const& view : views) {
if (lines_to_skip != 0) {
@ -294,7 +295,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
break;
}
if (!continue_search)
if (!continue_search || only_start_of_line)
break;
}

View file

@ -36,6 +36,12 @@ void Regex<Parser>::run_optimization_passes()
// e.g. a*b -> (ATOMIC a*)b
attempt_rewrite_loops_as_atomic_groups(blocks);
// FIXME: There are a few more conditions under which this can be true (e.g. when the anchor is within an arbitrarily nested capture group).
MatchState state;
auto& opcode = parser_result.bytecode.get_opcode(state);
if (opcode.opcode_id() == OpCodeId::CheckBegin)
parser_result.optimization_data.only_start_of_line = true;
parser_result.bytecode.flatten();
}

View file

@ -58,6 +58,7 @@ public:
// Facts about the compiled pattern that are recorded ahead of time so the matcher can take shortcuts.
struct {
// NOTE(review): presumably holds the literal to search for when the whole pattern is a plain substring - confirm against the optimizer.
Optional<ByteString> pure_substring_search;
// Set by run_optimization_passes() when the first opcode of the bytecode is CheckBegin.
// The matcher reads it (ignoring it when the Multiline flag is set) to break out of its search early.
bool only_start_of_line = false;
} optimization_data {};
};