LibRegex: Only search start of line if pattern begins with ^

This commit is contained in:
Gingeh 2024-09-30 12:49:15 +10:00 committed by Ali Mohammad Pur
commit de588a97c0
Notes: github-actions[bot] 2024-09-30 10:29:20 +00:00
4 changed files with 33 additions and 1 deletions

View file

@ -998,6 +998,15 @@ BENCHMARK_CASE(fork_performance)
EXPECT_EQ(result.success, true); EXPECT_EQ(result.success, true);
} }
// Benchmarks repeated failing matches of a start-anchored pattern ("^b")
// against g_lots_of_a_s (defined elsewhere in this file; presumably a long run
// of 'a' characters, judging by its name — confirm at its definition).
BENCHMARK_CASE(anchor_performance)
{
    constexpr auto iteration_count = 100'000;

    Regex<ECMA262> anchored_pattern("^b");
    for (auto remaining = iteration_count; remaining > 0; --remaining) {
        // Every attempt is expected to fail.
        EXPECT_EQ(anchored_pattern.match(g_lots_of_a_s).success, false);
    }
}
TEST_CASE(optimizer_atomic_groups) TEST_CASE(optimizer_atomic_groups)
{ {
Array tests { Array tests {
@ -1078,6 +1087,21 @@ TEST_CASE(optimizer_alternation)
} }
} }
// Verifies that a leading circumflex in a POSIX basic pattern anchors the match
// to the start of a line instead of being treated as a literal '^' or letting
// the pattern match further into the subject.
TEST_CASE(start_anchor)
{
    Regex<PosixBasic> anchored_pattern("^abc");

    // Small helper so each expectation reads as "subject, flags -> matched?".
    auto matches = [&](auto subject, PosixFlags flags) {
        return anchored_pattern.match(subject, flags).success;
    };

    // Single line: "abc" must sit at the very beginning of the subject.
    EXPECT_EQ(matches("123abcdef"sv, PosixFlags::Global), false);
    EXPECT_EQ(matches("abc123"sv, PosixFlags::Global), true);

    // A literal '^' in the subject is not what the anchor matches.
    EXPECT_EQ(matches("123^abcdef"sv, PosixFlags::Global), false);
    EXPECT_EQ(matches("^abc123"sv, PosixFlags::Global), false);

    // Multiline: the anchor also matches right after a newline.
    EXPECT_EQ(matches("123\nabc"sv, PosixFlags::Multiline), true);
}
TEST_CASE(posix_basic_dollar_is_end_anchor) TEST_CASE(posix_basic_dollar_is_end_anchor)
{ {
// Ensure that a dollar sign at the end only matches the end of the line. // Ensure that a dollar sign at the end only matches the end of the line.

View file

@ -184,6 +184,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
continue_search = false; continue_search = false;
auto single_match_only = input.regex_options.has_flag_set(AllFlags::SingleMatch); auto single_match_only = input.regex_options.has_flag_set(AllFlags::SingleMatch);
auto only_start_of_line = m_pattern->parser_result.optimization_data.only_start_of_line && !input.regex_options.has_flag_set(AllFlags::Multiline);
for (auto const& view : views) { for (auto const& view : views) {
if (lines_to_skip != 0) { if (lines_to_skip != 0) {
@ -294,7 +295,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
break; break;
} }
if (!continue_search) if (!continue_search || only_start_of_line)
break; break;
} }

View file

@ -36,6 +36,12 @@ void Regex<Parser>::run_optimization_passes()
// e.g. a*b -> (ATOMIC a*)b // e.g. a*b -> (ATOMIC a*)b
attempt_rewrite_loops_as_atomic_groups(blocks); attempt_rewrite_loops_as_atomic_groups(blocks);
// FIXME: only_start_of_line could be set in a few more cases (e.g. when the leading ^ is within an arbitrarily nested capture group).
MatchState state;
auto& opcode = parser_result.bytecode.get_opcode(state);
if (opcode.opcode_id() == OpCodeId::CheckBegin)
parser_result.optimization_data.only_start_of_line = true;
parser_result.bytecode.flatten(); parser_result.bytecode.flatten();
} }

View file

@ -58,6 +58,7 @@ public:
struct { struct {
Optional<ByteString> pure_substring_search; Optional<ByteString> pure_substring_search;
bool only_start_of_line = false;
} optimization_data {}; } optimization_data {};
}; };