LibRegex: Flatten capture group list in MatchState

This makes copying the capture group COWVector significantly cheaper,
as we no longer have to run any constructors for it; a plain memcpy suffices.
This commit is contained in:
Ali Mohammad Pur 2025-04-15 15:31:08 +02:00 committed by Andreas Kling
parent bbef0e8375
commit 76f5dce3db
Notes: github-actions[bot] 2025-04-18 15:10:37 +00:00
14 changed files with 98 additions and 87 deletions

View file

@ -164,7 +164,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
size_t match_count { 0 };
MatchInput input;
MatchState state;
MatchState state { m_pattern->parser_result.capture_groups_count };
size_t operations = 0;
input.regex_options = m_regex_options | regex_options.value_or({}).value();
@ -189,20 +189,6 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
}
}
if (c_match_preallocation_count) {
state.matches.ensure_capacity(c_match_preallocation_count);
state.capture_group_matches.ensure_capacity(c_match_preallocation_count);
auto& capture_groups_count = m_pattern->parser_result.capture_groups_count;
for (size_t j = 0; j < c_match_preallocation_count; ++j) {
state.matches.empend();
state.capture_group_matches.empend();
state.capture_group_matches.mutable_at(j).ensure_capacity(capture_groups_count);
for (size_t k = 0; k < capture_groups_count; ++k)
state.capture_group_matches.mutable_at(j).unchecked_append({});
}
}
auto append_match = [](auto& input, auto& state, auto& start_position) {
if (state.matches.size() == input.match_index)
state.matches.empend();
@ -343,29 +329,34 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
break;
}
auto flat_capture_group_matches = move(state.flat_capture_group_matches).release();
if (flat_capture_group_matches.size() < state.capture_group_count * match_count) {
flat_capture_group_matches.ensure_capacity(match_count * state.capture_group_count);
for (size_t i = flat_capture_group_matches.size(); i < match_count * state.capture_group_count; ++i)
flat_capture_group_matches.empend();
}
Vector<Span<Match>> capture_group_matches;
for (size_t i = 0; i < match_count; ++i) {
auto span = flat_capture_group_matches.span().slice(state.capture_group_count * i, state.capture_group_count);
capture_group_matches.append(span);
}
RegexResult result {
match_count != 0,
match_count,
move(state.matches).release(),
move(state.capture_group_matches).release(),
move(flat_capture_group_matches),
move(capture_group_matches),
operations,
m_pattern->parser_result.capture_groups_count,
m_pattern->parser_result.named_capture_groups_count,
};
if (match_count) {
// Make sure there are as many capture matches as there are actual matches.
if (result.capture_group_matches.size() < match_count)
result.capture_group_matches.resize(match_count);
for (auto& matches : result.capture_group_matches)
matches.resize(m_pattern->parser_result.capture_groups_count + 1);
if (!input.regex_options.has_flag_set(AllFlags::SkipTrimEmptyMatches)) {
for (auto& matches : result.capture_group_matches)
matches.remove_all_matching([](auto& match) { return match.view.is_null(); });
}
} else {
if (match_count > 0)
VERIFY(result.capture_group_matches.size() >= match_count);
else
result.capture_group_matches.clear_with_capacity();
}
return result;
}