mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-21 12:05:15 +00:00
LibRegex: Flatten capture group list in MatchState
This makes copying the capture group COWVector significantly cheaper, as we no longer have to run any constructors for it - just memcpy.
This commit is contained in:
parent
141f6cb392
commit
0f675d5992
14 changed files with 98 additions and 87 deletions
|
@ -127,6 +127,13 @@ public:
|
|||
return m_detail->m_members[index];
|
||||
}
|
||||
|
||||
// Read-only view over the underlying members. This const accessor hands out the span directly and does not call copy().
Span<T const> span() const { return m_detail->m_members; }
|
||||
// Writable view over the underlying members.
// copy() is invoked before the span is returned. NOTE(review): presumably this gives the caller private storage
// so that writes through the span do not affect other holders - confirm against copy()'s definition.
Span<T> mutable_span()
|
||||
{
|
||||
copy();
|
||||
return m_detail->m_members;
|
||||
}
|
||||
|
||||
size_t capacity() const
|
||||
{
|
||||
return m_detail->m_members.capacity();
|
||||
|
|
|
@ -35,7 +35,6 @@ public:
|
|||
static constexpr regex::RegexOptions<ECMAScriptFlags> default_flags {
|
||||
(regex::ECMAScriptFlags)regex::AllFlags::SingleMatch
|
||||
| (regex::ECMAScriptFlags)regex::AllFlags::Global
|
||||
| (regex::ECMAScriptFlags)regex::AllFlags::SkipTrimEmptyMatches
|
||||
| regex::ECMAScriptFlags::BrowserExtended
|
||||
};
|
||||
|
||||
|
|
|
@ -294,7 +294,7 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
|
|||
// 33. For each integer i such that i ≥ 1 and i ≤ n, in ascending order, do
|
||||
for (size_t i = 1; i <= result.n_capture_groups; ++i) {
|
||||
// a. Let captureI be ith element of r's captures List.
|
||||
auto& capture = result.capture_group_matches[0][i];
|
||||
auto& capture = result.capture_group_matches[0][i - 1];
|
||||
|
||||
Value captured_value;
|
||||
|
||||
|
|
|
@ -341,40 +341,29 @@ ALWAYS_INLINE ExecutionResult OpCode_CheckEnd::execute(MatchInput const& input,
|
|||
|
||||
ALWAYS_INLINE ExecutionResult OpCode_ClearCaptureGroup::execute(MatchInput const& input, MatchState& state) const
|
||||
{
|
||||
if (input.match_index < state.capture_group_matches.size()) {
|
||||
auto& group = state.capture_group_matches.mutable_at(input.match_index);
|
||||
auto group_id = id();
|
||||
if (group_id >= group.size())
|
||||
group.resize(group_id + 1);
|
||||
|
||||
group[group_id].reset();
|
||||
if (input.match_index < state.capture_group_matches_size()) {
|
||||
auto group = state.mutable_capture_group_matches(input.match_index);
|
||||
group[id() - 1].reset();
|
||||
}
|
||||
return ExecutionResult::Continue;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE ExecutionResult OpCode_SaveLeftCaptureGroup::execute(MatchInput const& input, MatchState& state) const
|
||||
{
|
||||
if (input.match_index >= state.capture_group_matches.size()) {
|
||||
state.capture_group_matches.ensure_capacity(input.match_index);
|
||||
auto capacity = state.capture_group_matches.capacity();
|
||||
for (size_t i = state.capture_group_matches.size(); i <= capacity; ++i)
|
||||
state.capture_group_matches.empend();
|
||||
if (input.match_index >= state.capture_group_matches_size()) {
|
||||
state.flat_capture_group_matches.ensure_capacity((input.match_index + 1) * state.capture_group_count);
|
||||
for (size_t i = state.capture_group_matches_size(); i <= input.match_index; ++i)
|
||||
for (size_t j = 0; j < state.capture_group_count; ++j)
|
||||
state.flat_capture_group_matches.append({});
|
||||
}
|
||||
|
||||
if (id() >= state.capture_group_matches.at(input.match_index).size()) {
|
||||
state.capture_group_matches.mutable_at(input.match_index).ensure_capacity(id());
|
||||
auto capacity = state.capture_group_matches.at(input.match_index).capacity();
|
||||
for (size_t i = state.capture_group_matches.at(input.match_index).size(); i <= capacity; ++i)
|
||||
state.capture_group_matches.mutable_at(input.match_index).empend();
|
||||
}
|
||||
|
||||
state.capture_group_matches.mutable_at(input.match_index).at(id()).left_column = state.string_position;
|
||||
state.mutable_capture_group_matches(input.match_index).at(id() - 1).left_column = state.string_position;
|
||||
return ExecutionResult::Continue;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput const& input, MatchState& state) const
|
||||
{
|
||||
auto& match = state.capture_group_matches.mutable_at(input.match_index).at(id());
|
||||
auto& match = state.capture_group_matches(input.match_index).at(id() - 1);
|
||||
auto start_position = match.left_column;
|
||||
if (state.string_position < start_position) {
|
||||
dbgln("Right capture group {} is before left capture group {}!", state.string_position, start_position);
|
||||
|
@ -388,14 +377,14 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightCaptureGroup::execute(MatchInput c
|
|||
|
||||
VERIFY(start_position + length <= input.view.length());
|
||||
|
||||
match = { input.view.substring_view(start_position, length), input.line, start_position, input.global_offset + start_position };
|
||||
state.mutable_capture_group_matches(input.match_index).at(id() - 1) = { input.view.substring_view(start_position, length), input.line, start_position, input.global_offset + start_position };
|
||||
|
||||
return ExecutionResult::Continue;
|
||||
}
|
||||
|
||||
ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchInput const& input, MatchState& state) const
|
||||
{
|
||||
auto& match = state.capture_group_matches.mutable_at(input.match_index).at(id());
|
||||
auto& match = state.capture_group_matches(input.match_index).at(id() - 1);
|
||||
auto start_position = match.left_column;
|
||||
if (state.string_position < start_position)
|
||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||
|
@ -409,7 +398,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(MatchIn
|
|||
|
||||
auto view = input.view.substring_view(start_position, length);
|
||||
|
||||
match = { view, name_string_table_index(), input.line, start_position, input.global_offset + start_position };
|
||||
state.mutable_capture_group_matches(input.match_index).at(id() - 1) = { view, name_string_table_index(), input.line, start_position, input.global_offset + start_position };
|
||||
|
||||
return ExecutionResult::Continue;
|
||||
}
|
||||
|
@ -584,11 +573,11 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
|||
break;
|
||||
}
|
||||
case CharacterCompareType::Reference: {
|
||||
auto reference_number = (size_t)m_bytecode->at(offset++);
|
||||
if (input.match_index >= state.capture_group_matches.size())
|
||||
auto reference_number = ((size_t)m_bytecode->at(offset++)) - 1;
|
||||
if (input.match_index >= state.capture_group_matches_size())
|
||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||
|
||||
auto& groups = state.capture_group_matches.at(input.match_index);
|
||||
auto groups = state.capture_group_matches(input.match_index);
|
||||
if (groups.size() <= reference_number)
|
||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||
|
||||
|
@ -988,8 +977,8 @@ Vector<ByteString> OpCode_Compare::variable_arguments_to_byte_string(Optional<Ma
|
|||
auto ref = m_bytecode->at(offset++);
|
||||
result.empend(ByteString::formatted(" number={}", ref));
|
||||
if (input.has_value()) {
|
||||
if (state().capture_group_matches.size() > input->match_index) {
|
||||
auto& match = state().capture_group_matches[input->match_index];
|
||||
if (state().capture_group_matches_size() > input->match_index) {
|
||||
auto match = state().capture_group_matches(input->match_index);
|
||||
if (match.size() > ref) {
|
||||
auto& group = match[ref];
|
||||
result.empend(ByteString::formatted(" left={}", group.left_column));
|
||||
|
@ -999,7 +988,7 @@ Vector<ByteString> OpCode_Compare::variable_arguments_to_byte_string(Optional<Ma
|
|||
result.empend(ByteString::formatted(" (invalid ref, max={})", match.size() - 1));
|
||||
}
|
||||
} else {
|
||||
result.empend(ByteString::formatted(" (invalid index {}, max={})", input->match_index, state().capture_group_matches.size() - 1));
|
||||
result.empend(ByteString::formatted(" (invalid index {}, max={})", input->match_index, state().capture_group_matches_size() - 1));
|
||||
}
|
||||
}
|
||||
} else if (compare_type == CharacterCompareType::String) {
|
||||
|
|
|
@ -39,7 +39,7 @@ public:
|
|||
|
||||
void print_bytecode(ByteCode const& bytecode) const
|
||||
{
|
||||
MatchState state;
|
||||
auto state = MatchState::only_for_enumeration();
|
||||
for (;;) {
|
||||
auto& opcode = bytecode.get_opcode(state);
|
||||
print_opcode("PrintBytecode", opcode, state);
|
||||
|
|
|
@ -43,12 +43,11 @@ enum __RegexAllFlags {
|
|||
__Regex_SingleLine = __Regex_Global << 10, // Dot matches newline characters
|
||||
__Regex_Sticky = __Regex_Global << 11, // Force the pattern to only match consecutive matches from where the previous match ended.
|
||||
__Regex_Multiline = __Regex_Global << 12, // Handle newline characters. Match each line, one by one.
|
||||
__Regex_SkipTrimEmptyMatches = __Regex_Global << 13, // Do not remove empty capture group results.
|
||||
__Regex_SingleMatch = __Regex_Global << 14, // Stop after acquiring a single match.
|
||||
__Regex_UnicodeSets = __Regex_Global << 15, // ECMA262 Parser specific: Allow set operations in char classes.
|
||||
__Regex_Internal_Stateful = __Regex_Global << 16, // Internal flag; enables stateful matches.
|
||||
__Regex_Internal_BrowserExtended = __Regex_Global << 17, // Internal flag; enable browser-specific ECMA262 extensions.
|
||||
__Regex_Internal_ConsiderNewline = __Regex_Global << 18, // Internal flag; allow matchers to consider newlines as line separators.
|
||||
__Regex_Internal_ECMA262DotSemantics = __Regex_Global << 19, // Internal flag; use ECMA262 semantics for dot ('.') - disallow CR/LF/LS/PS instead of just CR.
|
||||
__Regex_SingleMatch = __Regex_Global << 13, // Stop after acquiring a single match.
|
||||
__Regex_UnicodeSets = __Regex_Global << 14, // ECMA262 Parser specific: Allow set operations in char classes.
|
||||
__Regex_Internal_Stateful = __Regex_Global << 15, // Internal flag; enables stateful matches.
|
||||
__Regex_Internal_BrowserExtended = __Regex_Global << 16, // Internal flag; enable browser-specific ECMA262 extensions.
|
||||
__Regex_Internal_ConsiderNewline = __Regex_Global << 17, // Internal flag; allow matchers to consider newlines as line separators.
|
||||
__Regex_Internal_ECMA262DotSemantics = __Regex_Global << 18, // Internal flag; use ECMA262 semantics for dot ('.') - disallow CR/LF/LS/PS instead of just CR.
|
||||
__Regex_Last = __Regex_Internal_ECMA262DotSemantics,
|
||||
};
|
||||
|
|
|
@ -369,6 +369,7 @@ struct MatchInput {
|
|||
};
|
||||
|
||||
struct MatchState {
|
||||
size_t capture_group_count;
|
||||
size_t string_position_before_match { 0 };
|
||||
size_t string_position { 0 };
|
||||
size_t string_position_in_code_units { 0 };
|
||||
|
@ -377,10 +378,38 @@ struct MatchState {
|
|||
size_t forks_since_last_save { 0 };
|
||||
Optional<size_t> initiating_fork;
|
||||
COWVector<Match> matches;
|
||||
COWVector<Vector<Match>> capture_group_matches;
|
||||
COWVector<Match> flat_capture_group_matches; // Vector<Vector<Match>> indexed by match index, then by capture group id; flattened for performance
|
||||
COWVector<u64> repetition_marks;
|
||||
Vector<u64, 64> checkpoints;
|
||||
|
||||
explicit MatchState(size_t capture_group_count)
|
||||
: capture_group_count(capture_group_count)
|
||||
{
|
||||
}
|
||||
|
||||
MatchState(MatchState const&) = default;
|
||||
MatchState(MatchState&&) = default;
|
||||
|
||||
MatchState& operator=(MatchState const&) = default;
|
||||
MatchState& operator=(MatchState&&) = default;
|
||||
|
||||
// Builds a MatchState with capture_group_count == 0. The call sites visible in this change use it only to step
// through bytecode via get_opcode(). NOTE(review): capture_group_matches_size() divides by capture_group_count,
// so it must not be called on a state created here.
static MatchState only_for_enumeration() { return MatchState { 0 }; }
|
||||
|
||||
// Number of per-match rows held in flat_capture_group_matches: the flat element count divided by the row
// width (capture_group_count).
// NOTE(review): this is an unguarded integer division - a state whose capture_group_count is 0 (for example one
// built by only_for_enumeration()) must not reach this call; verify that every caller guarantees that.
size_t capture_group_matches_size() const
|
||||
{
|
||||
return flat_capture_group_matches.size() / capture_group_count;
|
||||
}
|
||||
|
||||
// Read-only view of the capture groups stored for one match: capture_group_count consecutive entries of
// flat_capture_group_matches, starting at match_index * capture_group_count.
// NOTE(review): no range check on match_index is performed here; some callers compare it against
// capture_group_matches_size() first - confirm how slice() behaves for an out-of-range request.
Span<Match const> capture_group_matches(size_t match_index) const
|
||||
{
|
||||
return flat_capture_group_matches.span().slice(match_index * capture_group_count, capture_group_count);
|
||||
}
|
||||
|
||||
// Writable view of the capture groups stored for one match: the same offset and length as
// capture_group_matches(match_index), but sliced from flat_capture_group_matches.mutable_span().
// NOTE(review): like the const accessor, match_index is not range-checked here - callers must ensure the row exists.
Span<Match> mutable_capture_group_matches(size_t match_index)
|
||||
{
|
||||
return flat_capture_group_matches.mutable_span().slice(match_index * capture_group_count, capture_group_count);
|
||||
}
|
||||
|
||||
// For size_t in {0..100}, ips in {0..500} and repetitions in {0..30}, there are zero collisions.
|
||||
// For the full range, zero collisions were found in 8 million random samples.
|
||||
u64 u64_hash() const
|
||||
|
|
|
@ -164,7 +164,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
|
|||
size_t match_count { 0 };
|
||||
|
||||
MatchInput input;
|
||||
MatchState state;
|
||||
MatchState state { m_pattern->parser_result.capture_groups_count };
|
||||
size_t operations = 0;
|
||||
|
||||
input.regex_options = m_regex_options | regex_options.value_or({}).value();
|
||||
|
@ -189,20 +189,6 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
|
|||
}
|
||||
}
|
||||
|
||||
if (c_match_preallocation_count) {
|
||||
state.matches.ensure_capacity(c_match_preallocation_count);
|
||||
state.capture_group_matches.ensure_capacity(c_match_preallocation_count);
|
||||
auto& capture_groups_count = m_pattern->parser_result.capture_groups_count;
|
||||
|
||||
for (size_t j = 0; j < c_match_preallocation_count; ++j) {
|
||||
state.matches.empend();
|
||||
state.capture_group_matches.empend();
|
||||
state.capture_group_matches.mutable_at(j).ensure_capacity(capture_groups_count);
|
||||
for (size_t k = 0; k < capture_groups_count; ++k)
|
||||
state.capture_group_matches.mutable_at(j).unchecked_append({});
|
||||
}
|
||||
}
|
||||
|
||||
auto append_match = [](auto& input, auto& state, auto& start_position) {
|
||||
if (state.matches.size() == input.match_index)
|
||||
state.matches.empend();
|
||||
|
@ -343,29 +329,34 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
|
|||
break;
|
||||
}
|
||||
|
||||
auto flat_capture_group_matches = move(state.flat_capture_group_matches).release();
|
||||
if (flat_capture_group_matches.size() < state.capture_group_count * match_count) {
|
||||
flat_capture_group_matches.ensure_capacity(match_count * state.capture_group_count);
|
||||
for (size_t i = flat_capture_group_matches.size(); i < match_count * state.capture_group_count; ++i)
|
||||
flat_capture_group_matches.empend();
|
||||
}
|
||||
|
||||
Vector<Span<Match>> capture_group_matches;
|
||||
for (size_t i = 0; i < match_count; ++i) {
|
||||
auto span = flat_capture_group_matches.span().slice(state.capture_group_count * i, state.capture_group_count);
|
||||
capture_group_matches.append(span);
|
||||
}
|
||||
|
||||
RegexResult result {
|
||||
match_count != 0,
|
||||
match_count,
|
||||
move(state.matches).release(),
|
||||
move(state.capture_group_matches).release(),
|
||||
move(flat_capture_group_matches),
|
||||
move(capture_group_matches),
|
||||
operations,
|
||||
m_pattern->parser_result.capture_groups_count,
|
||||
m_pattern->parser_result.named_capture_groups_count,
|
||||
};
|
||||
|
||||
if (match_count) {
|
||||
// Make sure there are as many capture matches as there are actual matches.
|
||||
if (result.capture_group_matches.size() < match_count)
|
||||
result.capture_group_matches.resize(match_count);
|
||||
for (auto& matches : result.capture_group_matches)
|
||||
matches.resize(m_pattern->parser_result.capture_groups_count + 1);
|
||||
if (!input.regex_options.has_flag_set(AllFlags::SkipTrimEmptyMatches)) {
|
||||
for (auto& matches : result.capture_group_matches)
|
||||
matches.remove_all_matching([](auto& match) { return match.view.is_null(); });
|
||||
}
|
||||
} else {
|
||||
if (match_count > 0)
|
||||
VERIFY(result.capture_group_matches.size() >= match_count);
|
||||
else
|
||||
result.capture_group_matches.clear_with_capacity();
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
@ -31,13 +31,13 @@ struct Block {
|
|||
}
|
||||
|
||||
static constexpr size_t const c_max_recursion = 5000;
|
||||
static constexpr size_t const c_match_preallocation_count = 0;
|
||||
|
||||
struct RegexResult final {
|
||||
bool success { false };
|
||||
size_t count { 0 };
|
||||
Vector<Match> matches;
|
||||
Vector<Vector<Match>> capture_group_matches;
|
||||
Vector<Match> flat_capture_group_matches;
|
||||
Vector<Span<Match>> capture_group_matches;
|
||||
size_t n_operations { 0 };
|
||||
size_t n_capture_groups { 0 };
|
||||
size_t n_named_capture_groups { 0 };
|
||||
|
|
|
@ -37,7 +37,7 @@ void Regex<Parser>::run_optimization_passes()
|
|||
attempt_rewrite_loops_as_atomic_groups(blocks);
|
||||
|
||||
// FIXME: "There are a few more conditions this can be true in (e.g. within an arbitrarily nested capture group)"
|
||||
MatchState state;
|
||||
auto state = MatchState::only_for_enumeration();
|
||||
auto& opcode = parser_result.bytecode.get_opcode(state);
|
||||
if (opcode.opcode_id() == OpCodeId::CheckBegin)
|
||||
parser_result.optimization_data.only_start_of_line = true;
|
||||
|
@ -53,7 +53,7 @@ typename Regex<Parser>::BasicBlockList Regex<Parser>::split_basic_blocks(ByteCod
|
|||
|
||||
auto bytecode_size = bytecode.size();
|
||||
|
||||
MatchState state;
|
||||
auto state = MatchState::only_for_enumeration();
|
||||
state.instruction_position = 0;
|
||||
auto check_jump = [&]<typename T>(OpCode const& opcode) {
|
||||
auto& op = static_cast<T const&>(opcode);
|
||||
|
@ -512,7 +512,7 @@ enum class AtomicRewritePreconditionResult {
|
|||
static AtomicRewritePreconditionResult block_satisfies_atomic_rewrite_precondition(ByteCode const& bytecode, Block repeated_block, Block following_block, auto const& all_blocks)
|
||||
{
|
||||
Vector<Vector<CompareTypeAndValuePair>> repeated_values;
|
||||
MatchState state;
|
||||
auto state = MatchState::only_for_enumeration();
|
||||
auto has_seen_actionable_opcode = false;
|
||||
for (state.instruction_position = repeated_block.start; state.instruction_position < repeated_block.end;) {
|
||||
auto& opcode = bytecode.get_opcode(state);
|
||||
|
@ -680,7 +680,7 @@ bool Regex<Parser>::attempt_rewrite_entire_match_as_substring_search(BasicBlockL
|
|||
|
||||
// We have a single basic block, let's see if it's a series of character or string compares.
|
||||
StringBuilder final_string;
|
||||
MatchState state;
|
||||
auto state = MatchState::only_for_enumeration();
|
||||
while (state.instruction_position < bytecode.size()) {
|
||||
auto& opcode = bytecode.get_opcode(state);
|
||||
switch (opcode.opcode_id()) {
|
||||
|
@ -796,7 +796,7 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
|
|||
Optional<Block> fork_fallback_block;
|
||||
if (i + 1 < basic_blocks.size())
|
||||
fork_fallback_block = basic_blocks[i + 1];
|
||||
MatchState state;
|
||||
auto state = MatchState::only_for_enumeration();
|
||||
// Check if the last instruction in this block is a jump to the block itself:
|
||||
{
|
||||
state.instruction_position = forking_block.end;
|
||||
|
@ -913,7 +913,7 @@ void Regex<Parser>::attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&
|
|||
}
|
||||
|
||||
if (!needed_patches.is_empty()) {
|
||||
MatchState state;
|
||||
auto state = MatchState::only_for_enumeration();
|
||||
auto bytecode_size = bytecode.size();
|
||||
state.instruction_position = 0;
|
||||
struct Patch {
|
||||
|
@ -1039,7 +1039,7 @@ void Optimizer::append_alternation(ByteCode& target, Span<ByteCode> alternatives
|
|||
|
||||
auto has_any_backwards_jump = false;
|
||||
|
||||
MatchState state;
|
||||
auto state = MatchState::only_for_enumeration();
|
||||
|
||||
for (size_t i = 0; i < alternatives.size(); ++i) {
|
||||
auto& alternative = alternatives[i];
|
||||
|
@ -1144,7 +1144,7 @@ void Optimizer::append_alternation(ByteCode& target, Span<ByteCode> alternatives
|
|||
node.metadata_value().size(),
|
||||
node.metadata_value().size() == 1 ? "" : "s");
|
||||
|
||||
MatchState state;
|
||||
auto state = MatchState::only_for_enumeration();
|
||||
state.instruction_position = node.metadata_value().first().instruction_position;
|
||||
auto& opcode = alternatives[node.metadata_value().first().alternative_index].get_opcode(state);
|
||||
insn = ByteString::formatted("{} {}", opcode.to_byte_string(), opcode.arguments_string());
|
||||
|
|
|
@ -28,7 +28,6 @@ enum class AllFlags {
|
|||
SingleLine = __Regex_SingleLine, // Dot matches newline characters
|
||||
Sticky = __Regex_Sticky, // Force the pattern to only match consecutive matches from where the previous match ended.
|
||||
Multiline = __Regex_Multiline, // Handle newline characters. Match each line, one by one.
|
||||
SkipTrimEmptyMatches = __Regex_SkipTrimEmptyMatches, // Do not remove empty capture group results.
|
||||
SingleMatch = __Regex_SingleMatch, // Stop after acquiring a single match.
|
||||
UnicodeSets = __Regex_UnicodeSets, // Only for ECMA262, Allow set operations in character classes.
|
||||
Internal_Stateful = __Regex_Internal_Stateful, // Make global matches match one result at a time, and further match() calls on the same instance continue where the previous one left off.
|
||||
|
@ -49,7 +48,6 @@ enum class PosixFlags : FlagsUnderlyingType {
|
|||
MatchNotBeginOfLine = (FlagsUnderlyingType)AllFlags::MatchNotBeginOfLine,
|
||||
MatchNotEndOfLine = (FlagsUnderlyingType)AllFlags::MatchNotEndOfLine,
|
||||
SkipSubExprResults = (FlagsUnderlyingType)AllFlags::SkipSubExprResults,
|
||||
SkipTrimEmptyMatches = (FlagsUnderlyingType)AllFlags::SkipTrimEmptyMatches,
|
||||
Multiline = (FlagsUnderlyingType)AllFlags::Multiline,
|
||||
SingleMatch = (FlagsUnderlyingType)AllFlags::SingleMatch,
|
||||
};
|
||||
|
|
|
@ -857,7 +857,7 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
|
|||
|
||||
auto current_capture_group = m_parser_state.capture_groups_count;
|
||||
if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) {
|
||||
bytecode.insert_bytecode_group_capture_left(current_capture_group);
|
||||
bytecode.insert_bytecode_group_capture_left(current_capture_group + 1);
|
||||
m_parser_state.capture_groups_count++;
|
||||
}
|
||||
|
||||
|
@ -888,9 +888,9 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si
|
|||
|
||||
if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) {
|
||||
if (capture_group_name.has_value())
|
||||
bytecode.insert_bytecode_group_capture_right(current_capture_group, capture_group_name.value());
|
||||
bytecode.insert_bytecode_group_capture_right(current_capture_group + 1, capture_group_name.value());
|
||||
else
|
||||
bytecode.insert_bytecode_group_capture_right(current_capture_group);
|
||||
bytecode.insert_bytecode_group_capture_right(current_capture_group + 1);
|
||||
}
|
||||
should_parse_repetition_symbol = true;
|
||||
break;
|
||||
|
|
|
@ -228,7 +228,6 @@ PatternErrorOr<Component> Component::compile(Utf8View const& input, PatternParse
|
|||
auto flags = regex::RegexOptions<ECMAScriptFlags> {
|
||||
(regex::ECMAScriptFlags)regex::AllFlags::SingleMatch
|
||||
| (regex::ECMAScriptFlags)regex::AllFlags::Global
|
||||
| (regex::ECMAScriptFlags)regex::AllFlags::SkipTrimEmptyMatches
|
||||
| regex::ECMAScriptFlags::BrowserExtended
|
||||
};
|
||||
|
||||
|
@ -288,7 +287,7 @@ Component::Result Component::create_match_result(String const& input, regex::Reg
|
|||
// 4. Let index be 1.
|
||||
// 5. While index is less than Get(execResult, "length"):
|
||||
for (size_t index = 1; index <= exec_result.n_capture_groups; ++index) {
|
||||
auto const& capture = exec_result.capture_group_matches[0][index];
|
||||
auto const& capture = exec_result.capture_group_matches[0][index - 1];
|
||||
|
||||
// 1. Let name be component’s group name list[index − 1].
|
||||
auto name = group_name_list[index - 1];
|
||||
|
|
|
@ -373,7 +373,7 @@ TEST_CASE(ini_file_entries)
|
|||
}
|
||||
|
||||
EXPECT_EQ(result.matches.at(0).view, "[Window]");
|
||||
EXPECT_EQ(result.capture_group_matches.at(0).at(0).view, "Window");
|
||||
EXPECT_EQ(result.capture_group_matches.at(0).at(1).view, "Window");
|
||||
EXPECT_EQ(result.matches.at(1).view, "Opacity=255");
|
||||
EXPECT_EQ(result.matches.at(1).line, 1u);
|
||||
EXPECT_EQ(result.matches.at(1).column, 0u);
|
||||
|
|
Loading…
Add table
Reference in a new issue