LibRegex: Ensure nested capture groups have non-conflicting names

Take record of the named capture group prior to parsing the group's
body. This requires removing the minimum length that was recorded
directly on the named capture group; that length now needs to be looked
up via the capture group minimum lengths table.
This commit is contained in:
Marc Jessome 2024-11-23 18:38:57 -05:00 committed by Ali Mohammad Pur
parent e37c9eaeff
commit efcaf991e6
Notes: github-actions[bot] 2024-11-24 09:27:05 +00:00
3 changed files with 12 additions and 10 deletions

View file

@ -1627,9 +1627,14 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
set_error(Error::InvalidNameForCaptureGroup);
return false;
}
match_length_minimum += maybe_capture_group->minimum_length;
auto maybe_length = m_parser_state.capture_group_minimum_lengths.get(maybe_capture_group.value());
if (!maybe_length.has_value()) {
set_error(Error::InvalidNameForCaptureGroup);
return false;
}
match_length_minimum += maybe_length.value();
stack.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)maybe_capture_group->group_index } });
stack.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)maybe_capture_group.value() } });
return true;
}
@ -2674,6 +2679,8 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
return false;
}
m_parser_state.named_capture_groups.set(name, group_index);
ByteCode capture_group_bytecode;
size_t length = 0;
enter_capture_group_scope();
@ -2693,7 +2700,6 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
match_length_minimum += length;
m_parser_state.capture_group_minimum_lengths.set(group_index, length);
m_parser_state.named_capture_groups.set(name, { group_index, length });
return true;
}

View file

@ -98,11 +98,6 @@ protected:
size_t tell() const { return m_parser_state.current_token.position(); }
struct NamedCaptureGroup {
size_t group_index { 0 };
size_t minimum_length { 0 };
};
struct ParserState {
Lexer& lexer;
Token current_token;
@ -114,8 +109,8 @@ protected:
size_t match_length_minimum { 0 };
size_t repetition_mark_count { 0 };
AllOptions regex_options;
HashMap<int, size_t> capture_group_minimum_lengths;
HashMap<DeprecatedFlyString, NamedCaptureGroup> named_capture_groups;
HashMap<size_t, size_t> capture_group_minimum_lengths;
HashMap<DeprecatedFlyString, size_t> named_capture_groups;
explicit ParserState(Lexer& lexer)
: lexer(lexer)

View file

@ -597,6 +597,7 @@ TEST_CASE(ECMA262_parse)
{ "a{9007199254740992,9007199254740992}"sv, regex::Error::InvalidBraceContent },
{ "(?<a>a)(?<a>b)"sv, regex::Error::DuplicateNamedCapture },
{ "(?<a>a)(?<b>b)(?<a>c)"sv, regex::Error::DuplicateNamedCapture },
{ "(?<a>(?<a>a))"sv, regex::Error::DuplicateNamedCapture },
{ "(?<1a>a)"sv, regex::Error::InvalidNameForCaptureGroup },
{ "(?<\\a>a)"sv, regex::Error::InvalidNameForCaptureGroup },
{ "(?<\ta>a)"sv, regex::Error::InvalidNameForCaptureGroup },