mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-22 12:35:14 +00:00
LibRegex: Ensure nested capture groups have non-conflicting names
Take record of the named capture group prior to parsing the group's body. This requires removing the minimum length that was recorded directly on the named capture group; it now needs to be looked up via the capture group minimum lengths table.
This commit is contained in:
parent
e37c9eaeff
commit
efcaf991e6
Notes:
github-actions[bot]
2024-11-24 09:27:05 +00:00
Author: https://github.com/mjessome 🔰 Commit: https://github.com/LadybirdBrowser/ladybird/commit/efcaf991e68 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/2541 Reviewed-by: https://github.com/alimpfard
3 changed files with 12 additions and 10 deletions
|
@ -1627,9 +1627,14 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
|
|||
set_error(Error::InvalidNameForCaptureGroup);
|
||||
return false;
|
||||
}
|
||||
match_length_minimum += maybe_capture_group->minimum_length;
|
||||
auto maybe_length = m_parser_state.capture_group_minimum_lengths.get(maybe_capture_group.value());
|
||||
if (!maybe_length.has_value()) {
|
||||
set_error(Error::InvalidNameForCaptureGroup);
|
||||
return false;
|
||||
}
|
||||
match_length_minimum += maybe_length.value();
|
||||
|
||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)maybe_capture_group->group_index } });
|
||||
stack.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)maybe_capture_group.value() } });
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -2674,6 +2679,8 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
|
|||
return false;
|
||||
}
|
||||
|
||||
m_parser_state.named_capture_groups.set(name, group_index);
|
||||
|
||||
ByteCode capture_group_bytecode;
|
||||
size_t length = 0;
|
||||
enter_capture_group_scope();
|
||||
|
@ -2693,7 +2700,6 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
|
|||
match_length_minimum += length;
|
||||
|
||||
m_parser_state.capture_group_minimum_lengths.set(group_index, length);
|
||||
m_parser_state.named_capture_groups.set(name, { group_index, length });
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -98,11 +98,6 @@ protected:
|
|||
|
||||
size_t tell() const { return m_parser_state.current_token.position(); }
|
||||
|
||||
struct NamedCaptureGroup {
|
||||
size_t group_index { 0 };
|
||||
size_t minimum_length { 0 };
|
||||
};
|
||||
|
||||
struct ParserState {
|
||||
Lexer& lexer;
|
||||
Token current_token;
|
||||
|
@ -114,8 +109,8 @@ protected:
|
|||
size_t match_length_minimum { 0 };
|
||||
size_t repetition_mark_count { 0 };
|
||||
AllOptions regex_options;
|
||||
HashMap<int, size_t> capture_group_minimum_lengths;
|
||||
HashMap<DeprecatedFlyString, NamedCaptureGroup> named_capture_groups;
|
||||
HashMap<size_t, size_t> capture_group_minimum_lengths;
|
||||
HashMap<DeprecatedFlyString, size_t> named_capture_groups;
|
||||
|
||||
explicit ParserState(Lexer& lexer)
|
||||
: lexer(lexer)
|
||||
|
|
|
@ -597,6 +597,7 @@ TEST_CASE(ECMA262_parse)
|
|||
{ "a{9007199254740992,9007199254740992}"sv, regex::Error::InvalidBraceContent },
|
||||
{ "(?<a>a)(?<a>b)"sv, regex::Error::DuplicateNamedCapture },
|
||||
{ "(?<a>a)(?<b>b)(?<a>c)"sv, regex::Error::DuplicateNamedCapture },
|
||||
{ "(?<a>(?<a>a))"sv, regex::Error::DuplicateNamedCapture },
|
||||
{ "(?<1a>a)"sv, regex::Error::InvalidNameForCaptureGroup },
|
||||
{ "(?<\\a>a)"sv, regex::Error::InvalidNameForCaptureGroup },
|
||||
{ "(?<\ta>a)"sv, regex::Error::InvalidNameForCaptureGroup },
|
||||
|
|
Loading…
Add table
Reference in a new issue