LibRegex: Ensure nested capture groups have non-conflicting names

Record the named capture group before parsing the group's body, so that a nested group reusing the same name is detected as a duplicate. Because the group's minimum length is not yet known at that point, it is no longer stored directly alongside the named capture group; it is now looked up via the capture group minimum lengths table.
Author: https://github.com/mjessome 🔰 Commit: https://github.com/LadybirdBrowser/ladybird/commit/efcaf991e68 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/2541 Reviewed-by: https://github.com/alimpfard
2025-04-22 12:35:14 +00:00 · 2024-11-23 18:38:57 -05:00 · 2024-11-23 18:38:57 -05:00 · efcaf991e6 · 2024-11-24 09:27:05 +00:00
commit efcaf991e6
parent e37c9eaeff
3 changed files with 12 additions and 10 deletions
--- a/Libraries/LibRegex/RegexParser.cpp
+++ b/Libraries/LibRegex/RegexParser.cpp
@ -1627,9 +1627,14 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
            set_error(Error::InvalidNameForCaptureGroup);
            return false;
        }
-        match_length_minimum += maybe_capture_group->minimum_length;
+        auto maybe_length = m_parser_state.capture_group_minimum_lengths.get(maybe_capture_group.value());
+        if (!maybe_length.has_value()) {
+            set_error(Error::InvalidNameForCaptureGroup);
+            return false;
+        }
+        match_length_minimum += maybe_length.value();

-        stack.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)maybe_capture_group->group_index } });
+        stack.insert_bytecode_compare_values({ { CharacterCompareType::Reference, (ByteCodeValueType)maybe_capture_group.value() } });
        return true;
    }

@ -2674,6 +2679,8 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
                return false;
            }

+            m_parser_state.named_capture_groups.set(name, group_index);
+
            ByteCode capture_group_bytecode;
            size_t length = 0;
            enter_capture_group_scope();
@ -2693,7 +2700,6 @@ bool ECMA262Parser::parse_capture_group(ByteCode& stack, size_t& match_length_mi
            match_length_minimum += length;

            m_parser_state.capture_group_minimum_lengths.set(group_index, length);
-            m_parser_state.named_capture_groups.set(name, { group_index, length });
            return true;
        }

--- a/Libraries/LibRegex/RegexParser.h
+++ b/Libraries/LibRegex/RegexParser.h
@ -98,11 +98,6 @@ protected:

    size_t tell() const { return m_parser_state.current_token.position(); }

-    struct NamedCaptureGroup {
-        size_t group_index { 0 };
-        size_t minimum_length { 0 };
-    };
-
    struct ParserState {
        Lexer& lexer;
        Token current_token;
@ -114,8 +109,8 @@ protected:
        size_t match_length_minimum { 0 };
        size_t repetition_mark_count { 0 };
        AllOptions regex_options;
-        HashMap<int, size_t> capture_group_minimum_lengths;
-        HashMap<DeprecatedFlyString, NamedCaptureGroup> named_capture_groups;
+        HashMap<size_t, size_t> capture_group_minimum_lengths;
+        HashMap<DeprecatedFlyString, size_t> named_capture_groups;

        explicit ParserState(Lexer& lexer)
            : lexer(lexer)
--- a/Tests/LibRegex/Regex.cpp
+++ b/Tests/LibRegex/Regex.cpp
@ -597,6 +597,7 @@ TEST_CASE(ECMA262_parse)
        { "a{9007199254740992,9007199254740992}"sv, regex::Error::InvalidBraceContent },
        { "(?<a>a)(?<a>b)"sv, regex::Error::DuplicateNamedCapture },
        { "(?<a>a)(?<b>b)(?<a>c)"sv, regex::Error::DuplicateNamedCapture },
+        { "(?<a>(?<a>a))"sv, regex::Error::DuplicateNamedCapture },
        { "(?<1a>a)"sv, regex::Error::InvalidNameForCaptureGroup },
        { "(?<\\a>a)"sv, regex::Error::InvalidNameForCaptureGroup },
        { "(?<\ta>a)"sv, regex::Error::InvalidNameForCaptureGroup },