/* * Copyright (c) 2025, Shannon Booth * * SPDX-License-Identifier: BSD-2-Clause */ #include #include #include namespace URL::Pattern { PatternParser::PatternParser(EncodingCallback encoding_callback, String segment_wildcard_regexp) : m_encoding_callback(move(encoding_callback)) , m_segment_wildcard_regexp(move(segment_wildcard_regexp)) { } // https://urlpattern.spec.whatwg.org/#consume-a-required-token PatternErrorOr PatternParser::consume_a_required_token(Token::Type type) { // 1. Let result be the result of running try to consume a token given parser and type. auto result = try_to_consume_a_token(type); // 2. If result is null, then throw a TypeError. if (!result.has_value()) return ErrorInfo { MUST(String::formatted("Missing required token '{}' in URL pattern", Token::type_to_string(type))) }; // 3. Return result. // NOTE: No caller actually needs the result, so we just ignore it. return {}; } // https://urlpattern.spec.whatwg.org/#consume-text String PatternParser::consume_text() { // 1. Let result be the empty string. StringBuilder result; // 1. While true: while (true) { // 1. Let token be the result of running try to consume a token given parser and "char". auto token = try_to_consume_a_token(Token::Type::Char); // 2. If token is null, then set token to the result of running try to consume a token given parser and "escaped-char". if (!token.has_value()) token = try_to_consume_a_token(Token::Type::EscapedChar); // 3. If token is null, then break. if (!token.has_value()) break; // 4. Append token’s value to the end of result. result.append(token->value); } // 2. Return result. return result.to_string_without_validation(); } // https://urlpattern.spec.whatwg.org/#maybe-add-a-part-from-the-pending-fixed-value PatternErrorOr PatternParser::maybe_add_a_part_from_the_pending_fixed_value() { // 1. If parser’s pending fixed value is the empty string, then return. if (m_pending_fixed_value.is_empty()) return {}; // 2. Let encoded value be the result of running parser’s encoding callback given parser’s pending fixed value. auto encoded_value = TRY(m_encoding_callback(m_pending_fixed_value.to_string_without_validation())); // 3. Set parser’s pending fixed value to the empty string. m_pending_fixed_value.clear(); // 4. Let part be a new part whose type is "fixed-text", value is encoded value, and modifier is "none". // 5. Append part to parser’s part list. m_part_list.append({ Part::Type::FixedText, move(encoded_value), Part::Modifier::None }); return {}; } // https://urlpattern.spec.whatwg.org/#is-a-duplicate-name bool PatternParser::is_a_duplicate_name(String const& name) const { // 1. For each part of parser’s part list: for (auto const& part : m_part_list) { // 1. If part’s name is name, then return true. if (part.name == name) return true; } // 2. Return false. return false; } // https://urlpattern.spec.whatwg.org/#add-a-part PatternErrorOr PatternParser::add_a_part(String const& prefix, Optional name_token, Optional regexp_or_wildcard_token, String const& suffix, Optional modifier_token) { // 1. Let modifier be "none". auto modifier = Part::Modifier::None; // 2. If modifier token is not null: if (modifier_token.has_value()) { // 1. If modifier token’s value is "?" then set modifier to "optional". if (modifier_token->value == "?"sv) { modifier = Part::Modifier::Optional; } // 2. Otherwise if modifier token’s value is "*" then set modifier to "zero-or-more". else if (modifier_token->value == "*"sv) { modifier = Part::Modifier::ZeroOrMore; } // 3. Otherwise if modifier token’s value is "+" then set modifier to "one-or-more". else if (modifier_token->value == "+"sv) { modifier = Part::Modifier::OneOrMore; } } // 3. If name token is null and regexp or wildcard token is null and modifier is "none": // NOTE: This was a "{foo}" grouping. We add this to the pending fixed value so that it will be combined with // any previous or subsequent text. if (!name_token.has_value() && !regexp_or_wildcard_token.has_value() && modifier == Part::Modifier::None) { // 1. Append prefix to the end of parser’s pending fixed value. m_pending_fixed_value.append(prefix); // 2. Return. return {}; } // 4. Run maybe add a part from the pending fixed value given parser. TRY(maybe_add_a_part_from_the_pending_fixed_value()); // 5. If name token is null and regexp or wildcard token is null: // NOTE: This was a "{foo}?" grouping. The modifier means we cannot combine it with other text. Therefore we // add it as a part immediately. if (!name_token.has_value() && !regexp_or_wildcard_token.has_value()) { // 1. Assert: suffix is the empty string. VERIFY(suffix.is_empty()); // 2. If prefix is the empty string, then return. if (prefix.is_empty()) return {}; // 3. Let encoded value be the result of running parser’s encoding callback given prefix. auto encoded_value = TRY(m_encoding_callback(prefix)); // 4. Let part be a new part whose type is "fixed-text", value is encoded value, and modifier is modifier. // 5. Append part to parser’s part list. m_part_list.append({ Part::Type::FixedText, move(encoded_value), modifier }); // 6. Return. return {}; } // 6. Let regexp value be the empty string. // NOTE: Next, we convert the regexp or wildcard token into a regular expression. String regexp_value; // 7. If regexp or wildcard token is null, then set regexp value to parser’s segment wildcard regexp. if (!regexp_or_wildcard_token.has_value()) { regexp_value = m_segment_wildcard_regexp; } // 8. Otherwise if regexp or wildcard token’s type is "asterisk", then set regexp value to the full wildcard regexp value. else if (regexp_or_wildcard_token->type == Token::Type::Asterisk) { regexp_value = MUST(String::from_utf8(full_wildcard_regexp_value)); } // 9. Otherwise set regexp value to regexp or wildcard token’s value. else { regexp_value = regexp_or_wildcard_token->value; } // 10. Let type be "regexp". // NOTE: Next, we convert regexp value into a part type. We make sure to go to a regular expression first so // that an equivalent "regexp" token will be treated the same as a "name" or "asterisk" token. auto type = Part::Type::Regexp; // 11. If regexp value is parser’s segment wildcard regexp: if (regexp_value == m_segment_wildcard_regexp) { // 1. Set type to "segment-wildcard". type = Part::Type::SegmentWildcard; // 2. Set regexp value to the empty string. regexp_value = String {}; } // 12. Otherwise if regexp value is the full wildcard regexp value: else if (regexp_value == full_wildcard_regexp_value) { // 1. Set type to "full-wildcard". type = Part::Type::FullWildcard; // 2. Set regexp value to the empty string. regexp_value = String {}; } // 13. Let name be the empty string. // NOTE: Next, we determine the part name. This can be explicitly provided by a "name" token or be automatically assigned. String name; // 14. If name token is not null, then set name to name token’s value. if (name_token.has_value()) { name = name_token->value; } // 15. Otherwise if regexp or wildcard token is not null: else if (regexp_or_wildcard_token.has_value()) { // 1. Set name to parser’s next numeric name, serialized. name = String::number(m_next_numeric_name); // 2. Increment parser’s next numeric name by 1. ++m_next_numeric_name; } // 16. If the result of running is a duplicate name given parser and name is true, then throw a TypeError. if (is_a_duplicate_name(name)) return ErrorInfo { MUST(String::formatted("Duplicate name '{}' provided in URL pattern", name)) }; // 17. Let encoded prefix be the result of running parser’s encoding callback given prefix. // NOTE: Finally, we encode the fixed text values and create the part. auto encoded_prefix = TRY(m_encoding_callback(prefix)); // 18. Let encoded suffix be the result of running parser’s encoding callback given suffix. auto encoded_suffix = TRY(m_encoding_callback(suffix)); // 19. Let part be a new part whose type is type, value is regexp value, modifier is modifier, name is name, prefix // is encoded prefix, and suffix is encoded suffix. // 20. Append part to parser’s part list. m_part_list.append({ type, move(regexp_value), modifier, move(name), move(encoded_prefix), move(encoded_suffix) }); return {}; } // https://urlpattern.spec.whatwg.org/#try-to-consume-a-modifier-token Optional PatternParser::try_to_consume_a_modifier_token() { // 1. Let token be the result of running try to consume a token given parser and "other-modifier". auto token = try_to_consume_a_token(Token::Type::OtherModifier); // 2. If token is not null, then return token. if (token.has_value()) return token; // 3. Set token to the result of running try to consume a token given parser and "asterisk". token = try_to_consume_a_token(Token::Type::Asterisk); // 4. Return token. return token; } // https://urlpattern.spec.whatwg.org/#try-to-consume-a-regexp-or-wildcard-token Optional PatternParser::try_to_consume_a_regexp_or_wildcard_token(Optional name_token) { // 1. Let token be the result of running try to consume a token given parser and "regexp". auto token = try_to_consume_a_token(Token::Type::Regexp); // 2. If name token is null and token is null, then set token to the result of running try to consume a token given // parser and "asterisk". if (!name_token.has_value() && !token.has_value()) token = try_to_consume_a_token(Token::Type::Asterisk); // 3. Return token. return token; } // https://urlpattern.spec.whatwg.org/#try-to-consume-a-token Optional PatternParser::try_to_consume_a_token(Token::Type type) { // 1. Assert: parser’s index is less than parser’s token list size. VERIFY(m_index < m_token_list.size()); // 2. Let next token be parser’s token list[parser’s index]. auto const& next_token = m_token_list[m_index]; // 3. If next token’s type is not type return null. if (next_token.type != type) return {}; // 4. Increment parser’s index by 1. ++m_index; // 5. Return next token. return next_token; } // https://urlpattern.spec.whatwg.org/#parse-a-pattern-string PatternErrorOr> PatternParser::parse(Utf8View const& input, Options const& options, EncodingCallback encoding_callback) { // 1. Let parser be a new pattern parser whose encoding callback is encoding callback and segment wildcard regexp // is the result of running generate a segment wildcard regexp given options. PatternParser parser { move(encoding_callback), generate_a_segment_wildcard_regexp(options) }; // 2. Set parser’s token list to the result of running tokenize given input and "strict". parser.m_token_list = TRY(Tokenizer::tokenize(input, Tokenizer::Policy::Strict)); // 3. While parser’s index is less than parser’s token list's size: while (parser.m_index < parser.m_token_list.size()) { // 1. Let char token be the result of running try to consume a token given parser and "char". auto char_token = parser.try_to_consume_a_token(Token::Type::Char); // 2. Let name token be the result of running try to consume a token given parser and "name". auto name_token = parser.try_to_consume_a_token(Token::Type::Name); // 3. Let regexp or wildcard token be the result of running try to consume a regexp or wildcard token given // parser and name token. auto regexp_or_wildcard_token = parser.try_to_consume_a_regexp_or_wildcard_token(name_token); // 4. If name token is not null or regexp or wildcard token is not null: // NOTE: If there is a matching group, we need to add the part immediately. if (name_token.has_value() || regexp_or_wildcard_token.has_value()) { // 1. Let prefix be the empty string. String prefix; // 2. If char token is not null then set prefix to char token’s value. if (char_token.has_value()) prefix = char_token->value; // 3. If prefix is not the empty string and not options’s prefix code point: if (!prefix.is_empty() && (!options.prefix_code_point.has_value() || prefix != String::from_code_point(*options.prefix_code_point))) { // 1. Append prefix to the end of parser’s pending fixed value. parser.m_pending_fixed_value.append(prefix); // 2. Set prefix to the empty string. prefix = String {}; } // 4. Run maybe add a part from the pending fixed value given parser. TRY(parser.maybe_add_a_part_from_the_pending_fixed_value()); // 5. Let modifier token be the result of running try to consume a modifier token given parser. auto modifier_token = parser.try_to_consume_a_modifier_token(); // 6. Run add a part given parser, prefix, name token, regexp or wildcard token, the empty string, // and modifier token. TRY(parser.add_a_part(prefix, name_token, regexp_or_wildcard_token, String {}, modifier_token)); // 7. Continue. continue; } // 5. Let fixed token be char token. // NOTE: If there was no matching group, then we need to buffer any fixed text. We want to collect as // much text as possible before adding it as a "fixed-text" part. auto fixed_token = char_token; // 6. If fixed token is null, then set fixed token to the result of running try to consume a token given // parser and "escaped-char". if (!fixed_token.has_value()) fixed_token = parser.try_to_consume_a_token(Token::Type::EscapedChar); // 7. If fixed token is not null: if (fixed_token.has_value()) { // 1. Append fixed token’s value to parser’s pending fixed value. parser.m_pending_fixed_value.append(fixed_token->value); // 2. Continue. continue; } // 8. Let open token be the result of running try to consume a token given parser and "open". auto open_token = parser.try_to_consume_a_token(Token::Type::Open); // 9. If open token is not null: if (open_token.has_value()) { // 1. Let prefix be the result of running consume text given parser. auto prefix = parser.consume_text(); // 2. Set name token to the result of running try to consume a token given parser and "name". name_token = parser.try_to_consume_a_token(Token::Type::Name); // 3. Set regexp or wildcard token to the result of running try to consume a regexp or wildcard token // given parser and name token. regexp_or_wildcard_token = parser.try_to_consume_a_regexp_or_wildcard_token(name_token); // 4. Let suffix be the result of running consume text given parser. auto suffix = parser.consume_text(); // 5. Run consume a required token given parser and "close". TRY(parser.consume_a_required_token(Token::Type::Close)); // 6. Let modifier token to the result of running try to consume a modifier token given parser. auto modifier_token = parser.try_to_consume_a_modifier_token(); // 7. Run add a part given parser, prefix, name token, regexp or wildcard token, suffix, and modifier token. TRY(parser.add_a_part(prefix, name_token, regexp_or_wildcard_token, suffix, modifier_token)); // 8. Continue. continue; } // 10. Run maybe add a part from the pending fixed value given parser. TRY(parser.maybe_add_a_part_from_the_pending_fixed_value()); // 11. Run consume a required token given parser and "end". TRY(parser.consume_a_required_token(Token::Type::End)); } if constexpr (URL_PATTERN_DEBUG) { dbgln("Pattern parser produced the part list:"); for (auto const& part : parser.m_part_list) { dbgln("Type {}, Value '{}', Modifier {}, Name '{}', Prefix '{}', Suffix '{}'", Part::type_to_string(part.type), part.value, Part::convert_modifier_to_string(part.modifier), part.name, part.prefix, part.suffix); } } // 4. Return parser’s part list. return move(parser.m_part_list); } }