mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-06-07 02:42:52 +00:00
We were not properly handling the case where the prefix code point is the empty string (which we represent as an OptionalNone). While this still resulted in the correct pattern string being generated, an incorrect regular expression was generated, causing matching to fail.
405 lines
17 KiB
C++
405 lines
17 KiB
C++
/*
|
||
* Copyright (c) 2025, Shannon Booth <shannon@serenityos.org>
|
||
*
|
||
* SPDX-License-Identifier: BSD-2-Clause
|
||
*/
|
||
|
||
#include <LibURL/Pattern/Component.h>
|
||
#include <LibURL/Pattern/PatternParser.h>
|
||
#include <LibURL/Pattern/String.h>
|
||
|
||
namespace URL::Pattern {
|
||
|
||
PatternParser::PatternParser(EncodingCallback encoding_callback, String segment_wildcard_regexp)
|
||
: m_encoding_callback(move(encoding_callback))
|
||
, m_segment_wildcard_regexp(move(segment_wildcard_regexp))
|
||
{
|
||
}
|
||
|
||
// https://urlpattern.spec.whatwg.org/#consume-a-required-token
|
||
PatternErrorOr<void> PatternParser::consume_a_required_token(Token::Type type)
|
||
{
|
||
// 1. Let result be the result of running try to consume a token given parser and type.
|
||
auto result = try_to_consume_a_token(type);
|
||
|
||
// 2. If result is null, then throw a TypeError.
|
||
if (!result.has_value())
|
||
return ErrorInfo { MUST(String::formatted("Missing required token '{}' in URL pattern", Token::type_to_string(type))) };
|
||
|
||
// 3. Return result.
|
||
// NOTE: No caller actually needs the result, so we just ignore it.
|
||
return {};
|
||
}
|
||
|
||
// https://urlpattern.spec.whatwg.org/#consume-text
|
||
String PatternParser::consume_text()
|
||
{
|
||
// 1. Let result be the empty string.
|
||
StringBuilder result;
|
||
|
||
// 1. While true:
|
||
while (true) {
|
||
// 1. Let token be the result of running try to consume a token given parser and "char".
|
||
auto token = try_to_consume_a_token(Token::Type::Char);
|
||
|
||
// 2. If token is null, then set token to the result of running try to consume a token given parser and "escaped-char".
|
||
if (!token.has_value())
|
||
token = try_to_consume_a_token(Token::Type::EscapedChar);
|
||
|
||
// 3. If token is null, then break.
|
||
if (!token.has_value())
|
||
break;
|
||
|
||
// 4. Append token’s value to the end of result.
|
||
result.append(token->value);
|
||
}
|
||
|
||
// 2. Return result.
|
||
return result.to_string_without_validation();
|
||
}
|
||
|
||
// https://urlpattern.spec.whatwg.org/#maybe-add-a-part-from-the-pending-fixed-value
|
||
PatternErrorOr<void> PatternParser::maybe_add_a_part_from_the_pending_fixed_value()
|
||
{
|
||
// 1. If parser’s pending fixed value is the empty string, then return.
|
||
if (m_pending_fixed_value.is_empty())
|
||
return {};
|
||
|
||
// 2. Let encoded value be the result of running parser’s encoding callback given parser’s pending fixed value.
|
||
auto encoded_value = TRY(m_encoding_callback(m_pending_fixed_value.to_string_without_validation()));
|
||
|
||
// 3. Set parser’s pending fixed value to the empty string.
|
||
m_pending_fixed_value.clear();
|
||
|
||
// 4. Let part be a new part whose type is "fixed-text", value is encoded value, and modifier is "none".
|
||
// 5. Append part to parser’s part list.
|
||
m_part_list.append({ Part::Type::FixedText, move(encoded_value), Part::Modifier::None });
|
||
|
||
return {};
|
||
}
|
||
|
||
// https://urlpattern.spec.whatwg.org/#is-a-duplicate-name
|
||
bool PatternParser::is_a_duplicate_name(String const& name) const
|
||
{
|
||
// 1. For each part of parser’s part list:
|
||
for (auto const& part : m_part_list) {
|
||
// 1. If part’s name is name, then return true.
|
||
if (part.name == name)
|
||
return true;
|
||
}
|
||
|
||
// 2. Return false.
|
||
return false;
|
||
}
|
||
|
||
// https://urlpattern.spec.whatwg.org/#add-a-part
|
||
PatternErrorOr<void> PatternParser::add_a_part(String const& prefix, Optional<Token const&> name_token,
|
||
Optional<Token const&> regexp_or_wildcard_token, String const& suffix, Optional<Token const&> modifier_token)
|
||
{
|
||
// 1. Let modifier be "none".
|
||
auto modifier = Part::Modifier::None;
|
||
|
||
// 2. If modifier token is not null:
|
||
if (modifier_token.has_value()) {
|
||
// 1. If modifier token’s value is "?" then set modifier to "optional".
|
||
if (modifier_token->value == "?"sv) {
|
||
modifier = Part::Modifier::Optional;
|
||
}
|
||
// 2. Otherwise if modifier token’s value is "*" then set modifier to "zero-or-more".
|
||
else if (modifier_token->value == "*"sv) {
|
||
modifier = Part::Modifier::ZeroOrMore;
|
||
}
|
||
// 3. Otherwise if modifier token’s value is "+" then set modifier to "one-or-more".
|
||
else if (modifier_token->value == "+"sv) {
|
||
modifier = Part::Modifier::OneOrMore;
|
||
}
|
||
}
|
||
|
||
// 3. If name token is null and regexp or wildcard token is null and modifier is "none":
|
||
// NOTE: This was a "{foo}" grouping. We add this to the pending fixed value so that it will be combined with
|
||
// any previous or subsequent text.
|
||
if (!name_token.has_value() && !regexp_or_wildcard_token.has_value() && modifier == Part::Modifier::None) {
|
||
// 1. Append prefix to the end of parser’s pending fixed value.
|
||
m_pending_fixed_value.append(prefix);
|
||
|
||
// 2. Return.
|
||
return {};
|
||
}
|
||
|
||
// 4. Run maybe add a part from the pending fixed value given parser.
|
||
TRY(maybe_add_a_part_from_the_pending_fixed_value());
|
||
|
||
// 5. If name token is null and regexp or wildcard token is null:
|
||
// NOTE: This was a "{foo}?" grouping. The modifier means we cannot combine it with other text. Therefore we
|
||
// add it as a part immediately.
|
||
if (!name_token.has_value() && !regexp_or_wildcard_token.has_value()) {
|
||
// 1. Assert: suffix is the empty string.
|
||
VERIFY(suffix.is_empty());
|
||
|
||
// 2. If prefix is the empty string, then return.
|
||
if (prefix.is_empty())
|
||
return {};
|
||
|
||
// 3. Let encoded value be the result of running parser’s encoding callback given prefix.
|
||
auto encoded_value = TRY(m_encoding_callback(prefix));
|
||
|
||
// 4. Let part be a new part whose type is "fixed-text", value is encoded value, and modifier is modifier.
|
||
// 5. Append part to parser’s part list.
|
||
m_part_list.append({ Part::Type::FixedText, move(encoded_value), modifier });
|
||
|
||
// 6. Return.
|
||
return {};
|
||
}
|
||
|
||
// 6. Let regexp value be the empty string.
|
||
// NOTE: Next, we convert the regexp or wildcard token into a regular expression.
|
||
String regexp_value;
|
||
|
||
// 7. If regexp or wildcard token is null, then set regexp value to parser’s segment wildcard regexp.
|
||
if (!regexp_or_wildcard_token.has_value()) {
|
||
regexp_value = m_segment_wildcard_regexp;
|
||
}
|
||
// 8. Otherwise if regexp or wildcard token’s type is "asterisk", then set regexp value to the full wildcard regexp value.
|
||
else if (regexp_or_wildcard_token->type == Token::Type::Asterisk) {
|
||
regexp_value = MUST(String::from_utf8(full_wildcard_regexp_value));
|
||
}
|
||
// 9. Otherwise set regexp value to regexp or wildcard token’s value.
|
||
else {
|
||
regexp_value = regexp_or_wildcard_token->value;
|
||
}
|
||
|
||
// 10. Let type be "regexp".
|
||
// NOTE: Next, we convert regexp value into a part type. We make sure to go to a regular expression first so
|
||
// that an equivalent "regexp" token will be treated the same as a "name" or "asterisk" token.
|
||
auto type = Part::Type::Regexp;
|
||
|
||
// 11. If regexp value is parser’s segment wildcard regexp:
|
||
if (regexp_value == m_segment_wildcard_regexp) {
|
||
// 1. Set type to "segment-wildcard".
|
||
type = Part::Type::SegmentWildcard;
|
||
|
||
// 2. Set regexp value to the empty string.
|
||
regexp_value = String {};
|
||
}
|
||
// 12. Otherwise if regexp value is the full wildcard regexp value:
|
||
else if (regexp_value == full_wildcard_regexp_value) {
|
||
// 1. Set type to "full-wildcard".
|
||
type = Part::Type::FullWildcard;
|
||
|
||
// 2. Set regexp value to the empty string.
|
||
regexp_value = String {};
|
||
}
|
||
|
||
// 13. Let name be the empty string.
|
||
// NOTE: Next, we determine the part name. This can be explicitly provided by a "name" token or be automatically assigned.
|
||
String name;
|
||
|
||
// 14. If name token is not null, then set name to name token’s value.
|
||
if (name_token.has_value()) {
|
||
name = name_token->value;
|
||
}
|
||
// 15. Otherwise if regexp or wildcard token is not null:
|
||
else if (regexp_or_wildcard_token.has_value()) {
|
||
// 1. Set name to parser’s next numeric name, serialized.
|
||
name = String::number(m_next_numeric_name);
|
||
|
||
// 2. Increment parser’s next numeric name by 1.
|
||
++m_next_numeric_name;
|
||
}
|
||
|
||
// 16. If the result of running is a duplicate name given parser and name is true, then throw a TypeError.
|
||
if (is_a_duplicate_name(name))
|
||
return ErrorInfo { MUST(String::formatted("Duplicate name '{}' provided in URL pattern", name)) };
|
||
|
||
// 17. Let encoded prefix be the result of running parser’s encoding callback given prefix.
|
||
// NOTE: Finally, we encode the fixed text values and create the part.
|
||
auto encoded_prefix = TRY(m_encoding_callback(prefix));
|
||
|
||
// 18. Let encoded suffix be the result of running parser’s encoding callback given suffix.
|
||
auto encoded_suffix = TRY(m_encoding_callback(suffix));
|
||
|
||
// 19. Let part be a new part whose type is type, value is regexp value, modifier is modifier, name is name, prefix
|
||
// is encoded prefix, and suffix is encoded suffix.
|
||
// 20. Append part to parser’s part list.
|
||
m_part_list.append({ type, move(regexp_value), modifier, move(name), move(encoded_prefix), move(encoded_suffix) });
|
||
|
||
return {};
|
||
}
|
||
|
||
// https://urlpattern.spec.whatwg.org/#try-to-consume-a-modifier-token
|
||
Optional<Token const&> PatternParser::try_to_consume_a_modifier_token()
|
||
{
|
||
// 1. Let token be the result of running try to consume a token given parser and "other-modifier".
|
||
auto token = try_to_consume_a_token(Token::Type::OtherModifier);
|
||
|
||
// 2. If token is not null, then return token.
|
||
if (token.has_value())
|
||
return token;
|
||
|
||
// 3. Set token to the result of running try to consume a token given parser and "asterisk".
|
||
token = try_to_consume_a_token(Token::Type::Asterisk);
|
||
|
||
// 4. Return token.
|
||
return token;
|
||
}
|
||
|
||
// https://urlpattern.spec.whatwg.org/#try-to-consume-a-regexp-or-wildcard-token
|
||
Optional<Token const&> PatternParser::try_to_consume_a_regexp_or_wildcard_token(Optional<Token const&> name_token)
|
||
{
|
||
// 1. Let token be the result of running try to consume a token given parser and "regexp".
|
||
auto token = try_to_consume_a_token(Token::Type::Regexp);
|
||
|
||
// 2. If name token is null and token is null, then set token to the result of running try to consume a token given
|
||
// parser and "asterisk".
|
||
if (!name_token.has_value() && !token.has_value())
|
||
token = try_to_consume_a_token(Token::Type::Asterisk);
|
||
|
||
// 3. Return token.
|
||
return token;
|
||
}
|
||
|
||
// https://urlpattern.spec.whatwg.org/#try-to-consume-a-token
|
||
Optional<Token const&> PatternParser::try_to_consume_a_token(Token::Type type)
|
||
{
|
||
// 1. Assert: parser’s index is less than parser’s token list size.
|
||
VERIFY(m_index < m_token_list.size());
|
||
|
||
// 2. Let next token be parser’s token list[parser’s index].
|
||
auto const& next_token = m_token_list[m_index];
|
||
|
||
// 3. If next token’s type is not type return null.
|
||
if (next_token.type != type)
|
||
return {};
|
||
|
||
// 4. Increment parser’s index by 1.
|
||
++m_index;
|
||
|
||
// 5. Return next token.
|
||
return next_token;
|
||
}
|
||
|
||
// https://urlpattern.spec.whatwg.org/#parse-a-pattern-string
|
||
PatternErrorOr<Vector<Part>> PatternParser::parse(Utf8View const& input, Options const& options, EncodingCallback encoding_callback)
|
||
{
|
||
// 1. Let parser be a new pattern parser whose encoding callback is encoding callback and segment wildcard regexp
|
||
// is the result of running generate a segment wildcard regexp given options.
|
||
PatternParser parser { move(encoding_callback), generate_a_segment_wildcard_regexp(options) };
|
||
|
||
// 2. Set parser’s token list to the result of running tokenize given input and "strict".
|
||
parser.m_token_list = TRY(Tokenizer::tokenize(input, Tokenizer::Policy::Strict));
|
||
|
||
// 3. While parser’s index is less than parser’s token list's size:
|
||
while (parser.m_index < parser.m_token_list.size()) {
|
||
// 1. Let char token be the result of running try to consume a token given parser and "char".
|
||
auto char_token = parser.try_to_consume_a_token(Token::Type::Char);
|
||
|
||
// 2. Let name token be the result of running try to consume a token given parser and "name".
|
||
auto name_token = parser.try_to_consume_a_token(Token::Type::Name);
|
||
|
||
// 3. Let regexp or wildcard token be the result of running try to consume a regexp or wildcard token given
|
||
// parser and name token.
|
||
auto regexp_or_wildcard_token = parser.try_to_consume_a_regexp_or_wildcard_token(name_token);
|
||
|
||
// 4. If name token is not null or regexp or wildcard token is not null:
|
||
// NOTE: If there is a matching group, we need to add the part immediately.
|
||
if (name_token.has_value() || regexp_or_wildcard_token.has_value()) {
|
||
// 1. Let prefix be the empty string.
|
||
String prefix;
|
||
|
||
// 2. If char token is not null then set prefix to char token’s value.
|
||
if (char_token.has_value())
|
||
prefix = char_token->value;
|
||
|
||
// 3. If prefix is not the empty string and not options’s prefix code point:
|
||
if (!prefix.is_empty() && (!options.prefix_code_point.has_value() || prefix != String::from_code_point(*options.prefix_code_point))) {
|
||
// 1. Append prefix to the end of parser’s pending fixed value.
|
||
parser.m_pending_fixed_value.append(prefix);
|
||
|
||
// 2. Set prefix to the empty string.
|
||
prefix = String {};
|
||
}
|
||
|
||
// 4. Run maybe add a part from the pending fixed value given parser.
|
||
TRY(parser.maybe_add_a_part_from_the_pending_fixed_value());
|
||
|
||
// 5. Let modifier token be the result of running try to consume a modifier token given parser.
|
||
auto modifier_token = parser.try_to_consume_a_modifier_token();
|
||
|
||
// 6. Run add a part given parser, prefix, name token, regexp or wildcard token, the empty string,
|
||
// and modifier token.
|
||
TRY(parser.add_a_part(prefix, name_token, regexp_or_wildcard_token, String {}, modifier_token));
|
||
|
||
// 7. Continue.
|
||
continue;
|
||
}
|
||
|
||
// 5. Let fixed token be char token.
|
||
// NOTE: If there was no matching group, then we need to buffer any fixed text. We want to collect as
|
||
// much text as possible before adding it as a "fixed-text" part.
|
||
auto fixed_token = char_token;
|
||
|
||
// 6. If fixed token is null, then set fixed token to the result of running try to consume a token given
|
||
// parser and "escaped-char".
|
||
if (!fixed_token.has_value())
|
||
fixed_token = parser.try_to_consume_a_token(Token::Type::EscapedChar);
|
||
|
||
// 7. If fixed token is not null:
|
||
if (fixed_token.has_value()) {
|
||
// 1. Append fixed token’s value to parser’s pending fixed value.
|
||
parser.m_pending_fixed_value.append(fixed_token->value);
|
||
|
||
// 2. Continue.
|
||
continue;
|
||
}
|
||
|
||
// 8. Let open token be the result of running try to consume a token given parser and "open".
|
||
auto open_token = parser.try_to_consume_a_token(Token::Type::Open);
|
||
|
||
// 9. If open token is not null:
|
||
if (open_token.has_value()) {
|
||
// 1. Let prefix be the result of running consume text given parser.
|
||
auto prefix = parser.consume_text();
|
||
|
||
// 2. Set name token to the result of running try to consume a token given parser and "name".
|
||
name_token = parser.try_to_consume_a_token(Token::Type::Name);
|
||
|
||
// 3. Set regexp or wildcard token to the result of running try to consume a regexp or wildcard token
|
||
// given parser and name token.
|
||
regexp_or_wildcard_token = parser.try_to_consume_a_regexp_or_wildcard_token(name_token);
|
||
|
||
// 4. Let suffix be the result of running consume text given parser.
|
||
auto suffix = parser.consume_text();
|
||
|
||
// 5. Run consume a required token given parser and "close".
|
||
TRY(parser.consume_a_required_token(Token::Type::Close));
|
||
|
||
// 6. Let modifier token to the result of running try to consume a modifier token given parser.
|
||
auto modifier_token = parser.try_to_consume_a_modifier_token();
|
||
|
||
// 7. Run add a part given parser, prefix, name token, regexp or wildcard token, suffix, and modifier token.
|
||
TRY(parser.add_a_part(prefix, name_token, regexp_or_wildcard_token, suffix, modifier_token));
|
||
|
||
// 8. Continue.
|
||
continue;
|
||
}
|
||
|
||
// 10. Run maybe add a part from the pending fixed value given parser.
|
||
TRY(parser.maybe_add_a_part_from_the_pending_fixed_value());
|
||
|
||
// 11. Run consume a required token given parser and "end".
|
||
TRY(parser.consume_a_required_token(Token::Type::End));
|
||
}
|
||
|
||
if constexpr (URL_PATTERN_DEBUG) {
|
||
dbgln("Pattern parser produced the part list:");
|
||
for (auto const& part : parser.m_part_list) {
|
||
dbgln("Type {}, Value '{}', Modifier {}, Name '{}', Prefix '{}', Suffix '{}'",
|
||
Part::type_to_string(part.type), part.value, Part::convert_modifier_to_string(part.modifier),
|
||
part.name, part.prefix, part.suffix);
|
||
}
|
||
}
|
||
// 4. Return parser’s part list.
|
||
return move(parser.m_part_list);
|
||
}
|
||
|
||
}
|