ladybird/Libraries/LibURL/Pattern/Canonicalization.cpp
Shannon Booth e70272ddef LibURL/Pattern: Implement URL Pattern canonicalization
These are used to normalize URL components.
2025-03-15 07:39:03 -04:00

267 lines
9.6 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (c) 2025, Shannon Booth <shannon@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibURL/Parser.h>
#include <LibURL/Pattern/Canonicalization.h>
namespace URL::Pattern {
// https://urlpattern.spec.whatwg.org/#canonicalize-a-protocol
PatternErrorOr<String> canonicalize_a_protocol(String const& value)
{
// 1. If value is the empty string, return value.
if (value.is_empty())
return value;
// 2. Let dummyURL be a new URL record.
URL dummy_url;
// 3. Let parseResult be the result of running the basic URL parser given value followed by "://dummy.test", with dummyURL as url.
//
// NOTE: Note, state override is not used here because it enforces restrictions that are only appropriate for the
// protocol setter. Instead we use the protocol to parse a dummy URL using the normal parsing entry point.
auto parse_result = Parser::basic_parse(MUST(String::formatted("{}://dummy.test"sv, value)), {}, &dummy_url);
// 4. If parseResult is failure, then throw a TypeError.
if (!parse_result.has_value())
return ErrorInfo { "Failed to canonicalize URL protocol string"_string };
// 5. Return dummyURLs scheme.
return dummy_url.scheme();
}
// https://urlpattern.spec.whatwg.org/#canonicalize-a-username
String canonicalize_a_username(String const& value)
{
// 1. If value is the empty string, return value.
if (value.is_empty())
return value;
// 2. Let dummyURL be a new URL record.
URL dummy_url;
// 3. Set the username given dummyURL and value.
dummy_url.set_username(value);
// 4. Return dummyURLs username.
return dummy_url.username();
}
// https://urlpattern.spec.whatwg.org/#canonicalize-a-password
String canonicalize_a_password(String const& value)
{
// 1. If value is the empty string, return value.
if (value.is_empty())
return value;
// 2. Let dummyURL be a new URL record.
URL dummy_url;
// 3. Set the password given dummyURL and value.
dummy_url.set_password(value);
// 4. Return dummyURLs password.
return dummy_url.password();
}
// https://urlpattern.spec.whatwg.org/#canonicalize-a-hostname
PatternErrorOr<String> canonicalize_a_hostname(String const& value)
{
// 1. If value is the empty string, return value.
if (value.is_empty())
return value;
// 2. Let dummyURL be a new URL record.
URL dummy_url;
// 3. Let parseResult be the result of running the basic URL parser given value with dummyURL
// as url and hostname state as state override.
auto parse_result = Parser::basic_parse(value, {}, &dummy_url, Parser::State::Hostname);
// 4. If parseResult is failure, then throw a TypeError.
if (!parse_result.has_value())
return ErrorInfo { "Failed to canonicalize URL hostname string"_string };
// 5. Return dummyURLs host, serialized, or empty string if it is null.
if (!dummy_url.host().has_value())
return String {};
return dummy_url.host()->serialize();
}
// https://urlpattern.spec.whatwg.org/#canonicalize-an-ipv6-hostname
PatternErrorOr<String> canonicalize_an_ipv6_hostname(String const& value)
{
// 1. Let result be the empty string.
StringBuilder result;
// 2. For each code point in value interpreted as a list of code points:
for (auto code_point : value.code_points()) {
// 1. If all of the following are true:
// * code point is not an ASCII hex digit;
// * code point is not U+005B ([);
// * code point is not U+005D (]); and
// * code point is not U+003A (:),
// then throw a TypeError.
if (!is_ascii_hex_digit(code_point)
&& code_point != '['
&& code_point != ']'
&& code_point != ':') {
return ErrorInfo { "Failed to canonicalize IPv6 hostname string"_string };
}
// 2. Append the result of running ASCII lowercase given code point to the end of result.
result.append(to_ascii_lowercase(code_point));
}
// 3. Return result.
return result.to_string_without_validation();
}
// https://urlpattern.spec.whatwg.org/#canonicalize-a-port
PatternErrorOr<String> canonicalize_a_port(String const& port_value, Optional<String> const& protocol_value)
{
// 1. If portValue is the empty string, return portValue.
if (port_value.is_empty())
return port_value;
// 2. Let dummyURL be a new URL record.
URL dummy_url;
// 3. If protocolValue was given, then set dummyURLs scheme to protocolValue.
// NOTE: Note, we set the URL record's scheme in order for the basic URL parser to
// recognize and normalize default port values.
if (protocol_value.has_value())
dummy_url.set_scheme(protocol_value.value());
// 4. Let parseResult be the result of running basic URL parser given portValue with dummyURL
// as url and port state as state override.
auto parse_result = Parser::basic_parse(port_value, {}, &dummy_url, Parser::State::Port);
// 4. If parseResult is failure, then throw a TypeError.
if (!parse_result.has_value())
return ErrorInfo { "Failed to canonicalize port string"_string };
// 5. Return dummyURLs port, serialized, or empty string if it is null.
if (!dummy_url.port().has_value())
return String {};
return String::number(*dummy_url.port());
}
// https://urlpattern.spec.whatwg.org/#canonicalize-a-pathname
PatternErrorOr<String> canonicalize_a_pathname(String const& value)
{
// 1. If value is the empty string, then return value.
if (value.is_empty())
return value;
// 2. Let leading slash be true if the first code point in value is U+002F (/) and otherwise false.
bool leading_slash = value.bytes()[0] == '/';
// 3. Let modified value be "/-" if leading slash is false and otherwise the empty string.
StringBuilder modified_value;
if (!leading_slash)
modified_value.append("/-"sv);
// 4. Append value to the end of modified value.
modified_value.append(value);
// 5. Let dummyURL be a new URL record.
URL dummy_url;
// 6. Let parseResult be the result of running basic URL parser given modified value with dummyURL
// as url and path start state as state override.
auto parse_result = Parser::basic_parse(value, {}, &dummy_url, Parser::State::PathStart);
// 7. If parseResult is failure, then throw a TypeError.
if (!parse_result.has_value())
return ErrorInfo { "Failed to canonicalize pathname string"_string };
// 8. Let result be the result of URL path serializing dummyURL.
auto result = dummy_url.serialize_path();
// 9. If leading slash is false, then set result to the code point substring from 2 to the end of the string within result.
if (!leading_slash)
result = MUST(String::from_utf8(result.code_points().unicode_substring_view(2).as_string()));
// 10. Return result.
return result;
}
// https://urlpattern.spec.whatwg.org/#canonicalize-an-opaque-pathname
PatternErrorOr<String> canonicalize_an_opaque_pathname(String const& value)
{
// 1. If value is the empty string, return value.
if (value.is_empty())
return value;
// 2. Let dummyURL be a new URL record.
URL dummy_url;
// 3. Set dummyURLs path to the empty string.
dummy_url.set_paths({ "" });
// 4. Let parseResult be the result of running URL parsing given value with dummyURL as url and opaque path state as state override.
auto parse_result = Parser::basic_parse(value, {}, &dummy_url, Parser::State::OpaquePath);
// 5. If parseResult is failure, then throw a TypeError.
if (!parse_result.has_value())
return ErrorInfo { "Failed to canonicalize opaque pathname string"_string };
// 6. Return the result of URL path serializing dummyURL.
return dummy_url.serialize_path();
}
// https://urlpattern.spec.whatwg.org/#canonicalize-a-search
PatternErrorOr<String> canonicalize_a_search(String const& value)
{
// 1. If value is the empty string, return value.
if (value.is_empty())
return value;
// 2. Let dummyURL be a new URL record.
URL dummy_url;
// 3. Set dummyURLs query to the empty string.
dummy_url.set_query(String {});
// 4. Let parseResult be the result of running basic URL parser given value with dummyURL as url and query state as state override.
auto parse_result = Parser::basic_parse(value, {}, &dummy_url, Parser::State::Query);
// 5. If parseResult is failure, then throw a TypeError.
if (!parse_result.has_value())
return ErrorInfo { "Failed to canonicalize query string"_string };
// 6. Return dummyURLs query.
VERIFY(dummy_url.query().has_value());
return *dummy_url.query();
}
// https://urlpattern.spec.whatwg.org/#canonicalize-a-hash
PatternErrorOr<String> canonicalize_a_hash(String const& value)
{
// 1. If value is the empty string, return value.
if (value.is_empty())
return value;
// 2. Let dummyURL be a new URL record.
URL dummy_url;
// 3. Set dummyURLs fragment to the empty string.
dummy_url.set_fragment(String {});
// 4. Let parseResult be the result of running basic URL parser given value with dummyURL as url and fragment state as state override.
auto parse_result = Parser::basic_parse(value, {}, &dummy_url, Parser::State::Fragment);
// 5. If parseResult is failure, then throw a TypeError.
if (!parse_result.has_value())
return ErrorInfo { "Failed to canonicalize query string"_string };
// 6. Return dummyURLs fragment.
VERIFY(dummy_url.fragment().has_value());
return *dummy_url.fragment();
}
}