LibURL/Pattern: Make dummyURL from the URL parser with a special scheme

Corresponds to: 46c30fda8f

Along with a follow-up bug fix that I made in:

5e1c93e2

This, for example, fixes canonicalization of URL hosts containing
special characters that should have the Unicode ToASCII algorithm
performed on them, as the URLs were not being treated as special.
This commit is contained in:
Shannon Booth 2025-03-27 00:05:37 +13:00 committed by Tim Flynn
commit bee3720b6f
Notes: github-actions[bot] 2025-04-06 12:26:03 +00:00
2 changed files with 42 additions and 35 deletions

View file

@ -9,6 +9,14 @@
namespace URL::Pattern { namespace URL::Pattern {
// https://urlpattern.spec.whatwg.org/#url-pattern-create-a-dummy-url
// Produces the placeholder URL record used by the canonicalization routines in this file,
// obtained by running the basic URL parser over a fixed "https" input.
static URL create_a_dummy_url()
{
    // 1. Let dummyInput be "https://dummy.invalid/".
    auto const dummy_input = "https://dummy.invalid/"sv;

    // 2. Return the result of running the basic URL parser on dummyInput.
    auto parse_result = Parser::basic_parse(dummy_input);
    return parse_result.release_value();
}
// https://urlpattern.spec.whatwg.org/#canonicalize-a-protocol // https://urlpattern.spec.whatwg.org/#canonicalize-a-protocol
PatternErrorOr<String> canonicalize_a_protocol(String const& value) PatternErrorOr<String> canonicalize_a_protocol(String const& value)
{ {
@ -16,21 +24,17 @@ PatternErrorOr<String> canonicalize_a_protocol(String const& value)
if (value.is_empty()) if (value.is_empty())
return value; return value;
// 2. Let dummyURL be a new URL record. // 2. Let parseResult be the result of running the basic URL parser given value followed by "://dummy.invalid/".
URL dummy_url;
// 3. Let parseResult be the result of running the basic URL parser given value followed by "://dummy.test", with dummyURL as url.
//
// NOTE: Note, state override is not used here because it enforces restrictions that are only appropriate for the // NOTE: Note, state override is not used here because it enforces restrictions that are only appropriate for the
// protocol setter. Instead we use the protocol to parse a dummy URL using the normal parsing entry point. // protocol setter. Instead we use the protocol to parse a dummy URL using the normal parsing entry point.
auto parse_result = Parser::basic_parse(MUST(String::formatted("{}://dummy.test"sv, value)), {}, &dummy_url); auto parse_result = Parser::basic_parse(MUST(String::formatted("{}://dummy.invalid"sv, value)));
// 4. If parseResult is failure, then throw a TypeError. // 4. If parseResult is failure, then throw a TypeError.
if (!parse_result.has_value()) if (!parse_result.has_value())
return ErrorInfo { "Failed to canonicalize URL protocol string"_string }; return ErrorInfo { "Failed to canonicalize URL protocol string"_string };
// 5. Return dummyURL's scheme. // 5. Return parseResult's scheme.
return dummy_url.scheme(); return parse_result->scheme();
} }
// https://urlpattern.spec.whatwg.org/#canonicalize-a-username // https://urlpattern.spec.whatwg.org/#canonicalize-a-username
@ -40,8 +44,8 @@ String canonicalize_a_username(String const& value)
if (value.is_empty()) if (value.is_empty())
return value; return value;
// 2. Let dummyURL be a new URL record. // 2. Let dummyURL be the result of creating a dummy URL.
URL dummy_url; auto dummy_url = create_a_dummy_url();
// 3. Set the username given dummyURL and value. // 3. Set the username given dummyURL and value.
dummy_url.set_username(value); dummy_url.set_username(value);
@ -57,8 +61,8 @@ String canonicalize_a_password(String const& value)
if (value.is_empty()) if (value.is_empty())
return value; return value;
// 2. Let dummyURL be a new URL record. // 2. Let dummyURL be the result of creating a dummy URL.
URL dummy_url; auto dummy_url = create_a_dummy_url();
// 3. Set the password given dummyURL and value. // 3. Set the password given dummyURL and value.
dummy_url.set_password(value); dummy_url.set_password(value);
@ -74,8 +78,8 @@ PatternErrorOr<String> canonicalize_a_hostname(String const& value)
if (value.is_empty()) if (value.is_empty())
return value; return value;
// 2. Let dummyURL be a new URL record. // 2. Let dummyURL be the result of creating a dummy URL.
URL dummy_url; auto dummy_url = create_a_dummy_url();
// 3. Let parseResult be the result of running the basic URL parser given value with dummyURL // 3. Let parseResult be the result of running the basic URL parser given value with dummyURL
// as url and hostname state as state override. // as url and hostname state as state override.
@ -127,8 +131,8 @@ PatternErrorOr<String> canonicalize_a_port(String const& port_value, Optional<St
if (port_value.is_empty()) if (port_value.is_empty())
return port_value; return port_value;
// 2. Let dummyURL be a new URL record. // 2. Let dummyURL be the result of creating a dummy URL.
URL dummy_url; auto dummy_url = create_a_dummy_url();
// 3. If protocolValue was given, then set dummyURL's scheme to protocolValue. // 3. If protocolValue was given, then set dummyURL's scheme to protocolValue.
// NOTE: Note, we set the URL record's scheme in order for the basic URL parser to // NOTE: Note, we set the URL record's scheme in order for the basic URL parser to
@ -168,20 +172,23 @@ String canonicalize_a_pathname(String const& value)
// 4. Append value to the end of modified value. // 4. Append value to the end of modified value.
modified_value.append(value); modified_value.append(value);
// 5. Let dummyURL be a new URL record. // 5. Let dummyURL be the result of creating a dummy URL.
URL dummy_url; auto dummy_url = create_a_dummy_url();
// 6. Run basic URL parser given modified value with dummyURL as url and path start state as state override. // 6. Empty dummyURL's path.
dummy_url.set_paths({});
// 7. Run basic URL parser given modified value with dummyURL as url and path start state as state override.
(void)Parser::basic_parse(modified_value.string_view(), {}, &dummy_url, Parser::State::PathStart); (void)Parser::basic_parse(modified_value.string_view(), {}, &dummy_url, Parser::State::PathStart);
// 7. Let result be the result of URL path serializing dummyURL. // 8. Let result be the result of URL path serializing dummyURL.
auto result = dummy_url.serialize_path(); auto result = dummy_url.serialize_path();
// 8. If leading slash is false, then set result to the code point substring from 2 to the end of the string within result. // 9. If leading slash is false, then set result to the code point substring from 2 to the end of the string within result.
if (!leading_slash) if (!leading_slash)
result = MUST(String::from_utf8(result.code_points().unicode_substring_view(2).as_string())); result = MUST(String::from_utf8(result.code_points().unicode_substring_view(2).as_string()));
// 9. Return result. // 10. Return result.
return result; return result;
} }
@ -192,8 +199,8 @@ PatternErrorOr<String> canonicalize_an_opaque_pathname(String const& value)
if (value.is_empty()) if (value.is_empty())
return value; return value;
// 2. Let dummyURL be a new URL record. // 2. Let dummyURL be the result of creating a dummy URL.
URL dummy_url; auto dummy_url = create_a_dummy_url();
// 3. Set dummyURL's path to the empty string. // 3. Set dummyURL's path to the empty string.
dummy_url.set_paths({ "" }); dummy_url.set_paths({ "" });
@ -217,8 +224,8 @@ String canonicalize_a_search(String const& value)
if (value.is_empty()) if (value.is_empty())
return value; return value;
// 2. Let dummyURL be a new URL record. // 2. Let dummyURL be the result of creating a dummy URL.
URL dummy_url; auto dummy_url = create_a_dummy_url();
// 3. Set dummyURL's query to the empty string. // 3. Set dummyURL's query to the empty string.
dummy_url.set_query(String {}); dummy_url.set_query(String {});
@ -238,8 +245,8 @@ String canonicalize_a_hash(String const& value)
if (value.is_empty()) if (value.is_empty())
return value; return value;
// 2. Let dummyURL be a new URL record. // 2. Let dummyURL be the result of creating a dummy URL.
URL dummy_url; auto dummy_url = create_a_dummy_url();
// 3. Set dummyURL's fragment to the empty string. // 3. Set dummyURL's fragment to the empty string.
dummy_url.set_fragment(String {}); dummy_url.set_fragment(String {});

View file

@ -2,8 +2,8 @@ Harness status: OK
Found 354 tests Found 354 tests
346 Pass 351 Pass
8 Fail 3 Fail
Pass Loading data... Pass Loading data...
Pass Pattern: [{"pathname":"/foo/bar"}] Inputs: [{"pathname":"/foo/bar"}] Pass Pattern: [{"pathname":"/foo/bar"}] Inputs: [{"pathname":"/foo/bar"}]
Pass Pattern: [{"pathname":"/foo/bar"}] Inputs: [{"pathname":"/foo/ba"}] Pass Pattern: [{"pathname":"/foo/bar"}] Inputs: [{"pathname":"/foo/ba"}]
@ -157,9 +157,9 @@ Pass Pattern: [{"username":"caf%c3%a9"}] Inputs: [{"username":"café"}]
Pass Pattern: [{"password":"caf%C3%A9"}] Inputs: [{"password":"café"}] Pass Pattern: [{"password":"caf%C3%A9"}] Inputs: [{"password":"café"}]
Pass Pattern: [{"password":"café"}] Inputs: [{"password":"café"}] Pass Pattern: [{"password":"café"}] Inputs: [{"password":"café"}]
Pass Pattern: [{"password":"caf%c3%a9"}] Inputs: [{"password":"café"}] Pass Pattern: [{"password":"caf%c3%a9"}] Inputs: [{"password":"café"}]
Fail Pattern: [{"hostname":"xn--caf-dma.com"}] Inputs: [{"hostname":"café.com"}] Pass Pattern: [{"hostname":"xn--caf-dma.com"}] Inputs: [{"hostname":"café.com"}]
Fail Pattern: [{"hostname":"café.com"}] Inputs: [{"hostname":"café.com"}] Pass Pattern: [{"hostname":"café.com"}] Inputs: [{"hostname":"café.com"}]
Fail Pattern: ["http://<2F><><EFBFBD>U+deb2.com/"] Inputs: ["http://<2F><><EFBFBD>U+deb2.com/"] Pass Pattern: ["http://<2F><><EFBFBD>U+deb2.com/"] Inputs: ["http://<2F><><EFBFBD>U+deb2.com/"]
Pass Pattern: ["http://\ud83d \udeb2"] Inputs: undefined Pass Pattern: ["http://\ud83d \udeb2"] Inputs: undefined
Pass Pattern: [{"hostname":"\ud83d \udeb2"}] Inputs: undefined Pass Pattern: [{"hostname":"\ud83d \udeb2"}] Inputs: undefined
Pass Pattern: [{"pathname":"\ud83d \udeb2"}] Inputs: [] Pass Pattern: [{"pathname":"\ud83d \udeb2"}] Inputs: []
@ -295,7 +295,7 @@ Pass Pattern: [{"protocol":":name+"}] Inputs: [{"protocol":"foobar"}]
Pass Pattern: [{"protocol":":name"}] Inputs: [{"protocol":"foobar"}] Pass Pattern: [{"protocol":":name"}] Inputs: [{"protocol":"foobar"}]
Pass Pattern: [{"hostname":"bad hostname"}] Inputs: undefined Pass Pattern: [{"hostname":"bad hostname"}] Inputs: undefined
Pass Pattern: [{"hostname":"bad#hostname"}] Inputs: [{"hostname":"bad"}] Pass Pattern: [{"hostname":"bad#hostname"}] Inputs: [{"hostname":"bad"}]
Fail Pattern: [{"hostname":"bad%hostname"}] Inputs: undefined Pass Pattern: [{"hostname":"bad%hostname"}] Inputs: undefined
Pass Pattern: [{"hostname":"bad/hostname"}] Inputs: [{"hostname":"bad"}] Pass Pattern: [{"hostname":"bad/hostname"}] Inputs: [{"hostname":"bad"}]
Pass Pattern: [{"hostname":"bad\\:hostname"}] Inputs: undefined Pass Pattern: [{"hostname":"bad\\:hostname"}] Inputs: undefined
Pass Pattern: [{"hostname":"bad<hostname"}] Inputs: undefined Pass Pattern: [{"hostname":"bad<hostname"}] Inputs: undefined
@ -304,7 +304,7 @@ Pass Pattern: [{"hostname":"bad?hostname"}] Inputs: undefined
Pass Pattern: [{"hostname":"bad@hostname"}] Inputs: undefined Pass Pattern: [{"hostname":"bad@hostname"}] Inputs: undefined
Pass Pattern: [{"hostname":"bad[hostname"}] Inputs: undefined Pass Pattern: [{"hostname":"bad[hostname"}] Inputs: undefined
Pass Pattern: [{"hostname":"bad]hostname"}] Inputs: undefined Pass Pattern: [{"hostname":"bad]hostname"}] Inputs: undefined
Fail Pattern: [{"hostname":"bad\\\\hostname"}] Inputs: [{"hostname":"badhostname"}] Pass Pattern: [{"hostname":"bad\\\\hostname"}] Inputs: [{"hostname":"badhostname"}]
Pass Pattern: [{"hostname":"bad^hostname"}] Inputs: undefined Pass Pattern: [{"hostname":"bad^hostname"}] Inputs: undefined
Pass Pattern: [{"hostname":"bad|hostname"}] Inputs: undefined Pass Pattern: [{"hostname":"bad|hostname"}] Inputs: undefined
Pass Pattern: [{"hostname":"bad\nhostname"}] Inputs: [{"hostname":"badhostname"}] Pass Pattern: [{"hostname":"bad\nhostname"}] Inputs: [{"hostname":"badhostname"}]