From bee3720b6fd75efdc1f5f2d3fdd35fc3a7675085 Mon Sep 17 00:00:00 2001 From: Shannon Booth Date: Thu, 27 Mar 2025 00:05:37 +1300 Subject: [PATCH] LibURL/Pattern: Make dummyURL from the URL parser with a special scheme Corresponds to: https://github.com/whatwg/urlpattern/commit/46c30fda8f Along with a follow-up bug fix that I made in: https://github.com/whatwg/urlpattern/commit/5e1c93e2 This, for example, fixes canonicalization of URL hosts containing special characters that should have the Unicode ToASCII algorithm performed on them, as the URLs were not being treated as special. --- Libraries/LibURL/Pattern/Canonicalization.cpp | 63 ++++++++++--------- .../wpt-import/urlpattern/urlpattern.any.txt | 14 ++--- 2 files changed, 42 insertions(+), 35 deletions(-) diff --git a/Libraries/LibURL/Pattern/Canonicalization.cpp b/Libraries/LibURL/Pattern/Canonicalization.cpp index 55a19e94771..1fd4c363cc0 100644 --- a/Libraries/LibURL/Pattern/Canonicalization.cpp +++ b/Libraries/LibURL/Pattern/Canonicalization.cpp @@ -9,6 +9,14 @@ namespace URL::Pattern { +// https://urlpattern.spec.whatwg.org/#url-pattern-create-a-dummy-url +static URL create_a_dummy_url() +{ + // 1. Let dummyInput be "https://dummy.invalid/". + // 2. Return the result of running the basic URL parser on dummyInput. + return Parser::basic_parse("https://dummy.invalid/"sv).release_value(); +} + // https://urlpattern.spec.whatwg.org/#canonicalize-a-protocol PatternErrorOr canonicalize_a_protocol(String const& value) { @@ -16,21 +24,17 @@ PatternErrorOr canonicalize_a_protocol(String const& value) if (value.is_empty()) return value; - // 2. Let dummyURL be a new URL record. - URL dummy_url; - - // 3. Let parseResult be the result of running the basic URL parser given value followed by "://dummy.test", with dummyURL as url. - // + // 2. Let parseResult be the result of running the basic URL parser given value followed by "://dummy.invalid/". 
// NOTE: Note, state override is not used here because it enforces restrictions that are only appropriate for the // protocol setter. Instead we use the protocol to parse a dummy URL using the normal parsing entry point. - auto parse_result = Parser::basic_parse(MUST(String::formatted("{}://dummy.test"sv, value)), {}, &dummy_url); + auto parse_result = Parser::basic_parse(MUST(String::formatted("{}://dummy.invalid"sv, value))); // 4. If parseResult is failure, then throw a TypeError. if (!parse_result.has_value()) return ErrorInfo { "Failed to canonicalize URL protocol string"_string }; - // 5. Return dummyURL’s scheme. - return dummy_url.scheme(); + // 5. Return parseResult’s scheme. + return parse_result->scheme(); } // https://urlpattern.spec.whatwg.org/#canonicalize-a-username @@ -40,8 +44,8 @@ String canonicalize_a_username(String const& value) if (value.is_empty()) return value; - // 2. Let dummyURL be a new URL record. - URL dummy_url; + // 2. Let dummyURL be the result of creating a dummy URL. + auto dummy_url = create_a_dummy_url(); // 3. Set the username given dummyURL and value. dummy_url.set_username(value); @@ -57,8 +61,8 @@ String canonicalize_a_password(String const& value) if (value.is_empty()) return value; - // 2. Let dummyURL be a new URL record. - URL dummy_url; + // 2. Let dummyURL be the result of creating a dummy URL. + auto dummy_url = create_a_dummy_url(); // 3. Set the password given dummyURL and value. dummy_url.set_password(value); @@ -74,8 +78,8 @@ PatternErrorOr canonicalize_a_hostname(String const& value) if (value.is_empty()) return value; - // 2. Let dummyURL be a new URL record. - URL dummy_url; + // 2. Let dummyURL be the result of creating a dummy URL. + auto dummy_url = create_a_dummy_url(); // 3. Let parseResult be the result of running the basic URL parser given value with dummyURL // as url and hostname state as state override. 
@@ -127,8 +131,8 @@ PatternErrorOr canonicalize_a_port(String const& port_value, Optional canonicalize_an_opaque_pathname(String const& value) if (value.is_empty()) return value; - // 2. Let dummyURL be a new URL record. - URL dummy_url; + // 2. Let dummyURL be the result of creating a dummy URL. + auto dummy_url = create_a_dummy_url(); // 3. Set dummyURL’s path to the empty string. dummy_url.set_paths({ "" }); @@ -217,8 +224,8 @@ String canonicalize_a_search(String const& value) if (value.is_empty()) return value; - // 2. Let dummyURL be a new URL record. - URL dummy_url; + // 2. Let dummyURL be the result of creating a dummy URL. + auto dummy_url = create_a_dummy_url(); // 3. Set dummyURL’s query to the empty string. dummy_url.set_query(String {}); @@ -238,8 +245,8 @@ String canonicalize_a_hash(String const& value) if (value.is_empty()) return value; - // 2. Let dummyURL be a new URL record. - URL dummy_url; + // 2. Let dummyURL be the result of creating a dummy URL. + auto dummy_url = create_a_dummy_url(); // 3. Set dummyURL’s fragment to the empty string. dummy_url.set_fragment(String {}); diff --git a/Tests/LibWeb/Text/expected/wpt-import/urlpattern/urlpattern.any.txt b/Tests/LibWeb/Text/expected/wpt-import/urlpattern/urlpattern.any.txt index c43bb150237..06c928f6346 100644 --- a/Tests/LibWeb/Text/expected/wpt-import/urlpattern/urlpattern.any.txt +++ b/Tests/LibWeb/Text/expected/wpt-import/urlpattern/urlpattern.any.txt @@ -2,8 +2,8 @@ Harness status: OK Found 354 tests -346 Pass -8 Fail +351 Pass +3 Fail Pass Loading data... 
Pass Pattern: [{"pathname":"/foo/bar"}] Inputs: [{"pathname":"/foo/bar"}] Pass Pattern: [{"pathname":"/foo/bar"}] Inputs: [{"pathname":"/foo/ba"}] @@ -157,9 +157,9 @@ Pass Pattern: [{"username":"caf%c3%a9"}] Inputs: [{"username":"café"}] Pass Pattern: [{"password":"caf%C3%A9"}] Inputs: [{"password":"café"}] Pass Pattern: [{"password":"café"}] Inputs: [{"password":"café"}] Pass Pattern: [{"password":"caf%c3%a9"}] Inputs: [{"password":"café"}] -Fail Pattern: [{"hostname":"xn--caf-dma.com"}] Inputs: [{"hostname":"café.com"}] -Fail Pattern: [{"hostname":"café.com"}] Inputs: [{"hostname":"café.com"}] -Fail Pattern: ["http://í ½U+deb2.com/"] Inputs: ["http://í ½U+deb2.com/"] +Pass Pattern: [{"hostname":"xn--caf-dma.com"}] Inputs: [{"hostname":"café.com"}] +Pass Pattern: [{"hostname":"café.com"}] Inputs: [{"hostname":"café.com"}] +Pass Pattern: ["http://í ½U+deb2.com/"] Inputs: ["http://í ½U+deb2.com/"] Pass Pattern: ["http://\ud83d \udeb2"] Inputs: undefined Pass Pattern: [{"hostname":"\ud83d \udeb2"}] Inputs: undefined Pass Pattern: [{"pathname":"\ud83d \udeb2"}] Inputs: [] @@ -295,7 +295,7 @@ Pass Pattern: [{"protocol":":name+"}] Inputs: [{"protocol":"foobar"}] Pass Pattern: [{"protocol":":name"}] Inputs: [{"protocol":"foobar"}] Pass Pattern: [{"hostname":"bad hostname"}] Inputs: undefined Pass Pattern: [{"hostname":"bad#hostname"}] Inputs: [{"hostname":"bad"}] -Fail Pattern: [{"hostname":"bad%hostname"}] Inputs: undefined +Pass Pattern: [{"hostname":"bad%hostname"}] Inputs: undefined Pass Pattern: [{"hostname":"bad/hostname"}] Inputs: [{"hostname":"bad"}] Pass Pattern: [{"hostname":"bad\\:hostname"}] Inputs: undefined Pass Pattern: [{"hostname":"bad