diff --git a/Libraries/LibURL/Parser.cpp b/Libraries/LibURL/Parser.cpp index 9c0b3638b2c..4594238410a 100644 --- a/Libraries/LibURL/Parser.cpp +++ b/Libraries/LibURL/Parser.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2021, Max Wipfli - * Copyright (c) 2023-2024, Shannon Booth + * Copyright (c) 2023-2025, Shannon Booth * * SPDX-License-Identifier: BSD-2-Clause */ @@ -1558,23 +1558,32 @@ Optional Parser::basic_parse(StringView raw_input, Optional bas buffer.clear(); state = State::Fragment; } - // 3. Otherwise: - else { - // 1. If c is not the EOF code point, not a URL code point, and not U+0025 (%), invalid-URL-unit validation error. - if (code_point != end_of_file && !is_url_code_point(code_point) && code_point != '%') + // 3. Otherwise, if c is U+0020 SPACE: + else if (code_point == ' ') { + // 1. If remaining starts with U+003F (?) or U+003F (#), then append "%20" to url’s path. + if (auto remaining = get_remaining(); remaining.starts_with('?') || remaining.starts_with('#')) { + buffer.append("%20"sv); + } + // 2. Otherwise, append U+0020 SPACE to url’s path. + else { + buffer.append(' '); + } + } + // 4. Otherwise, if c is not the EOF code point: + else if (code_point != end_of_file) { + // 1. If c is not a URL code point and not U+0025 (%), invalid-URL-unit validation error. + if (!is_url_code_point(code_point) && code_point != '%') report_validation_error(); - // 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error. + // 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, invalid-URL-unit validation error. if (code_point == '%' && !remaining_starts_with_two_ascii_hex_digits()) report_validation_error(); - // 3. If c is not the EOF code point, UTF-8 percent-encode c using the C0 control percent-encode set and append the result to url’s path. - if (code_point != end_of_file) { - append_percent_encoded_if_necessary(buffer, code_point, PercentEncodeSet::C0Control); - } else { - url->m_data->paths[0] = buffer.to_string_without_validation(); - buffer.clear(); - } + // 3. UTF-8 percent-encode c using the C0 control percent-encode set and append the result to url’s path. + append_percent_encoded_if_necessary(buffer, code_point, PercentEncodeSet::C0Control); + } else { + url->m_data->paths[0] = buffer.to_string_without_validation(); + buffer.clear(); } break; // -> query state, https://url.spec.whatwg.org/#query-state diff --git a/Libraries/LibWeb/DOMURL/DOMURL.cpp b/Libraries/LibWeb/DOMURL/DOMURL.cpp index 7945a799abb..f30b6fdc0e1 100644 --- a/Libraries/LibWeb/DOMURL/DOMURL.cpp +++ b/Libraries/LibWeb/DOMURL/DOMURL.cpp @@ -388,18 +388,10 @@ void DOMURL::set_search(String const& search) // 1. Let url be this’s URL. auto& url = m_url; - // 2. If the given value is the empty string: + // 2. If the given value is the empty string, then set url’s query to null, empty this’s query object’s list, and return. if (search.is_empty()) { - // 1. Set url’s query to null. url.set_query({}); - - // 2. Empty this’s query object’s list. m_query->m_list.clear(); - - // 3. Potentially strip trailing spaces from an opaque path with this. - strip_trailing_spaces_from_an_opaque_path(*this); - - // 4. Return. return; } @@ -438,15 +430,9 @@ String DOMURL::hash() const // https://url.spec.whatwg.org/#ref-for-dom-url-hash%E2%91%A0 void DOMURL::set_hash(String const& hash) { - // 1. If the given value is the empty string: + // 1. If the given value is the empty string, then set this’s URL’s fragment to null and return. if (hash.is_empty()) { - // 1. Set this’s URL’s fragment to null. m_url.set_fragment({}); - - // 2. Potentially strip trailing spaces from an opaque path with this. - strip_trailing_spaces_from_an_opaque_path(*this); - - // 3. Return. return; } @@ -461,29 +447,6 @@ void DOMURL::set_hash(String const& hash) (void)URL::Parser::basic_parse(input, {}, &m_url, URL::Parser::State::Fragment); } -// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path -void strip_trailing_spaces_from_an_opaque_path(DOMURL& url) -{ - // 1. If url’s URL does not have an opaque path, then return. - // FIXME: Reimplement this step once we modernize the URL implementation to meet the spec. - if (!url.cannot_be_a_base_url()) - return; - - // 2. If url’s URL’s fragment is non-null, then return. - if (url.fragment().has_value()) - return; - - // 3. If url’s URL’s query is non-null, then return. - if (url.query().has_value()) - return; - - // 4. Remove all trailing U+0020 SPACE code points from url’s URL’s path. - // NOTE: At index 0 since the first step tells us that the URL only has one path segment. - auto opaque_path = url.path_segment_at_index(0); - auto trimmed_path = opaque_path.trim(" "sv, TrimMode::Right); - url.set_paths({ trimmed_path }); -} - // https://url.spec.whatwg.org/#concept-url-parser Optional parse(StringView input, Optional base_url, Optional encoding) { diff --git a/Libraries/LibWeb/DOMURL/DOMURL.h b/Libraries/LibWeb/DOMURL/DOMURL.h index 3dd016dc74a..66e0d958fdd 100644 --- a/Libraries/LibWeb/DOMURL/DOMURL.h +++ b/Libraries/LibWeb/DOMURL/DOMURL.h @@ -2,7 +2,7 @@ * Copyright (c) 2021, Idan Horowitz * Copyright (c) 2021, the SerenityOS developers. * Copyright (c) 2023, networkException - * Copyright (c) 2024, Shannon Booth + * Copyright (c) 2024-2025, Shannon Booth * * SPDX-License-Identifier: BSD-2-Clause */ @@ -92,9 +92,6 @@ private: GC::Ref m_query; }; -// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path -void strip_trailing_spaces_from_an_opaque_path(DOMURL& url); - // https://url.spec.whatwg.org/#concept-url-parser Optional parse(StringView input, Optional base_url = {}, Optional encoding = {}); diff --git a/Libraries/LibWeb/DOMURL/URLSearchParams.cpp b/Libraries/LibWeb/DOMURL/URLSearchParams.cpp index 2a9dc7d7475..9dac122d356 100644 --- a/Libraries/LibWeb/DOMURL/URLSearchParams.cpp +++ b/Libraries/LibWeb/DOMURL/URLSearchParams.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2021, Idan Horowitz - * Copyright (c) 2023-2024, Shannon Booth + * Copyright (c) 2023-2025, Shannon Booth * * SPDX-License-Identifier: BSD-2-Clause */ @@ -229,10 +229,6 @@ void URLSearchParams::update() // 4. Set query’s URL object’s URL’s query to serializedQuery. m_url->set_query({}, serialized_query); - - // 5. If serializedQuery is null, then potentially strip trailing spaces from an opaque path with query’s URL object. - if (!serialized_query.has_value()) - strip_trailing_spaces_from_an_opaque_path(*m_url); } // https://url.spec.whatwg.org/#dom-urlsearchparams-delete diff --git a/Tests/LibWeb/Text/expected/URL/opaque_paths_roundtrip.txt b/Tests/LibWeb/Text/expected/URL/opaque_paths_roundtrip.txt new file mode 100644 index 00000000000..6d17a6b22cc --- /dev/null +++ b/Tests/LibWeb/Text/expected/URL/opaque_paths_roundtrip.txt @@ -0,0 +1,4 @@ +pathname => 'foobar %20' +pathname => 'foobar %20' +pathname => 'baz%20' +pathname => 'baz%20' diff --git a/Tests/LibWeb/Text/expected/URL/search-params-strip-trailing-spaces-from-opaque-url.txt b/Tests/LibWeb/Text/expected/URL/search-params-strip-trailing-spaces-from-opaque-url.txt index 8b09e1ec3a7..1e0e2886bc9 100644 --- a/Tests/LibWeb/Text/expected/URL/search-params-strip-trailing-spaces-from-opaque-url.txt +++ b/Tests/LibWeb/Text/expected/URL/search-params-strip-trailing-spaces-from-opaque-url.txt @@ -1,6 +1,6 @@ -URL pathname is 'space ' -URL href is 'data:space ?test' +URL pathname is 'space %20' +URL href is 'data:space %20?test' true false -URL pathname is 'space' -URL href is 'data:space' +URL pathname is 'space %20' +URL href is 'data:space %20' diff --git a/Tests/LibWeb/Text/expected/URL/strip_trailing_spaces_from_opaque_path.txt b/Tests/LibWeb/Text/expected/URL/strip_trailing_spaces_from_opaque_path.txt deleted file mode 100644 index 817503998b0..00000000000 --- a/Tests/LibWeb/Text/expected/URL/strip_trailing_spaces_from_opaque_path.txt +++ /dev/null @@ -1,4 +0,0 @@ -pathname => 'foobar ' -pathname => 'foobar' -pathname => 'baz ' -pathname => 'baz' diff --git a/Tests/LibWeb/Text/expected/wpt-import/url/a-element-origin.txt b/Tests/LibWeb/Text/expected/wpt-import/url/a-element-origin.txt index a7fb0cdf031..835c2f0c45b 100644 --- a/Tests/LibWeb/Text/expected/wpt-import/url/a-element-origin.txt +++ b/Tests/LibWeb/Text/expected/wpt-import/url/a-element-origin.txt @@ -1,8 +1,8 @@ Harness status: OK -Found 386 tests +Found 394 tests -386 Pass +394 Pass Pass Loading data… Pass Parsing origin: against @@ -216,6 +216,14 @@ Pass Parsing origin: against Pass Parsing origin: against Pass Parsing origin: against Pass Parsing origin: < http://example.com/ > against +Pass Parsing origin: against +Pass Parsing origin: against +Pass Parsing origin: against +Pass Parsing origin: against +Pass Parsing origin: against +Pass Parsing origin: against +Pass Parsing origin: against +Pass Parsing origin: against Pass Parsing origin: against Pass Parsing origin: against Pass Parsing origin: against diff --git a/Tests/LibWeb/Text/expected/wpt-import/url/url-constructor.any.txt b/Tests/LibWeb/Text/expected/wpt-import/url/url-constructor.any.txt index cd1ce01cb75..ee3f3b17030 100644 --- a/Tests/LibWeb/Text/expected/wpt-import/url/url-constructor.any.txt +++ b/Tests/LibWeb/Text/expected/wpt-import/url/url-constructor.any.txt @@ -1,8 +1,8 @@ Harness status: OK -Found 863 tests +Found 871 tests -863 Pass +871 Pass Pass Loading data… Pass Parsing: against @@ -283,6 +283,14 @@ Pass Parsing: against Pass Parsing: against Pass Parsing: against Pass Parsing: < http://example.com/ > without base +Pass Parsing: without base +Pass Parsing: without base +Pass Parsing: without base +Pass Parsing: without base +Pass Parsing: without base +Pass Parsing: without base +Pass Parsing: without base +Pass Parsing: without base Pass Parsing: against Pass Parsing: against Pass Parsing: against diff --git a/Tests/LibWeb/Text/expected/wpt-import/url/url-origin.any.txt b/Tests/LibWeb/Text/expected/wpt-import/url/url-origin.any.txt index 91df4045059..bcf29a5e573 100644 --- a/Tests/LibWeb/Text/expected/wpt-import/url/url-origin.any.txt +++ b/Tests/LibWeb/Text/expected/wpt-import/url/url-origin.any.txt @@ -1,8 +1,8 @@ Harness status: OK -Found 387 tests +Found 395 tests -387 Pass +395 Pass Pass Loading data… Pass Origin parsing: against @@ -216,6 +216,14 @@ Pass Origin parsing: against Pass Origin parsing: against Pass Origin parsing: against Pass Origin parsing: < http://example.com/ > without base +Pass Origin parsing: without base +Pass Origin parsing: without base +Pass Origin parsing: without base +Pass Origin parsing: without base +Pass Origin parsing: without base +Pass Origin parsing: without base +Pass Origin parsing: without base +Pass Origin parsing: without base Pass Origin parsing: against Pass Origin parsing: without base Pass Origin parsing: against diff --git a/Tests/LibWeb/Text/expected/wpt-import/url/url-setters-a-area.window.txt b/Tests/LibWeb/Text/expected/wpt-import/url/url-setters-a-area.window.txt index 333b5c4f1d0..991f0ef23cb 100644 Binary files a/Tests/LibWeb/Text/expected/wpt-import/url/url-setters-a-area.window.txt and b/Tests/LibWeb/Text/expected/wpt-import/url/url-setters-a-area.window.txt differ diff --git a/Tests/LibWeb/Text/expected/wpt-import/url/url-setters.any.txt b/Tests/LibWeb/Text/expected/wpt-import/url/url-setters.any.txt index 63c044ee178..8eea2b8ba1b 100644 Binary files a/Tests/LibWeb/Text/expected/wpt-import/url/url-setters.any.txt and b/Tests/LibWeb/Text/expected/wpt-import/url/url-setters.any.txt differ diff --git a/Tests/LibWeb/Text/expected/wpt-import/url/urlsearchparams-delete.any.txt b/Tests/LibWeb/Text/expected/wpt-import/url/urlsearchparams-delete.any.txt index 5ca570d123a..66a003d1f9b 100644 --- a/Tests/LibWeb/Text/expected/wpt-import/url/urlsearchparams-delete.any.txt +++ b/Tests/LibWeb/Text/expected/wpt-import/url/urlsearchparams-delete.any.txt @@ -7,7 +7,7 @@ Pass Delete basics Pass Deleting appended multiple Pass Deleting all params removes ? from URL Pass Removing non-existent param removes ? from URL -Pass Changing the query of a URL with an opaque path can impact the path -Pass Changing the query of a URL with an opaque path can impact the path if the URL has no fragment +Pass Changing the query of a URL with an opaque path with trailing spaces +Pass Changing the query of a URL with an opaque path with trailing spaces and a fragment Pass Two-argument delete() Pass Two-argument delete() respects undefined as second arg \ No newline at end of file diff --git a/Tests/LibWeb/Text/input/URL/strip_trailing_spaces_from_opaque_path.html b/Tests/LibWeb/Text/input/URL/opaque_paths_roundtrip.html similarity index 100% rename from Tests/LibWeb/Text/input/URL/strip_trailing_spaces_from_opaque_path.html rename to Tests/LibWeb/Text/input/URL/opaque_paths_roundtrip.html diff --git a/Tests/LibWeb/Text/input/wpt-import/url/resources/setters_tests.json b/Tests/LibWeb/Text/input/wpt-import/url/resources/setters_tests.json index efd548b6c88..cf8a420caab 100644 --- a/Tests/LibWeb/Text/input/wpt-import/url/resources/setters_tests.json +++ b/Tests/LibWeb/Text/input/wpt-import/url/resources/setters_tests.json @@ -1141,6 +1141,42 @@ "host": "example.com", "hostname": "example.com" } + }, + { + "href": "https://test.invalid/", + "new_value": "*", + "expected": { + "href": "https://*/", + "host": "*", + "hostname": "*" + } + }, + { + "href": "https://test.invalid/", + "new_value": "x@x", + "expected": { + "href": "https://test.invalid/", + "host": "test.invalid", + "hostname": "test.invalid" + } + }, + { + "href": "https://test.invalid/", + "new_value": "foo\t\r\nbar", + "expected": { + "href": "https://foobar/", + "host": "foobar", + "hostname": "foobar" + } + }, + { + "href": "https://test.invalid/", + "new_value": "><", + "expected": { + "href": "https://test.invalid/", + "host": "test.invalid", + "hostname": "test.invalid" + } } ], "hostname": [ @@ -1552,6 +1588,42 @@ "host": "example.com", "hostname": "example.com" } + }, + { + "href": "https://test.invalid/", + "new_value": "*", + "expected": { + "href": "https://*/", + "host": "*", + "hostname": "*" + } + }, + { + "href": "https://test.invalid/", + "new_value": "x@x", + "expected": { + "href": "https://test.invalid/", + "host": "test.invalid", + "hostname": "test.invalid" + } + }, + { + "href": "https://test.invalid/", + "new_value": "foo\t\r\nbar", + "expected": { + "href": "https://foobar/", + "host": "foobar", + "hostname": "foobar" + } + }, + { + "href": "https://test.invalid/", + "new_value": "><", + "expected": { + "href": "https://test.invalid/", + "host": "test.invalid", + "hostname": "test.invalid" + } } ], "port": [ @@ -2169,12 +2241,12 @@ } }, { - "comment": "Drop trailing spaces from trailing opaque paths", + "comment": "Trailing spaces and opaque paths", "href": "data:space ?query", "new_value": "", "expected": { - "href": "data:space", - "pathname": "space", + "href": "data:space%20", + "pathname": "space%20", "search": "" } }, @@ -2182,17 +2254,17 @@ "href": "sc:space ?query", "new_value": "", "expected": { - "href": "sc:space", - "pathname": "space", + "href": "sc:space%20", + "pathname": "space%20", "search": "" } }, { - "comment": "Do not drop trailing spaces from non-trailing opaque paths", + "comment": "Trailing spaces and opaque paths", "href": "data:space ?query#fragment", "new_value": "", "expected": { - "href": "data:space #fragment", + "href": "data:space %20#fragment", "search": "" } }, @@ -2200,7 +2272,7 @@ "href": "sc:space ?query#fragment", "new_value": "", "expected": { - "href": "sc:space #fragment", + "href": "sc:space %20#fragment", "search": "" } }, @@ -2357,12 +2429,12 @@ } }, { - "comment": "Drop trailing spaces from trailing opaque paths", + "comment": "Trailing spaces and opaque paths", "href": "data:space #fragment", "new_value": "", "expected": { - "href": "data:space", - "pathname": "space", + "href": "data:space %20", + "pathname": "space %20", "hash": "" } }, @@ -2370,17 +2442,17 @@ "href": "sc:space #fragment", "new_value": "", "expected": { - "href": "sc:space", - "pathname": "space", + "href": "sc:space %20", + "pathname": "space %20", "hash": "" } }, { - "comment": "Do not drop trailing spaces from non-trailing opaque paths", + "comment": "Trailing spaces and opaque paths", "href": "data:space ?query#fragment", "new_value": "", "expected": { - "href": "data:space ?query", + "href": "data:space %20?query", "hash": "" } }, @@ -2388,7 +2460,7 @@ "href": "sc:space ?query#fragment", "new_value": "", "expected": { - "href": "sc:space ?query", + "href": "sc:space %20?query", "hash": "" } }, diff --git a/Tests/LibWeb/Text/input/wpt-import/url/resources/urltestdata.json b/Tests/LibWeb/Text/input/wpt-import/url/resources/urltestdata.json index 214ed0852aa..d1a06f6319d 100644 --- a/Tests/LibWeb/Text/input/wpt-import/url/resources/urltestdata.json +++ b/Tests/LibWeb/Text/input/wpt-import/url/resources/urltestdata.json @@ -3778,6 +3778,126 @@ "search": "", "hash": "" }, + { + "input": "non-special:opaque ", + "base": null, + "href": "non-special:opaque", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "opaque", + "search": "", + "hash": "" + }, + { + "input": "non-special:opaque ?hi", + "base": null, + "href": "non-special:opaque %20?hi", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "opaque %20", + "search": "?hi", + "hash": "" + }, + { + "input": "non-special:opaque #hi", + "base": null, + "href": "non-special:opaque %20#hi", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "opaque %20", + "search": "", + "hash": "#hi" + }, + { + "input": "non-special:opaque x?hi", + "base": null, + "href": "non-special:opaque x?hi", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "opaque x", + "search": "?hi", + "hash": "" + }, + { + "input": "non-special:opaque x#hi", + "base": null, + "href": "non-special:opaque x#hi", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "opaque x", + "search": "", + "hash": "#hi" + }, + { + "input": "non-special:opaque \t\t \t#hi", + "base": null, + "href": "non-special:opaque %20#hi", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "opaque %20", + "search": "", + "hash": "#hi" + }, + { + "input": "non-special:opaque \t\t #hi", + "base": null, + "href": "non-special:opaque %20#hi", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "opaque %20", + "search": "", + "hash": "#hi" + }, + { + "input": "non-special:opaque\t\t \r #hi", + "base": null, + "href": "non-special:opaque %20#hi", + "origin": "null", + "protocol": "non-special:", + "username": "", + "password": "", + "host": "", + "hostname": "", + "port": "", + "pathname": "opaque %20", + "search": "", + "hash": "#hi" + }, "Ideographic full stop (full-width period for Chinese, etc.) should be treated as a dot. U+3002 is mapped to U+002E (dot)", { "input": "http://www.foo。bar.com", diff --git a/Tests/LibWeb/Text/input/wpt-import/url/urlsearchparams-delete.any.js b/Tests/LibWeb/Text/input/wpt-import/url/urlsearchparams-delete.any.js index c597142c51d..09a5dccb648 100644 --- a/Tests/LibWeb/Text/input/wpt-import/url/urlsearchparams-delete.any.js +++ b/Tests/LibWeb/Text/input/wpt-import/url/urlsearchparams-delete.any.js @@ -50,17 +50,17 @@ test(() => { url.searchParams.delete('test'); assert_false(url.searchParams.has('test')); assert_equals(url.search, ''); - assert_equals(url.pathname, 'space'); - assert_equals(url.href, 'data:space'); -}, 'Changing the query of a URL with an opaque path can impact the path'); + assert_equals(url.pathname, 'space %20'); + assert_equals(url.href, 'data:space %20'); +}, 'Changing the query of a URL with an opaque path with trailing spaces'); test(() => { const url = new URL('data:space ?test#test'); url.searchParams.delete('test'); assert_equals(url.search, ''); - assert_equals(url.pathname, 'space '); - assert_equals(url.href, 'data:space #test'); -}, 'Changing the query of a URL with an opaque path can impact the path if the URL has no fragment'); + assert_equals(url.pathname, 'space %20'); + assert_equals(url.href, 'data:space %20#test'); +}, 'Changing the query of a URL with an opaque path with trailing spaces and a fragment'); test(() => { const params = new URLSearchParams();