LibURL+LibWeb: Ensure opaque paths always roundtrip

Corresponds to: https://github.com/whatwg/url/commit/6c782003
This commit is contained in:
Shannon Booth 2025-03-15 15:38:09 +13:00 committed by Sam Atkins
parent 01d1a9528b
commit ec3c545426
Notes: github-actions[bot] 2025-03-18 12:18:21 +00:00
17 changed files with 280 additions and 99 deletions

View file

@ -1,6 +1,6 @@
/*
* Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
* Copyright (c) 2023-2024, Shannon Booth <shannon@serenityos.org>
* Copyright (c) 2023-2025, Shannon Booth <shannon@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -1558,23 +1558,32 @@ Optional<URL> Parser::basic_parse(StringView raw_input, Optional<URL const&> bas
buffer.clear();
state = State::Fragment;
}
// 3. Otherwise:
else {
// 1. If c is not the EOF code point, not a URL code point, and not U+0025 (%), invalid-URL-unit validation error.
if (code_point != end_of_file && !is_url_code_point(code_point) && code_point != '%')
// 3. Otherwise, if c is U+0020 SPACE:
else if (code_point == ' ') {
// 1. If remaining starts with U+003F (?) or U+003F (#), then append "%20" to urls path.
if (auto remaining = get_remaining(); remaining.starts_with('?') || remaining.starts_with('#')) {
buffer.append("%20"sv);
}
// 2. Otherwise, append U+0020 SPACE to urls path.
else {
buffer.append(' ');
}
}
// 4. Otherwise, if c is not the EOF code point:
else if (code_point != end_of_file) {
// 1. If c is not a URL code point and not U+0025 (%), invalid-URL-unit validation error.
if (!is_url_code_point(code_point) && code_point != '%')
report_validation_error();
// 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
// 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, invalid-URL-unit validation error.
if (code_point == '%' && !remaining_starts_with_two_ascii_hex_digits())
report_validation_error();
// 3. If c is not the EOF code point, UTF-8 percent-encode c using the C0 control percent-encode set and append the result to urls path.
if (code_point != end_of_file) {
append_percent_encoded_if_necessary(buffer, code_point, PercentEncodeSet::C0Control);
} else {
url->m_data->paths[0] = buffer.to_string_without_validation();
buffer.clear();
}
// 3. UTF-8 percent-encode c using the C0 control percent-encode set and append the result to urls path.
append_percent_encoded_if_necessary(buffer, code_point, PercentEncodeSet::C0Control);
} else {
url->m_data->paths[0] = buffer.to_string_without_validation();
buffer.clear();
}
break;
// -> query state, https://url.spec.whatwg.org/#query-state

View file

@ -388,18 +388,10 @@ void DOMURL::set_search(String const& search)
// 1. Let url be thiss URL.
auto& url = m_url;
// 2. If the given value is the empty string:
// 2. If the given value is the empty string, then set urls query to null, empty thiss query objects list, and return.
if (search.is_empty()) {
// 1. Set urls query to null.
url.set_query({});
// 2. Empty thiss query objects list.
m_query->m_list.clear();
// 3. Potentially strip trailing spaces from an opaque path with this.
strip_trailing_spaces_from_an_opaque_path(*this);
// 4. Return.
return;
}
@ -438,15 +430,9 @@ String DOMURL::hash() const
// https://url.spec.whatwg.org/#ref-for-dom-url-hash%E2%91%A0
void DOMURL::set_hash(String const& hash)
{
// 1. If the given value is the empty string:
// 1. If the given value is the empty string, then set thiss URLs fragment to null and return.
if (hash.is_empty()) {
// 1. Set thiss URLs fragment to null.
m_url.set_fragment({});
// 2. Potentially strip trailing spaces from an opaque path with this.
strip_trailing_spaces_from_an_opaque_path(*this);
// 3. Return.
return;
}
@ -461,29 +447,6 @@ void DOMURL::set_hash(String const& hash)
(void)URL::Parser::basic_parse(input, {}, &m_url, URL::Parser::State::Fragment);
}
// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
void strip_trailing_spaces_from_an_opaque_path(DOMURL& url)
{
// 1. If urls URL does not have an opaque path, then return.
// FIXME: Reimplement this step once we modernize the URL implementation to meet the spec.
if (!url.cannot_be_a_base_url())
return;
// 2. If urls URLs fragment is non-null, then return.
if (url.fragment().has_value())
return;
// 3. If urls URLs query is non-null, then return.
if (url.query().has_value())
return;
// 4. Remove all trailing U+0020 SPACE code points from urls URLs path.
// NOTE: At index 0 since the first step tells us that the URL only has one path segment.
auto opaque_path = url.path_segment_at_index(0);
auto trimmed_path = opaque_path.trim(" "sv, TrimMode::Right);
url.set_paths({ trimmed_path });
}
// https://url.spec.whatwg.org/#concept-url-parser
Optional<URL::URL> parse(StringView input, Optional<URL::URL const&> base_url, Optional<StringView> encoding)
{

View file

@ -2,7 +2,7 @@
* Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
* Copyright (c) 2021, the SerenityOS developers.
* Copyright (c) 2023, networkException <networkexception@serenityos.org>
* Copyright (c) 2024, Shannon Booth <shannon@serenityos.org>
* Copyright (c) 2024-2025, Shannon Booth <shannon@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -92,9 +92,6 @@ private:
GC::Ref<URLSearchParams> m_query;
};
// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
void strip_trailing_spaces_from_an_opaque_path(DOMURL& url);
// https://url.spec.whatwg.org/#concept-url-parser
Optional<URL::URL> parse(StringView input, Optional<URL::URL const&> base_url = {}, Optional<StringView> encoding = {});

View file

@ -1,6 +1,6 @@
/*
* Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
* Copyright (c) 2023-2024, Shannon Booth <shannon@serenityos.org>
* Copyright (c) 2023-2025, Shannon Booth <shannon@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -229,10 +229,6 @@ void URLSearchParams::update()
// 4. Set querys URL objects URLs query to serializedQuery.
m_url->set_query({}, serialized_query);
// 5. If serializedQuery is null, then potentially strip trailing spaces from an opaque path with querys URL object.
if (!serialized_query.has_value())
strip_trailing_spaces_from_an_opaque_path(*m_url);
}
// https://url.spec.whatwg.org/#dom-urlsearchparams-delete

View file

@ -0,0 +1,4 @@
pathname => 'foobar %20'
pathname => 'foobar %20'
pathname => 'baz%20'
pathname => 'baz%20'

View file

@ -1,6 +1,6 @@
URL pathname is 'space '
URL href is 'data:space ?test'
URL pathname is 'space %20'
URL href is 'data:space %20?test'
true
false
URL pathname is 'space'
URL href is 'data:space'
URL pathname is 'space %20'
URL href is 'data:space %20'

View file

@ -1,4 +0,0 @@
pathname => 'foobar '
pathname => 'foobar'
pathname => 'baz '
pathname => 'baz'

View file

@ -1,8 +1,8 @@
Harness status: OK
Found 386 tests
Found 394 tests
386 Pass
394 Pass
Pass Loading data…
Pass Parsing origin: <http://example .
org> against <http://example.org/foo/bar>
@ -216,6 +216,14 @@ Pass Parsing origin: <//www.example2.com> against <http://www.example.com/test>
Pass Parsing origin: <http://ExAmPlE.CoM> against <http://other.com/>
Pass Parsing origin: <http://GOOgoo.com> against <http://other.com/>
Pass Parsing origin: < http://example.com/ > against <about:blank>
Pass Parsing origin: <non-special:opaque > against <about:blank>
Pass Parsing origin: <non-special:opaque ?hi> against <about:blank>
Pass Parsing origin: <non-special:opaque #hi> against <about:blank>
Pass Parsing origin: <non-special:opaque x?hi> against <about:blank>
Pass Parsing origin: <non-special:opaque x#hi> against <about:blank>
Pass Parsing origin: <non-special:opaque #hi> against <about:blank>
Pass Parsing origin: <non-special:opaque #hi> against <about:blank>
Pass Parsing origin: <non-special:opaque #hi> against <about:blank>
Pass Parsing origin: <http://www.foo。bar.com> against <http://other.com/>
Pass Parsing origin: <https://x/<2F>?<3F>#<23>> against <about:blank>
Pass Parsing origin: <http://.com> against <http://other.com/>

View file

@ -1,8 +1,8 @@
Harness status: OK
Found 863 tests
Found 871 tests
863 Pass
871 Pass
Pass Loading data…
Pass Parsing: <http://example .
org> against <http://example.org/foo/bar>
@ -283,6 +283,14 @@ Pass Parsing: <http://[:]> against <http://other.com/>
Pass Parsing: <http://GOO  goo.com> against <http://other.com/>
Pass Parsing: <http://GOOgoo.com> against <http://other.com/>
Pass Parsing: < http://example.com/ > without base
Pass Parsing: <non-special:opaque > without base
Pass Parsing: <non-special:opaque ?hi> without base
Pass Parsing: <non-special:opaque #hi> without base
Pass Parsing: <non-special:opaque x?hi> without base
Pass Parsing: <non-special:opaque x#hi> without base
Pass Parsing: <non-special:opaque #hi> without base
Pass Parsing: <non-special:opaque #hi> without base
Pass Parsing: <non-special:opaque #hi> without base
Pass Parsing: <http://www.foo。bar.com> against <http://other.com/>
Pass Parsing: <http://﷐zyx.com> against <http://other.com/>
Pass Parsing: <http://%ef%b7%90zyx.com> against <http://other.com/>

View file

@ -1,8 +1,8 @@
Harness status: OK
Found 387 tests
Found 395 tests
387 Pass
395 Pass
Pass Loading data…
Pass Origin parsing: <http://example .
org> against <http://example.org/foo/bar>
@ -216,6 +216,14 @@ Pass Origin parsing: <//www.example2.com> against <http://www.example.com/test>
Pass Origin parsing: <http://ExAmPlE.CoM> against <http://other.com/>
Pass Origin parsing: <http://GOOgoo.com> against <http://other.com/>
Pass Origin parsing: < http://example.com/ > without base
Pass Origin parsing: <non-special:opaque > without base
Pass Origin parsing: <non-special:opaque ?hi> without base
Pass Origin parsing: <non-special:opaque #hi> without base
Pass Origin parsing: <non-special:opaque x?hi> without base
Pass Origin parsing: <non-special:opaque x#hi> without base
Pass Origin parsing: <non-special:opaque #hi> without base
Pass Origin parsing: <non-special:opaque #hi> without base
Pass Origin parsing: <non-special:opaque #hi> without base
Pass Origin parsing: <http://www.foo。bar.com> against <http://other.com/>
Pass Origin parsing: <https://x/<2F>?<3F>#<23>> without base
Pass Origin parsing: <http://.com> against <http://other.com/>

View file

@ -7,7 +7,7 @@ Pass Delete basics
Pass Deleting appended multiple
Pass Deleting all params removes ? from URL
Pass Removing non-existent param removes ? from URL
Pass Changing the query of a URL with an opaque path can impact the path
Pass Changing the query of a URL with an opaque path can impact the path if the URL has no fragment
Pass Changing the query of a URL with an opaque path with trailing spaces
Pass Changing the query of a URL with an opaque path with trailing spaces and a fragment
Pass Two-argument delete()
Pass Two-argument delete() respects undefined as second arg

View file

@ -1141,6 +1141,42 @@
"host": "example.com",
"hostname": "example.com"
}
},
{
"href": "https://test.invalid/",
"new_value": "*",
"expected": {
"href": "https://*/",
"host": "*",
"hostname": "*"
}
},
{
"href": "https://test.invalid/",
"new_value": "x@x",
"expected": {
"href": "https://test.invalid/",
"host": "test.invalid",
"hostname": "test.invalid"
}
},
{
"href": "https://test.invalid/",
"new_value": "foo\t\r\nbar",
"expected": {
"href": "https://foobar/",
"host": "foobar",
"hostname": "foobar"
}
},
{
"href": "https://test.invalid/",
"new_value": "><",
"expected": {
"href": "https://test.invalid/",
"host": "test.invalid",
"hostname": "test.invalid"
}
}
],
"hostname": [
@ -1552,6 +1588,42 @@
"host": "example.com",
"hostname": "example.com"
}
},
{
"href": "https://test.invalid/",
"new_value": "*",
"expected": {
"href": "https://*/",
"host": "*",
"hostname": "*"
}
},
{
"href": "https://test.invalid/",
"new_value": "x@x",
"expected": {
"href": "https://test.invalid/",
"host": "test.invalid",
"hostname": "test.invalid"
}
},
{
"href": "https://test.invalid/",
"new_value": "foo\t\r\nbar",
"expected": {
"href": "https://foobar/",
"host": "foobar",
"hostname": "foobar"
}
},
{
"href": "https://test.invalid/",
"new_value": "><",
"expected": {
"href": "https://test.invalid/",
"host": "test.invalid",
"hostname": "test.invalid"
}
}
],
"port": [
@ -2169,12 +2241,12 @@
}
},
{
"comment": "Drop trailing spaces from trailing opaque paths",
"comment": "Trailing spaces and opaque paths",
"href": "data:space ?query",
"new_value": "",
"expected": {
"href": "data:space",
"pathname": "space",
"href": "data:space%20",
"pathname": "space%20",
"search": ""
}
},
@ -2182,17 +2254,17 @@
"href": "sc:space ?query",
"new_value": "",
"expected": {
"href": "sc:space",
"pathname": "space",
"href": "sc:space%20",
"pathname": "space%20",
"search": ""
}
},
{
"comment": "Do not drop trailing spaces from non-trailing opaque paths",
"comment": "Trailing spaces and opaque paths",
"href": "data:space ?query#fragment",
"new_value": "",
"expected": {
"href": "data:space #fragment",
"href": "data:space %20#fragment",
"search": ""
}
},
@ -2200,7 +2272,7 @@
"href": "sc:space ?query#fragment",
"new_value": "",
"expected": {
"href": "sc:space #fragment",
"href": "sc:space %20#fragment",
"search": ""
}
},
@ -2357,12 +2429,12 @@
}
},
{
"comment": "Drop trailing spaces from trailing opaque paths",
"comment": "Trailing spaces and opaque paths",
"href": "data:space #fragment",
"new_value": "",
"expected": {
"href": "data:space",
"pathname": "space",
"href": "data:space %20",
"pathname": "space %20",
"hash": ""
}
},
@ -2370,17 +2442,17 @@
"href": "sc:space #fragment",
"new_value": "",
"expected": {
"href": "sc:space",
"pathname": "space",
"href": "sc:space %20",
"pathname": "space %20",
"hash": ""
}
},
{
"comment": "Do not drop trailing spaces from non-trailing opaque paths",
"comment": "Trailing spaces and opaque paths",
"href": "data:space ?query#fragment",
"new_value": "",
"expected": {
"href": "data:space ?query",
"href": "data:space %20?query",
"hash": ""
}
},
@ -2388,7 +2460,7 @@
"href": "sc:space ?query#fragment",
"new_value": "",
"expected": {
"href": "sc:space ?query",
"href": "sc:space %20?query",
"hash": ""
}
},

View file

@ -3778,6 +3778,126 @@
"search": "",
"hash": ""
},
{
"input": "non-special:opaque ",
"base": null,
"href": "non-special:opaque",
"origin": "null",
"protocol": "non-special:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "opaque",
"search": "",
"hash": ""
},
{
"input": "non-special:opaque ?hi",
"base": null,
"href": "non-special:opaque %20?hi",
"origin": "null",
"protocol": "non-special:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "opaque %20",
"search": "?hi",
"hash": ""
},
{
"input": "non-special:opaque #hi",
"base": null,
"href": "non-special:opaque %20#hi",
"origin": "null",
"protocol": "non-special:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "opaque %20",
"search": "",
"hash": "#hi"
},
{
"input": "non-special:opaque x?hi",
"base": null,
"href": "non-special:opaque x?hi",
"origin": "null",
"protocol": "non-special:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "opaque x",
"search": "?hi",
"hash": ""
},
{
"input": "non-special:opaque x#hi",
"base": null,
"href": "non-special:opaque x#hi",
"origin": "null",
"protocol": "non-special:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "opaque x",
"search": "",
"hash": "#hi"
},
{
"input": "non-special:opaque \t\t \t#hi",
"base": null,
"href": "non-special:opaque %20#hi",
"origin": "null",
"protocol": "non-special:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "opaque %20",
"search": "",
"hash": "#hi"
},
{
"input": "non-special:opaque \t\t #hi",
"base": null,
"href": "non-special:opaque %20#hi",
"origin": "null",
"protocol": "non-special:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "opaque %20",
"search": "",
"hash": "#hi"
},
{
"input": "non-special:opaque\t\t \r #hi",
"base": null,
"href": "non-special:opaque %20#hi",
"origin": "null",
"protocol": "non-special:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "opaque %20",
"search": "",
"hash": "#hi"
},
"Ideographic full stop (full-width period for Chinese, etc.) should be treated as a dot. U+3002 is mapped to U+002E (dot)",
{
"input": "http://www.foo。bar.com",

View file

@ -50,17 +50,17 @@ test(() => {
url.searchParams.delete('test');
assert_false(url.searchParams.has('test'));
assert_equals(url.search, '');
assert_equals(url.pathname, 'space');
assert_equals(url.href, 'data:space');
}, 'Changing the query of a URL with an opaque path can impact the path');
assert_equals(url.pathname, 'space %20');
assert_equals(url.href, 'data:space %20');
}, 'Changing the query of a URL with an opaque path with trailing spaces');
test(() => {
const url = new URL('data:space ?test#test');
url.searchParams.delete('test');
assert_equals(url.search, '');
assert_equals(url.pathname, 'space ');
assert_equals(url.href, 'data:space #test');
}, 'Changing the query of a URL with an opaque path can impact the path if the URL has no fragment');
assert_equals(url.pathname, 'space %20');
assert_equals(url.href, 'data:space %20#test');
}, 'Changing the query of a URL with an opaque path with trailing spaces and a fragment');
test(() => {
const params = new URLSearchParams();