LibURL+LibWeb: Ensure opaque paths always roundtrip

Corresponds to: 6c782003
This commit is contained in:
Shannon Booth 2025-03-15 15:38:09 +13:00 committed by Sam Atkins
commit ec3c545426
Notes: github-actions[bot] 2025-03-18 12:18:21 +00:00
17 changed files with 280 additions and 99 deletions

View file

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch> * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
* Copyright (c) 2023-2024, Shannon Booth <shannon@serenityos.org> * Copyright (c) 2023-2025, Shannon Booth <shannon@serenityos.org>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
@ -1558,23 +1558,32 @@ Optional<URL> Parser::basic_parse(StringView raw_input, Optional<URL const&> bas
buffer.clear(); buffer.clear();
state = State::Fragment; state = State::Fragment;
} }
// 3. Otherwise: // 3. Otherwise, if c is U+0020 SPACE:
else { else if (code_point == ' ') {
// 1. If c is not the EOF code point, not a URL code point, and not U+0025 (%), invalid-URL-unit validation error. // 1. If remaining starts with U+003F (?) or U+003F (#), then append "%20" to urls path.
if (code_point != end_of_file && !is_url_code_point(code_point) && code_point != '%') if (auto remaining = get_remaining(); remaining.starts_with('?') || remaining.starts_with('#')) {
buffer.append("%20"sv);
}
// 2. Otherwise, append U+0020 SPACE to urls path.
else {
buffer.append(' ');
}
}
// 4. Otherwise, if c is not the EOF code point:
else if (code_point != end_of_file) {
// 1. If c is not a URL code point and not U+0025 (%), invalid-URL-unit validation error.
if (!is_url_code_point(code_point) && code_point != '%')
report_validation_error(); report_validation_error();
// 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error. // 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, invalid-URL-unit validation error.
if (code_point == '%' && !remaining_starts_with_two_ascii_hex_digits()) if (code_point == '%' && !remaining_starts_with_two_ascii_hex_digits())
report_validation_error(); report_validation_error();
// 3. If c is not the EOF code point, UTF-8 percent-encode c using the C0 control percent-encode set and append the result to urls path. // 3. UTF-8 percent-encode c using the C0 control percent-encode set and append the result to urls path.
if (code_point != end_of_file) { append_percent_encoded_if_necessary(buffer, code_point, PercentEncodeSet::C0Control);
append_percent_encoded_if_necessary(buffer, code_point, PercentEncodeSet::C0Control); } else {
} else { url->m_data->paths[0] = buffer.to_string_without_validation();
url->m_data->paths[0] = buffer.to_string_without_validation(); buffer.clear();
buffer.clear();
}
} }
break; break;
// -> query state, https://url.spec.whatwg.org/#query-state // -> query state, https://url.spec.whatwg.org/#query-state

View file

@ -388,18 +388,10 @@ void DOMURL::set_search(String const& search)
// 1. Let url be thiss URL. // 1. Let url be thiss URL.
auto& url = m_url; auto& url = m_url;
// 2. If the given value is the empty string: // 2. If the given value is the empty string, then set urls query to null, empty thiss query objects list, and return.
if (search.is_empty()) { if (search.is_empty()) {
// 1. Set urls query to null.
url.set_query({}); url.set_query({});
// 2. Empty thiss query objects list.
m_query->m_list.clear(); m_query->m_list.clear();
// 3. Potentially strip trailing spaces from an opaque path with this.
strip_trailing_spaces_from_an_opaque_path(*this);
// 4. Return.
return; return;
} }
@ -438,15 +430,9 @@ String DOMURL::hash() const
// https://url.spec.whatwg.org/#ref-for-dom-url-hash%E2%91%A0 // https://url.spec.whatwg.org/#ref-for-dom-url-hash%E2%91%A0
void DOMURL::set_hash(String const& hash) void DOMURL::set_hash(String const& hash)
{ {
// 1. If the given value is the empty string: // 1. If the given value is the empty string, then set thiss URLs fragment to null and return.
if (hash.is_empty()) { if (hash.is_empty()) {
// 1. Set thiss URLs fragment to null.
m_url.set_fragment({}); m_url.set_fragment({});
// 2. Potentially strip trailing spaces from an opaque path with this.
strip_trailing_spaces_from_an_opaque_path(*this);
// 3. Return.
return; return;
} }
@ -461,29 +447,6 @@ void DOMURL::set_hash(String const& hash)
(void)URL::Parser::basic_parse(input, {}, &m_url, URL::Parser::State::Fragment); (void)URL::Parser::basic_parse(input, {}, &m_url, URL::Parser::State::Fragment);
} }
// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
void strip_trailing_spaces_from_an_opaque_path(DOMURL& url)
{
// 1. If urls URL does not have an opaque path, then return.
// FIXME: Reimplement this step once we modernize the URL implementation to meet the spec.
if (!url.cannot_be_a_base_url())
return;
// 2. If urls URLs fragment is non-null, then return.
if (url.fragment().has_value())
return;
// 3. If urls URLs query is non-null, then return.
if (url.query().has_value())
return;
// 4. Remove all trailing U+0020 SPACE code points from urls URLs path.
// NOTE: At index 0 since the first step tells us that the URL only has one path segment.
auto opaque_path = url.path_segment_at_index(0);
auto trimmed_path = opaque_path.trim(" "sv, TrimMode::Right);
url.set_paths({ trimmed_path });
}
// https://url.spec.whatwg.org/#concept-url-parser // https://url.spec.whatwg.org/#concept-url-parser
Optional<URL::URL> parse(StringView input, Optional<URL::URL const&> base_url, Optional<StringView> encoding) Optional<URL::URL> parse(StringView input, Optional<URL::URL const&> base_url, Optional<StringView> encoding)
{ {

View file

@ -2,7 +2,7 @@
* Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org> * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
* Copyright (c) 2021, the SerenityOS developers. * Copyright (c) 2021, the SerenityOS developers.
* Copyright (c) 2023, networkException <networkexception@serenityos.org> * Copyright (c) 2023, networkException <networkexception@serenityos.org>
* Copyright (c) 2024, Shannon Booth <shannon@serenityos.org> * Copyright (c) 2024-2025, Shannon Booth <shannon@serenityos.org>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
@ -92,9 +92,6 @@ private:
GC::Ref<URLSearchParams> m_query; GC::Ref<URLSearchParams> m_query;
}; };
// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
void strip_trailing_spaces_from_an_opaque_path(DOMURL& url);
// https://url.spec.whatwg.org/#concept-url-parser // https://url.spec.whatwg.org/#concept-url-parser
Optional<URL::URL> parse(StringView input, Optional<URL::URL const&> base_url = {}, Optional<StringView> encoding = {}); Optional<URL::URL> parse(StringView input, Optional<URL::URL const&> base_url = {}, Optional<StringView> encoding = {});

View file

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org> * Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
* Copyright (c) 2023-2024, Shannon Booth <shannon@serenityos.org> * Copyright (c) 2023-2025, Shannon Booth <shannon@serenityos.org>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
@ -229,10 +229,6 @@ void URLSearchParams::update()
// 4. Set querys URL objects URLs query to serializedQuery. // 4. Set querys URL objects URLs query to serializedQuery.
m_url->set_query({}, serialized_query); m_url->set_query({}, serialized_query);
// 5. If serializedQuery is null, then potentially strip trailing spaces from an opaque path with querys URL object.
if (!serialized_query.has_value())
strip_trailing_spaces_from_an_opaque_path(*m_url);
} }
// https://url.spec.whatwg.org/#dom-urlsearchparams-delete // https://url.spec.whatwg.org/#dom-urlsearchparams-delete

View file

@ -0,0 +1,4 @@
pathname => 'foobar %20'
pathname => 'foobar %20'
pathname => 'baz%20'
pathname => 'baz%20'

View file

@ -1,6 +1,6 @@
URL pathname is 'space ' URL pathname is 'space %20'
URL href is 'data:space ?test' URL href is 'data:space %20?test'
true true
false false
URL pathname is 'space' URL pathname is 'space %20'
URL href is 'data:space' URL href is 'data:space %20'

View file

@ -1,4 +0,0 @@
pathname => 'foobar '
pathname => 'foobar'
pathname => 'baz '
pathname => 'baz'

View file

@ -1,8 +1,8 @@
Harness status: OK Harness status: OK
Found 386 tests Found 394 tests
386 Pass 394 Pass
Pass Loading data… Pass Loading data…
Pass Parsing origin: <http://example . Pass Parsing origin: <http://example .
org> against <http://example.org/foo/bar> org> against <http://example.org/foo/bar>
@ -216,6 +216,14 @@ Pass Parsing origin: <//www.example2.com> against <http://www.example.com/test>
Pass Parsing origin: <http://ExAmPlE.CoM> against <http://other.com/> Pass Parsing origin: <http://ExAmPlE.CoM> against <http://other.com/>
Pass Parsing origin: <http://GOOgoo.com> against <http://other.com/> Pass Parsing origin: <http://GOOgoo.com> against <http://other.com/>
Pass Parsing origin: < http://example.com/ > against <about:blank> Pass Parsing origin: < http://example.com/ > against <about:blank>
Pass Parsing origin: <non-special:opaque > against <about:blank>
Pass Parsing origin: <non-special:opaque ?hi> against <about:blank>
Pass Parsing origin: <non-special:opaque #hi> against <about:blank>
Pass Parsing origin: <non-special:opaque x?hi> against <about:blank>
Pass Parsing origin: <non-special:opaque x#hi> against <about:blank>
Pass Parsing origin: <non-special:opaque #hi> against <about:blank>
Pass Parsing origin: <non-special:opaque #hi> against <about:blank>
Pass Parsing origin: <non-special:opaque #hi> against <about:blank>
Pass Parsing origin: <http://www.foo。bar.com> against <http://other.com/> Pass Parsing origin: <http://www.foo。bar.com> against <http://other.com/>
Pass Parsing origin: <https://x/<2F>?<3F>#<23>> against <about:blank> Pass Parsing origin: <https://x/<2F>?<3F>#<23>> against <about:blank>
Pass Parsing origin: <http://.com> against <http://other.com/> Pass Parsing origin: <http://.com> against <http://other.com/>

View file

@ -1,8 +1,8 @@
Harness status: OK Harness status: OK
Found 863 tests Found 871 tests
863 Pass 871 Pass
Pass Loading data… Pass Loading data…
Pass Parsing: <http://example . Pass Parsing: <http://example .
org> against <http://example.org/foo/bar> org> against <http://example.org/foo/bar>
@ -283,6 +283,14 @@ Pass Parsing: <http://[:]> against <http://other.com/>
Pass Parsing: <http://GOO  goo.com> against <http://other.com/> Pass Parsing: <http://GOO  goo.com> against <http://other.com/>
Pass Parsing: <http://GOOgoo.com> against <http://other.com/> Pass Parsing: <http://GOOgoo.com> against <http://other.com/>
Pass Parsing: < http://example.com/ > without base Pass Parsing: < http://example.com/ > without base
Pass Parsing: <non-special:opaque > without base
Pass Parsing: <non-special:opaque ?hi> without base
Pass Parsing: <non-special:opaque #hi> without base
Pass Parsing: <non-special:opaque x?hi> without base
Pass Parsing: <non-special:opaque x#hi> without base
Pass Parsing: <non-special:opaque #hi> without base
Pass Parsing: <non-special:opaque #hi> without base
Pass Parsing: <non-special:opaque #hi> without base
Pass Parsing: <http://www.foo。bar.com> against <http://other.com/> Pass Parsing: <http://www.foo。bar.com> against <http://other.com/>
Pass Parsing: <http://﷐zyx.com> against <http://other.com/> Pass Parsing: <http://﷐zyx.com> against <http://other.com/>
Pass Parsing: <http://%ef%b7%90zyx.com> against <http://other.com/> Pass Parsing: <http://%ef%b7%90zyx.com> against <http://other.com/>

View file

@ -1,8 +1,8 @@
Harness status: OK Harness status: OK
Found 387 tests Found 395 tests
387 Pass 395 Pass
Pass Loading data… Pass Loading data…
Pass Origin parsing: <http://example . Pass Origin parsing: <http://example .
org> against <http://example.org/foo/bar> org> against <http://example.org/foo/bar>
@ -216,6 +216,14 @@ Pass Origin parsing: <//www.example2.com> against <http://www.example.com/test>
Pass Origin parsing: <http://ExAmPlE.CoM> against <http://other.com/> Pass Origin parsing: <http://ExAmPlE.CoM> against <http://other.com/>
Pass Origin parsing: <http://GOOgoo.com> against <http://other.com/> Pass Origin parsing: <http://GOOgoo.com> against <http://other.com/>
Pass Origin parsing: < http://example.com/ > without base Pass Origin parsing: < http://example.com/ > without base
Pass Origin parsing: <non-special:opaque > without base
Pass Origin parsing: <non-special:opaque ?hi> without base
Pass Origin parsing: <non-special:opaque #hi> without base
Pass Origin parsing: <non-special:opaque x?hi> without base
Pass Origin parsing: <non-special:opaque x#hi> without base
Pass Origin parsing: <non-special:opaque #hi> without base
Pass Origin parsing: <non-special:opaque #hi> without base
Pass Origin parsing: <non-special:opaque #hi> without base
Pass Origin parsing: <http://www.foo。bar.com> against <http://other.com/> Pass Origin parsing: <http://www.foo。bar.com> against <http://other.com/>
Pass Origin parsing: <https://x/<2F>?<3F>#<23>> without base Pass Origin parsing: <https://x/<2F>?<3F>#<23>> without base
Pass Origin parsing: <http://.com> against <http://other.com/> Pass Origin parsing: <http://.com> against <http://other.com/>

View file

@ -7,7 +7,7 @@ Pass Delete basics
Pass Deleting appended multiple Pass Deleting appended multiple
Pass Deleting all params removes ? from URL Pass Deleting all params removes ? from URL
Pass Removing non-existent param removes ? from URL Pass Removing non-existent param removes ? from URL
Pass Changing the query of a URL with an opaque path can impact the path Pass Changing the query of a URL with an opaque path with trailing spaces
Pass Changing the query of a URL with an opaque path can impact the path if the URL has no fragment Pass Changing the query of a URL with an opaque path with trailing spaces and a fragment
Pass Two-argument delete() Pass Two-argument delete()
Pass Two-argument delete() respects undefined as second arg Pass Two-argument delete() respects undefined as second arg

View file

@ -1141,6 +1141,42 @@
"host": "example.com", "host": "example.com",
"hostname": "example.com" "hostname": "example.com"
} }
},
{
"href": "https://test.invalid/",
"new_value": "*",
"expected": {
"href": "https://*/",
"host": "*",
"hostname": "*"
}
},
{
"href": "https://test.invalid/",
"new_value": "x@x",
"expected": {
"href": "https://test.invalid/",
"host": "test.invalid",
"hostname": "test.invalid"
}
},
{
"href": "https://test.invalid/",
"new_value": "foo\t\r\nbar",
"expected": {
"href": "https://foobar/",
"host": "foobar",
"hostname": "foobar"
}
},
{
"href": "https://test.invalid/",
"new_value": "><",
"expected": {
"href": "https://test.invalid/",
"host": "test.invalid",
"hostname": "test.invalid"
}
} }
], ],
"hostname": [ "hostname": [
@ -1552,6 +1588,42 @@
"host": "example.com", "host": "example.com",
"hostname": "example.com" "hostname": "example.com"
} }
},
{
"href": "https://test.invalid/",
"new_value": "*",
"expected": {
"href": "https://*/",
"host": "*",
"hostname": "*"
}
},
{
"href": "https://test.invalid/",
"new_value": "x@x",
"expected": {
"href": "https://test.invalid/",
"host": "test.invalid",
"hostname": "test.invalid"
}
},
{
"href": "https://test.invalid/",
"new_value": "foo\t\r\nbar",
"expected": {
"href": "https://foobar/",
"host": "foobar",
"hostname": "foobar"
}
},
{
"href": "https://test.invalid/",
"new_value": "><",
"expected": {
"href": "https://test.invalid/",
"host": "test.invalid",
"hostname": "test.invalid"
}
} }
], ],
"port": [ "port": [
@ -2169,12 +2241,12 @@
} }
}, },
{ {
"comment": "Drop trailing spaces from trailing opaque paths", "comment": "Trailing spaces and opaque paths",
"href": "data:space ?query", "href": "data:space ?query",
"new_value": "", "new_value": "",
"expected": { "expected": {
"href": "data:space", "href": "data:space%20",
"pathname": "space", "pathname": "space%20",
"search": "" "search": ""
} }
}, },
@ -2182,17 +2254,17 @@
"href": "sc:space ?query", "href": "sc:space ?query",
"new_value": "", "new_value": "",
"expected": { "expected": {
"href": "sc:space", "href": "sc:space%20",
"pathname": "space", "pathname": "space%20",
"search": "" "search": ""
} }
}, },
{ {
"comment": "Do not drop trailing spaces from non-trailing opaque paths", "comment": "Trailing spaces and opaque paths",
"href": "data:space ?query#fragment", "href": "data:space ?query#fragment",
"new_value": "", "new_value": "",
"expected": { "expected": {
"href": "data:space #fragment", "href": "data:space %20#fragment",
"search": "" "search": ""
} }
}, },
@ -2200,7 +2272,7 @@
"href": "sc:space ?query#fragment", "href": "sc:space ?query#fragment",
"new_value": "", "new_value": "",
"expected": { "expected": {
"href": "sc:space #fragment", "href": "sc:space %20#fragment",
"search": "" "search": ""
} }
}, },
@ -2357,12 +2429,12 @@
} }
}, },
{ {
"comment": "Drop trailing spaces from trailing opaque paths", "comment": "Trailing spaces and opaque paths",
"href": "data:space #fragment", "href": "data:space #fragment",
"new_value": "", "new_value": "",
"expected": { "expected": {
"href": "data:space", "href": "data:space %20",
"pathname": "space", "pathname": "space %20",
"hash": "" "hash": ""
} }
}, },
@ -2370,17 +2442,17 @@
"href": "sc:space #fragment", "href": "sc:space #fragment",
"new_value": "", "new_value": "",
"expected": { "expected": {
"href": "sc:space", "href": "sc:space %20",
"pathname": "space", "pathname": "space %20",
"hash": "" "hash": ""
} }
}, },
{ {
"comment": "Do not drop trailing spaces from non-trailing opaque paths", "comment": "Trailing spaces and opaque paths",
"href": "data:space ?query#fragment", "href": "data:space ?query#fragment",
"new_value": "", "new_value": "",
"expected": { "expected": {
"href": "data:space ?query", "href": "data:space %20?query",
"hash": "" "hash": ""
} }
}, },
@ -2388,7 +2460,7 @@
"href": "sc:space ?query#fragment", "href": "sc:space ?query#fragment",
"new_value": "", "new_value": "",
"expected": { "expected": {
"href": "sc:space ?query", "href": "sc:space %20?query",
"hash": "" "hash": ""
} }
}, },

View file

@ -3778,6 +3778,126 @@
"search": "", "search": "",
"hash": "" "hash": ""
}, },
{
"input": "non-special:opaque ",
"base": null,
"href": "non-special:opaque",
"origin": "null",
"protocol": "non-special:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "opaque",
"search": "",
"hash": ""
},
{
"input": "non-special:opaque ?hi",
"base": null,
"href": "non-special:opaque %20?hi",
"origin": "null",
"protocol": "non-special:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "opaque %20",
"search": "?hi",
"hash": ""
},
{
"input": "non-special:opaque #hi",
"base": null,
"href": "non-special:opaque %20#hi",
"origin": "null",
"protocol": "non-special:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "opaque %20",
"search": "",
"hash": "#hi"
},
{
"input": "non-special:opaque x?hi",
"base": null,
"href": "non-special:opaque x?hi",
"origin": "null",
"protocol": "non-special:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "opaque x",
"search": "?hi",
"hash": ""
},
{
"input": "non-special:opaque x#hi",
"base": null,
"href": "non-special:opaque x#hi",
"origin": "null",
"protocol": "non-special:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "opaque x",
"search": "",
"hash": "#hi"
},
{
"input": "non-special:opaque \t\t \t#hi",
"base": null,
"href": "non-special:opaque %20#hi",
"origin": "null",
"protocol": "non-special:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "opaque %20",
"search": "",
"hash": "#hi"
},
{
"input": "non-special:opaque \t\t #hi",
"base": null,
"href": "non-special:opaque %20#hi",
"origin": "null",
"protocol": "non-special:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "opaque %20",
"search": "",
"hash": "#hi"
},
{
"input": "non-special:opaque\t\t \r #hi",
"base": null,
"href": "non-special:opaque %20#hi",
"origin": "null",
"protocol": "non-special:",
"username": "",
"password": "",
"host": "",
"hostname": "",
"port": "",
"pathname": "opaque %20",
"search": "",
"hash": "#hi"
},
"Ideographic full stop (full-width period for Chinese, etc.) should be treated as a dot. U+3002 is mapped to U+002E (dot)", "Ideographic full stop (full-width period for Chinese, etc.) should be treated as a dot. U+3002 is mapped to U+002E (dot)",
{ {
"input": "http://www.foo。bar.com", "input": "http://www.foo。bar.com",

View file

@ -50,17 +50,17 @@ test(() => {
url.searchParams.delete('test'); url.searchParams.delete('test');
assert_false(url.searchParams.has('test')); assert_false(url.searchParams.has('test'));
assert_equals(url.search, ''); assert_equals(url.search, '');
assert_equals(url.pathname, 'space'); assert_equals(url.pathname, 'space %20');
assert_equals(url.href, 'data:space'); assert_equals(url.href, 'data:space %20');
}, 'Changing the query of a URL with an opaque path can impact the path'); }, 'Changing the query of a URL with an opaque path with trailing spaces');
test(() => { test(() => {
const url = new URL('data:space ?test#test'); const url = new URL('data:space ?test#test');
url.searchParams.delete('test'); url.searchParams.delete('test');
assert_equals(url.search, ''); assert_equals(url.search, '');
assert_equals(url.pathname, 'space '); assert_equals(url.pathname, 'space %20');
assert_equals(url.href, 'data:space #test'); assert_equals(url.href, 'data:space %20#test');
}, 'Changing the query of a URL with an opaque path can impact the path if the URL has no fragment'); }, 'Changing the query of a URL with an opaque path with trailing spaces and a fragment');
test(() => { test(() => {
const params = new URLSearchParams(); const params = new URLSearchParams();