mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-30 04:39:06 +00:00
LibURL+LibWeb: Ensure opaque paths always roundtrip
Corresponds to: 6c782003
This commit is contained in:
parent
01d1a9528b
commit
ec3c545426
Notes:
github-actions[bot]
2025-03-18 12:18:21 +00:00
Author: https://github.com/shannonbooth
Commit: ec3c545426
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/3951
Reviewed-by: https://github.com/AtkinsSJ ✅
17 changed files with 280 additions and 99 deletions
|
@ -1,6 +1,6 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
|
* Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
|
||||||
* Copyright (c) 2023-2024, Shannon Booth <shannon@serenityos.org>
|
* Copyright (c) 2023-2025, Shannon Booth <shannon@serenityos.org>
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: BSD-2-Clause
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
*/
|
*/
|
||||||
|
@ -1558,23 +1558,32 @@ Optional<URL> Parser::basic_parse(StringView raw_input, Optional<URL const&> bas
|
||||||
buffer.clear();
|
buffer.clear();
|
||||||
state = State::Fragment;
|
state = State::Fragment;
|
||||||
}
|
}
|
||||||
// 3. Otherwise:
|
// 3. Otherwise, if c is U+0020 SPACE:
|
||||||
else {
|
else if (code_point == ' ') {
|
||||||
// 1. If c is not the EOF code point, not a URL code point, and not U+0025 (%), invalid-URL-unit validation error.
|
// 1. If remaining starts with U+003F (?) or U+0023 (#), then append "%20" to url’s path.
|
||||||
if (code_point != end_of_file && !is_url_code_point(code_point) && code_point != '%')
|
if (auto remaining = get_remaining(); remaining.starts_with('?') || remaining.starts_with('#')) {
|
||||||
|
buffer.append("%20"sv);
|
||||||
|
}
|
||||||
|
// 2. Otherwise, append U+0020 SPACE to url’s path.
|
||||||
|
else {
|
||||||
|
buffer.append(' ');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// 4. Otherwise, if c is not the EOF code point:
|
||||||
|
else if (code_point != end_of_file) {
|
||||||
|
// 1. If c is not a URL code point and not U+0025 (%), invalid-URL-unit validation error.
|
||||||
|
if (!is_url_code_point(code_point) && code_point != '%')
|
||||||
report_validation_error();
|
report_validation_error();
|
||||||
|
|
||||||
// 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
|
// 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, invalid-URL-unit validation error.
|
||||||
if (code_point == '%' && !remaining_starts_with_two_ascii_hex_digits())
|
if (code_point == '%' && !remaining_starts_with_two_ascii_hex_digits())
|
||||||
report_validation_error();
|
report_validation_error();
|
||||||
|
|
||||||
// 3. If c is not the EOF code point, UTF-8 percent-encode c using the C0 control percent-encode set and append the result to url’s path.
|
// 3. UTF-8 percent-encode c using the C0 control percent-encode set and append the result to url’s path.
|
||||||
if (code_point != end_of_file) {
|
append_percent_encoded_if_necessary(buffer, code_point, PercentEncodeSet::C0Control);
|
||||||
append_percent_encoded_if_necessary(buffer, code_point, PercentEncodeSet::C0Control);
|
} else {
|
||||||
} else {
|
url->m_data->paths[0] = buffer.to_string_without_validation();
|
||||||
url->m_data->paths[0] = buffer.to_string_without_validation();
|
buffer.clear();
|
||||||
buffer.clear();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
// -> query state, https://url.spec.whatwg.org/#query-state
|
// -> query state, https://url.spec.whatwg.org/#query-state
|
||||||
|
|
|
@ -388,18 +388,10 @@ void DOMURL::set_search(String const& search)
|
||||||
// 1. Let url be this’s URL.
|
// 1. Let url be this’s URL.
|
||||||
auto& url = m_url;
|
auto& url = m_url;
|
||||||
|
|
||||||
// 2. If the given value is the empty string:
|
// 2. If the given value is the empty string, then set url’s query to null, empty this’s query object’s list, and return.
|
||||||
if (search.is_empty()) {
|
if (search.is_empty()) {
|
||||||
// 1. Set url’s query to null.
|
|
||||||
url.set_query({});
|
url.set_query({});
|
||||||
|
|
||||||
// 2. Empty this’s query object’s list.
|
|
||||||
m_query->m_list.clear();
|
m_query->m_list.clear();
|
||||||
|
|
||||||
// 3. Potentially strip trailing spaces from an opaque path with this.
|
|
||||||
strip_trailing_spaces_from_an_opaque_path(*this);
|
|
||||||
|
|
||||||
// 4. Return.
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -438,15 +430,9 @@ String DOMURL::hash() const
|
||||||
// https://url.spec.whatwg.org/#ref-for-dom-url-hash%E2%91%A0
|
// https://url.spec.whatwg.org/#ref-for-dom-url-hash%E2%91%A0
|
||||||
void DOMURL::set_hash(String const& hash)
|
void DOMURL::set_hash(String const& hash)
|
||||||
{
|
{
|
||||||
// 1. If the given value is the empty string:
|
// 1. If the given value is the empty string, then set this’s URL’s fragment to null and return.
|
||||||
if (hash.is_empty()) {
|
if (hash.is_empty()) {
|
||||||
// 1. Set this’s URL’s fragment to null.
|
|
||||||
m_url.set_fragment({});
|
m_url.set_fragment({});
|
||||||
|
|
||||||
// 2. Potentially strip trailing spaces from an opaque path with this.
|
|
||||||
strip_trailing_spaces_from_an_opaque_path(*this);
|
|
||||||
|
|
||||||
// 3. Return.
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -461,29 +447,6 @@ void DOMURL::set_hash(String const& hash)
|
||||||
(void)URL::Parser::basic_parse(input, {}, &m_url, URL::Parser::State::Fragment);
|
(void)URL::Parser::basic_parse(input, {}, &m_url, URL::Parser::State::Fragment);
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
|
|
||||||
void strip_trailing_spaces_from_an_opaque_path(DOMURL& url)
|
|
||||||
{
|
|
||||||
// 1. If url’s URL does not have an opaque path, then return.
|
|
||||||
// FIXME: Reimplement this step once we modernize the URL implementation to meet the spec.
|
|
||||||
if (!url.cannot_be_a_base_url())
|
|
||||||
return;
|
|
||||||
|
|
||||||
// 2. If url’s URL’s fragment is non-null, then return.
|
|
||||||
if (url.fragment().has_value())
|
|
||||||
return;
|
|
||||||
|
|
||||||
// 3. If url’s URL’s query is non-null, then return.
|
|
||||||
if (url.query().has_value())
|
|
||||||
return;
|
|
||||||
|
|
||||||
// 4. Remove all trailing U+0020 SPACE code points from url’s URL’s path.
|
|
||||||
// NOTE: At index 0 since the first step tells us that the URL only has one path segment.
|
|
||||||
auto opaque_path = url.path_segment_at_index(0);
|
|
||||||
auto trimmed_path = opaque_path.trim(" "sv, TrimMode::Right);
|
|
||||||
url.set_paths({ trimmed_path });
|
|
||||||
}
|
|
||||||
|
|
||||||
// https://url.spec.whatwg.org/#concept-url-parser
|
// https://url.spec.whatwg.org/#concept-url-parser
|
||||||
Optional<URL::URL> parse(StringView input, Optional<URL::URL const&> base_url, Optional<StringView> encoding)
|
Optional<URL::URL> parse(StringView input, Optional<URL::URL const&> base_url, Optional<StringView> encoding)
|
||||||
{
|
{
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
* Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
|
* Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
|
||||||
* Copyright (c) 2021, the SerenityOS developers.
|
* Copyright (c) 2021, the SerenityOS developers.
|
||||||
* Copyright (c) 2023, networkException <networkexception@serenityos.org>
|
* Copyright (c) 2023, networkException <networkexception@serenityos.org>
|
||||||
* Copyright (c) 2024, Shannon Booth <shannon@serenityos.org>
|
* Copyright (c) 2024-2025, Shannon Booth <shannon@serenityos.org>
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: BSD-2-Clause
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
*/
|
*/
|
||||||
|
@ -92,9 +92,6 @@ private:
|
||||||
GC::Ref<URLSearchParams> m_query;
|
GC::Ref<URLSearchParams> m_query;
|
||||||
};
|
};
|
||||||
|
|
||||||
// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
|
|
||||||
void strip_trailing_spaces_from_an_opaque_path(DOMURL& url);
|
|
||||||
|
|
||||||
// https://url.spec.whatwg.org/#concept-url-parser
|
// https://url.spec.whatwg.org/#concept-url-parser
|
||||||
Optional<URL::URL> parse(StringView input, Optional<URL::URL const&> base_url = {}, Optional<StringView> encoding = {});
|
Optional<URL::URL> parse(StringView input, Optional<URL::URL const&> base_url = {}, Optional<StringView> encoding = {});
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
|
* Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
|
||||||
* Copyright (c) 2023-2024, Shannon Booth <shannon@serenityos.org>
|
* Copyright (c) 2023-2025, Shannon Booth <shannon@serenityos.org>
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: BSD-2-Clause
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
*/
|
*/
|
||||||
|
@ -229,10 +229,6 @@ void URLSearchParams::update()
|
||||||
|
|
||||||
// 4. Set query’s URL object’s URL’s query to serializedQuery.
|
// 4. Set query’s URL object’s URL’s query to serializedQuery.
|
||||||
m_url->set_query({}, serialized_query);
|
m_url->set_query({}, serialized_query);
|
||||||
|
|
||||||
// 5. If serializedQuery is null, then potentially strip trailing spaces from an opaque path with query’s URL object.
|
|
||||||
if (!serialized_query.has_value())
|
|
||||||
strip_trailing_spaces_from_an_opaque_path(*m_url);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://url.spec.whatwg.org/#dom-urlsearchparams-delete
|
// https://url.spec.whatwg.org/#dom-urlsearchparams-delete
|
||||||
|
|
|
@ -0,0 +1,4 @@
|
||||||
|
pathname => 'foobar %20'
|
||||||
|
pathname => 'foobar %20'
|
||||||
|
pathname => 'baz%20'
|
||||||
|
pathname => 'baz%20'
|
|
@ -1,6 +1,6 @@
|
||||||
URL pathname is 'space '
|
URL pathname is 'space %20'
|
||||||
URL href is 'data:space ?test'
|
URL href is 'data:space %20?test'
|
||||||
true
|
true
|
||||||
false
|
false
|
||||||
URL pathname is 'space'
|
URL pathname is 'space %20'
|
||||||
URL href is 'data:space'
|
URL href is 'data:space %20'
|
||||||
|
|
|
@ -1,4 +0,0 @@
|
||||||
pathname => 'foobar '
|
|
||||||
pathname => 'foobar'
|
|
||||||
pathname => 'baz '
|
|
||||||
pathname => 'baz'
|
|
|
@ -1,8 +1,8 @@
|
||||||
Harness status: OK
|
Harness status: OK
|
||||||
|
|
||||||
Found 386 tests
|
Found 394 tests
|
||||||
|
|
||||||
386 Pass
|
394 Pass
|
||||||
Pass Loading data…
|
Pass Loading data…
|
||||||
Pass Parsing origin: <http://example .
|
Pass Parsing origin: <http://example .
|
||||||
org> against <http://example.org/foo/bar>
|
org> against <http://example.org/foo/bar>
|
||||||
|
@ -216,6 +216,14 @@ Pass Parsing origin: <//www.example2.com> against <http://www.example.com/test>
|
||||||
Pass Parsing origin: <http://ExAmPlE.CoM> against <http://other.com/>
|
Pass Parsing origin: <http://ExAmPlE.CoM> against <http://other.com/>
|
||||||
Pass Parsing origin: <http://GOOgoo.com> against <http://other.com/>
|
Pass Parsing origin: <http://GOOgoo.com> against <http://other.com/>
|
||||||
Pass Parsing origin: < |