diff --git a/AK/URLParser.cpp b/AK/URLParser.cpp
index 9e3849739b7..bbd80f04bc4 100644
--- a/AK/URLParser.cpp
+++ b/AK/URLParser.cpp
@@ -683,6 +683,24 @@ constexpr bool is_double_dot_path_segment(StringView input)
     return input == ".."sv || input.equals_ignoring_ascii_case(".%2e"sv) || input.equals_ignoring_ascii_case("%2e."sv) || input.equals_ignoring_ascii_case("%2e%2e"sv);
 }
 
+// https://url.spec.whatwg.org/#shorten-a-urls-path
+// Drops the trailing segment of url's path, except for a "file" URL whose path is a lone normalized Windows drive letter.
+void URLParser::shorten_urls_path(URL& url)
+{
+    // Step 1: callers must not pass a URL that has an opaque path.
+    VERIFY(!url.cannot_be_a_base_url());
+
+    // Steps 2-3: a "file" URL whose only segment is a normalized Windows drive letter is left untouched.
+    auto& segments = url.m_paths;
+    bool const is_lone_drive_letter = url.scheme() == "file" && segments.size() == 1 && is_normalized_windows_drive_letter(segments[0]);
+    if (is_lone_drive_letter)
+        return;
+
+    // Step 4: remove the last segment; an already-empty path stays empty.
+    if (!segments.is_empty())
+        segments.take_last();
+}
+
 // https://url.spec.whatwg.org/#string-percent-encode-after-encoding
 ErrorOr URLParser::percent_encode_after_encoding(StringView input, URL::PercentEncodeSet percent_encode_set, bool space_as_plus)
 {
@@ -1025,8 +1043,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional const& base_url,
                     url->m_query = {};
 
                     // 2. Shorten url’s path.
-                    if (url->m_paths.size())
-                        url->m_paths.remove(url->m_paths.size() - 1);
+                    shorten_urls_path(*url);
 
                     // 3. Set state to path state and decrease pointer by 1.
                     state = State::Path;
@@ -1337,8 +1354,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional const& base_url,
                     // 2. If the code point substring from pointer to the end of input does not start with a Windows drive letter, then shorten url’s path.
                     auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string();
                     if (!starts_with_windows_drive_letter(substring_from_pointer)) {
-                        if (!url->m_paths.is_empty() && !(url->scheme() == "file" && url->m_paths.size() == 1 && is_normalized_windows_drive_letter(url->m_paths[0])))
-                            url->m_paths.remove(url->m_paths.size() - 1);
+                        shorten_urls_path(*url);
                     }
                     // 3. Otherwise:
                     else {
@@ -1505,8 +1521,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional const& base_url,
                 // 2. If buffer is a double-dot URL path segment, then:
                 if (is_double_dot_path_segment(buffer.string_view())) {
                     // 1. Shorten url’s path.
-                    if (!url->m_paths.is_empty())
-                        url->m_paths.remove(url->m_paths.size() - 1);
+                    shorten_urls_path(*url);
 
                     // 2. If neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path.
                     if (code_point != '/' && !(url->is_special() && code_point == '\\'))
diff --git a/AK/URLParser.h b/AK/URLParser.h
index de6f9457619..e43ed9a52ee 100644
--- a/AK/URLParser.h
+++ b/AK/URLParser.h
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2021, Max Wipfli
+ * Copyright (c) 2023, Shannon Booth
  *
  * SPDX-License-Identifier: BSD-2-Clause
  */
@@ -63,6 +64,9 @@ public:
 
     // https://url.spec.whatwg.org/#concept-host-serializer
     static ErrorOr serialize_host(URL::Host const&);
+
+    // https://url.spec.whatwg.org/#shorten-a-urls-path
+    static void shorten_urls_path(URL&);
 };
 
 #undef ENUMERATE_STATES
diff --git a/Tests/LibWeb/Text/expected/URL/url.txt b/Tests/LibWeb/Text/expected/URL/url.txt
index e2a1dd214d6..1129f807508 100644
--- a/Tests/LibWeb/Text/expected/URL/url.txt
+++ b/Tests/LibWeb/Text/expected/URL/url.txt
@@ -68,3 +68,13 @@ port => ''
 pathname => '/hello'
 search => ''
 hash => ''
+new URL('//d:/..', 'file:///C:/a/b')
+protocol => 'file:'
+username => ''
+password => ''
+host => ''
+hostname => ''
+port => ''
+pathname => '/d:/'
+search => ''
+hash => ''
diff --git a/Tests/LibWeb/Text/input/URL/url.html b/Tests/LibWeb/Text/input/URL/url.html
index 17fc181aeb6..3994ab61c2d 100644
--- a/Tests/LibWeb/Text/input/URL/url.html
+++ b/Tests/LibWeb/Text/input/URL/url.html
@@ -27,6 +27,7 @@
             { input: 'unknown://serenityos.org:0' },
             { input: 'http://serenityos.org/cat?dog#meow"woof' },
             { input: '/hello', base: 'file://friends/' },
+            { input: '//d:/..', base: 'file:///C:/a/b' },
         ]) {
             printURL(url.input, url.base);
         }