From 936b76f36e87a6d4cf267c15c95786ef677515fc Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Fri, 2 Aug 2024 15:23:49 +0200 Subject: [PATCH] LibURL: Make URL a copy-on-write type This patch moves the data members of URL to an internal URL::Data struct that is also reference-counted. URL then uses a CopyOnWrite template to give itself copy-on-write behavior. This means that URL itself is now 8 bytes per instance, and copying is cheap as long as you don't mutate. This shrinks many data structures over in LibWeb land. As an example, CSS::ComputedValues goes from 3024 bytes to 2288 bytes per instance. --- Userland/Libraries/LibURL/Parser.cpp | 142 +++++++++++++-------------- Userland/Libraries/LibURL/URL.cpp | 140 +++++++++++++------------- Userland/Libraries/LibURL/URL.h | 137 ++++++++++++++++++-------- 3 files changed, 236 insertions(+), 183 deletions(-) diff --git a/Userland/Libraries/LibURL/Parser.cpp b/Userland/Libraries/LibURL/Parser.cpp index 55b483a6571..084a7cbc1b5 100644 --- a/Userland/Libraries/LibURL/Parser.cpp +++ b/Userland/Libraries/LibURL/Parser.cpp @@ -730,7 +730,7 @@ void Parser::shorten_urls_path(URL& url) VERIFY(!url.cannot_be_a_base_url()); // 2. Let path be url’s path. - auto& path = url.m_paths; + auto& path = url.m_data->paths; // 3. If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return. if (url.scheme() == "file" && path.size() == 1 && is_normalized_windows_drive_letter(path[0])) @@ -929,13 +929,13 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt } // 2. Set url’s scheme to buffer. - url->m_scheme = buffer.to_string_without_validation(); + url->m_data->scheme = buffer.to_string_without_validation(); // 3. If state override is given, then: if (state_override.has_value()) { // 1. If url’s port is url’s scheme’s default port, then set url’s port to null. if (url->port() == default_port_for_scheme(url->scheme())) - url->m_port = {}; + url->m_data->port = {}; // 2. Return. return *url; @@ -954,7 +954,7 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt state = State::File; } // 6. Otherwise, if url is special, base is non-null, and base’s scheme is url’s scheme: - else if (url->is_special() && base_url.has_value() && base_url->scheme() == url->m_scheme) { + else if (url->is_special() && base_url.has_value() && base_url->scheme() == url->m_data->scheme) { // 1. Assert: base is is special (and therefore does not have an opaque path). VERIFY(base_url->is_special()); @@ -972,7 +972,7 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt } // 9. Otherwise, set url’s path to the empty string and set state to opaque path state. else { - url->m_cannot_be_a_base_url = true; + url->m_data->cannot_be_a_base_url = true; url->append_slash(); state = State::CannotBeABaseUrlPath; } @@ -992,22 +992,22 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt // -> no scheme state, https://url.spec.whatwg.org/#no-scheme-state case State::NoScheme: // 1. If base is null, or base has an opaque path and c is not U+0023 (#), missing-scheme-non-relative-URL validation error, return failure. - if (!base_url.has_value() || (base_url->m_cannot_be_a_base_url && code_point != '#')) { + if (!base_url.has_value() || (base_url->m_data->cannot_be_a_base_url && code_point != '#')) { report_validation_error(); return {}; } // 2. Otherwise, if base has an opaque path and c is U+0023 (#), set url’s scheme to base’s scheme, url’s path to base’s path, url’s query // to base’s query,url’s fragment to the empty string, and set state to fragment state. - else if (base_url->m_cannot_be_a_base_url && code_point == '#') { - url->m_scheme = base_url->m_scheme; - url->m_paths = base_url->m_paths; - url->m_query = base_url->m_query; - url->m_fragment = String {}; - url->m_cannot_be_a_base_url = true; + else if (base_url->m_data->cannot_be_a_base_url && code_point == '#') { + url->m_data->scheme = base_url->m_data->scheme; + url->m_data->paths = base_url->m_data->paths; + url->m_data->query = base_url->m_data->query; + url->m_data->fragment = String {}; + url->m_data->cannot_be_a_base_url = true; state = State::Fragment; } // 3. Otherwise, if base’s scheme is not "file", set state to relative state and decrease pointer by 1. - else if (base_url->m_scheme != "file") { + else if (base_url->m_data->scheme != "file") { state = State::Relative; continue; } @@ -1049,7 +1049,7 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt VERIFY(base_url->scheme() != "file"); // 2. Set url’s scheme to base’s scheme. - url->m_scheme = base_url->m_scheme; + url->m_data->scheme = base_url->m_data->scheme; // 3. If c is U+002F (/), then set state to relative slash state. if (code_point == '/') { @@ -1063,27 +1063,27 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt // 5. Otherwise: else { // 1. Set url’s username to base’s username, url’s password to base’s password, url’s host to base’s host, url’s port to base’s port, url’s path to a clone of base’s path, and url’s query to base’s query. - url->m_username = base_url->m_username; - url->m_password = base_url->m_password; - url->m_host = base_url->m_host; - url->m_port = base_url->m_port; - url->m_paths = base_url->m_paths; - url->m_query = base_url->m_query; + url->m_data->username = base_url->m_data->username; + url->m_data->password = base_url->m_data->password; + url->m_data->host = base_url->m_data->host; + url->m_data->port = base_url->m_data->port; + url->m_data->paths = base_url->m_data->paths; + url->m_data->query = base_url->m_data->query; // 2. If c is U+003F (?), then set url’s query to the empty string, and state to query state. if (code_point == '?') { - url->m_query = String {}; + url->m_data->query = String {}; state = State::Query; } // 3. Otherwise, if c is U+0023 (#), set url’s fragment to the empty string and state to fragment state. else if (code_point == '#') { - url->m_fragment = String {}; + url->m_data->fragment = String {}; state = State::Fragment; } // 4. Otherwise, if c is not the EOF code point: else if (code_point != end_of_file) { // 1. Set url’s query to null. - url->m_query = {}; + url->m_data->query = {}; // 2. Shorten url’s path. shorten_urls_path(*url); @@ -1111,10 +1111,10 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt } // 3. Otherwise, set url’s username to base’s username, url’s password to base’s password, url’s host to base’s host, url’s port to base’s port, state to path state, and then, decrease pointer by 1. else { - url->m_username = base_url->m_username; - url->m_password = base_url->m_password; - url->m_host = base_url->m_host; - url->m_port = base_url->m_port; + url->m_data->username = base_url->m_data->username; + url->m_data->password = base_url->m_data->password; + url->m_data->host = base_url->m_data->host; + url->m_data->port = base_url->m_data->port; state = State::Path; continue; } @@ -1180,23 +1180,23 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt // 3. If passwordTokenSeen is true, then append encodedCodePoints to url’s password. if (password_token_seen) { if (password_builder.is_empty()) - password_builder.append(url->m_password); + password_builder.append(url->m_data->password); append_percent_encoded_if_necessary(password_builder, c, PercentEncodeSet::Userinfo); } // 4. Otherwise, append encodedCodePoints to url’s username. else { if (username_builder.is_empty()) - username_builder.append(url->m_username); + username_builder.append(url->m_data->username); append_percent_encoded_if_necessary(username_builder, c, PercentEncodeSet::Userinfo); } } - if (username_builder.string_view().length() > url->m_username.bytes().size()) - url->m_username = username_builder.to_string().release_value_but_fixme_should_propagate_errors(); - if (password_builder.string_view().length() > url->m_password.bytes().size()) - url->m_password = password_builder.to_string().release_value_but_fixme_should_propagate_errors(); + if (username_builder.string_view().length() > url->m_data->username.bytes().size()) + url->m_data->username = username_builder.to_string().release_value_but_fixme_should_propagate_errors(); + if (password_builder.string_view().length() > url->m_data->password.bytes().size()) + url->m_data->password = password_builder.to_string().release_value_but_fixme_should_propagate_errors(); // 5. Set buffer to the empty string. buffer.clear(); @@ -1255,7 +1255,7 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt return {}; // 5. Set url’s host to host, buffer to the empty string, and state to port state. - url->m_host = host.release_value(); + url->m_data->host = host.release_value(); buffer.clear(); state = State::Port; } @@ -1285,7 +1285,7 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt return {}; // 5. Set url’s host to host, buffer to the empty string, and state to path start state. - url->m_host = host.value(); + url->m_data->host = host.value(); buffer.clear(); state = State::Port; @@ -1341,9 +1341,9 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt // 3. Set url’s port to null, if port is url’s scheme’s default port; otherwise to port. if (port.value() == default_port_for_scheme(url->scheme())) - url->m_port = {}; + url->m_data->port = {}; else - url->m_port = port.value(); + url->m_data->port = port.value(); // 4. Set buffer to the empty string. buffer.clear(); @@ -1366,10 +1366,10 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt // -> file state, https://url.spec.whatwg.org/#file-state case State::File: // 1. Set url’s scheme to "file". - url->m_scheme = "file"_string; + url->m_data->scheme = "file"_string; // 2. Set url’s host to the empty string. - url->m_host = String {}; + url->m_data->host = String {}; // 3. If c is U+002F (/) or U+005C (\), then: if (code_point == '/' || code_point == '\\') { @@ -1381,26 +1381,26 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt state = State::FileSlash; } // 4. Otherwise, if base is non-null and base’s scheme is "file": - else if (base_url.has_value() && base_url->m_scheme == "file") { + else if (base_url.has_value() && base_url->m_data->scheme == "file") { // 1. Set url’s host to base’s host, url’s path to a clone of base’s path, and url’s query to base’s query. - url->m_host = base_url->m_host; - url->m_paths = base_url->m_paths; - url->m_query = base_url->m_query; + url->m_data->host = base_url->m_data->host; + url->m_data->paths = base_url->m_data->paths; + url->m_data->query = base_url->m_data->query; // 2. If c is U+003F (?), then set url’s query to the empty string and state to query state. if (code_point == '?') { - url->m_query = String {}; + url->m_data->query = String {}; state = State::Query; } // 3. Otherwise, if c is U+0023 (#), set url’s fragment to the empty string and state to fragment state. else if (code_point == '#') { - url->m_fragment = String {}; + url->m_data->fragment = String {}; state = State::Fragment; } // 4. Otherwise, if c is not the EOF code point: else if (code_point != end_of_file) { // 1. Set url’s query to null. - url->m_query = {}; + url->m_data->query = {}; // 2. If the code point substring from pointer to the end of input does not start with a Windows drive letter, then shorten url’s path. auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string(); @@ -1413,7 +1413,7 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt report_validation_error(); // 2. Set url’s path to « ». - url->m_paths.clear(); + url->m_data->paths.clear(); } // 4. Set state to path state and decrease pointer by 1. @@ -1442,18 +1442,18 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt // 2. Otherwise: else { // 1. If base is non-null and base’s scheme is "file", then: - if (base_url.has_value() && base_url->m_scheme == "file") { + if (base_url.has_value() && base_url->m_data->scheme == "file") { // 1. Set url’s host to base’s host. - url->m_host = base_url->m_host; + url->m_data->host = base_url->m_data->host; // FIXME: The spec does not seem to mention these steps. - url->m_paths = base_url->m_paths; - url->m_paths.remove(url->m_paths.size() - 1); + url->m_data->paths = base_url->m_data->paths; + url->m_data->paths.remove(url->m_data->paths.size() - 1); // 2. If the code point substring from pointer to the end of input does not start with a Windows drive letter and base’s path[0] is a normalized Windows drive letter, then append base’s path[0] to url’s path. auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string(); - if (!starts_with_windows_drive_letter(substring_from_pointer) && is_normalized_windows_drive_letter(base_url->m_paths[0])) - url->m_paths.append(base_url->m_paths[0]); + if (!starts_with_windows_drive_letter(substring_from_pointer) && is_normalized_windows_drive_letter(base_url->m_data->paths[0])) + url->m_data->paths.append(base_url->m_data->paths[0]); } // 2. Set state to path state, and decrease pointer by 1. @@ -1474,7 +1474,7 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt // 2. Otherwise, if buffer is the empty string, then: else if (buffer.is_empty()) { // 1. Set url’s host to the empty string. - url->m_host = String {}; + url->m_data->host = String {}; // 2. If state override is given, then return. if (state_override.has_value()) @@ -1498,7 +1498,7 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt host = String {}; // 4. Set url’s host to host. - url->m_host = host.release_value(); + url->m_data->host = host.release_value(); // 5. If state override is given, then return. if (state_override.has_value()) @@ -1532,12 +1532,12 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt } // 2. Otherwise, if state override is not given and c is U+003F (?), set url’s query to the empty string and state to query state. else if (!state_override.has_value() && code_point == '?') { - url->m_query = String {}; + url->m_data->query = String {}; state = State::Query; } // 3. Otherwise, if state override is not given and c is U+0023 (#), set url’s fragment to the empty string and state to fragment state. else if (!state_override.has_value() && code_point == '#') { - url->m_fragment = String {}; + url->m_data->fragment = String {}; state = State::Fragment; } // 4. Otherwise, if c is not the EOF code point: @@ -1585,14 +1585,14 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt // 4. Otherwise, if buffer is not a single-dot URL path segment, then: else if (!is_single_dot_path_segment(buffer.string_view())) { // 1. If url’s scheme is "file", url’s path is empty, and buffer is a Windows drive letter, then replace the second code point in buffer with U+003A (:). - if (url->m_scheme == "file" && url->m_paths.is_empty() && is_windows_drive_letter(buffer.string_view())) { + if (url->m_data->scheme == "file" && url->m_data->paths.is_empty() && is_windows_drive_letter(buffer.string_view())) { auto drive_letter = buffer.string_view()[0]; buffer.clear(); buffer.append(drive_letter); buffer.append(':'); } // 2. Append buffer to url’s path. - url->m_paths.append(buffer.to_string_without_validation()); + url->m_data->paths.append(buffer.to_string_without_validation()); } // 5. Set buffer to the empty string. @@ -1600,12 +1600,12 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt // 6. If c is U+003F (?), then set url’s query to the empty string and state to query state. if (code_point == '?') { - url->m_query = String {}; + url->m_data->query = String {}; state = State::Query; } // 7. If c is U+0023 (#), then set url’s fragment to the empty string and state to fragment state. else if (code_point == '#') { - url->m_fragment = String {}; + url->m_data->fragment = String {}; state = State::Fragment; } } @@ -1626,20 +1626,20 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt // -> opaque path state, https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state case State::CannotBeABaseUrlPath: // NOTE: This does not follow the spec exactly but rather uses the buffer and only sets the path on EOF. - VERIFY(url->m_paths.size() == 1 && url->m_paths[0].is_empty()); + VERIFY(url->m_data->paths.size() == 1 && url->m_data->paths[0].is_empty()); // 1. If c is U+003F (?), then set url’s query to the empty string and state to query state. if (code_point == '?') { - url->m_paths[0] = buffer.to_string_without_validation(); - url->m_query = String {}; + url->m_data->paths[0] = buffer.to_string_without_validation(); + url->m_data->query = String {}; buffer.clear(); state = State::Query; } // 2. Otherwise, if c is U+0023 (#), then set url’s fragment to the empty string and state to fragment state. else if (code_point == '#') { // NOTE: This needs to be percent decoded since the member variables contain decoded data. - url->m_paths[0] = buffer.to_string_without_validation(); - url->m_fragment = String {}; + url->m_data->paths[0] = buffer.to_string_without_validation(); + url->m_data->fragment = String {}; buffer.clear(); state = State::Fragment; } @@ -1657,7 +1657,7 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt if (code_point != end_of_file) { append_percent_encoded_if_necessary(buffer, code_point, PercentEncodeSet::C0Control); } else { - url->m_paths[0] = buffer.to_string_without_validation(); + url->m_data->paths[0] = buffer.to_string_without_validation(); buffer.clear(); } } @@ -1680,14 +1680,14 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt auto query_percent_encode_set = url->is_special() ? PercentEncodeSet::SpecialQuery : PercentEncodeSet::Query; // 2. Percent-encode after encoding, with encoding, buffer, and queryPercentEncodeSet, and append the result to url’s query. - url->m_query = percent_encode_after_encoding(buffer.string_view(), query_percent_encode_set).release_value_but_fixme_should_propagate_errors(); + url->m_data->query = percent_encode_after_encoding(buffer.string_view(), query_percent_encode_set).release_value_but_fixme_should_propagate_errors(); // 3. Set buffer to the empty string. buffer.clear(); // 4. If c is U+0023 (#), then set url’s fragment to the empty string and state to fragment state. if (code_point == '#') { - url->m_fragment = String {}; + url->m_data->fragment = String {}; state = State::Fragment; } } @@ -1722,7 +1722,7 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt // NOTE: The percent-encode is done on EOF on the entire buffer. buffer.append_code_point(code_point); } else { - url->m_fragment = percent_encode_after_encoding(buffer.string_view(), PercentEncodeSet::Fragment).release_value_but_fixme_should_propagate_errors(); + url->m_data->fragment = percent_encode_after_encoding(buffer.string_view(), PercentEncodeSet::Fragment).release_value_but_fixme_should_propagate_errors(); buffer.clear(); } break; @@ -1735,7 +1735,7 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, Opt ++iterator; } - url->m_valid = true; + url->m_data->valid = true; dbgln_if(URL_PARSER_DEBUG, "URL::Parser::parse: Parsed URL to be '{}'.", url->serialize()); // 10. Return url. diff --git a/Userland/Libraries/LibURL/URL.cpp b/Userland/Libraries/LibURL/URL.cpp index 9068d8f2f1b..d739419764b 100644 --- a/Userland/Libraries/LibURL/URL.cpp +++ b/Userland/Libraries/LibURL/URL.cpp @@ -21,7 +21,7 @@ URL::URL(StringView string) : URL(Parser::basic_parse(string)) { if constexpr (URL_PARSER_DEBUG) { - if (m_valid) + if (m_data->valid) dbgln("URL constructor: Parsed URL to be '{}'.", serialize()); else dbgln("URL constructor: Parsed URL to be invalid."); @@ -38,42 +38,42 @@ URL URL::complete_url(StringView relative_url) const ErrorOr URL::username() const { - return String::from_byte_string(percent_decode(m_username)); + return String::from_byte_string(percent_decode(m_data->username)); } ErrorOr URL::password() const { - return String::from_byte_string(percent_decode(m_password)); + return String::from_byte_string(percent_decode(m_data->password)); } ByteString URL::path_segment_at_index(size_t index) const { VERIFY(index < path_segment_count()); - return percent_decode(m_paths[index]); + return percent_decode(m_data->paths[index]); } ByteString URL::basename() const { - if (!m_valid) + if (!m_data->valid) return {}; - if (m_paths.is_empty()) + if (m_data->paths.is_empty()) return {}; - auto& last_segment = m_paths.last(); + auto& last_segment = m_data->paths.last(); return percent_decode(last_segment); } void URL::set_scheme(String scheme) { - m_scheme = move(scheme); - m_valid = compute_validity(); + m_data->scheme = move(scheme); + m_data->valid = compute_validity(); } // https://url.spec.whatwg.org/#set-the-username ErrorOr URL::set_username(StringView username) { // To set the username given a url and username, set url’s username to the result of running UTF-8 percent-encode on username using the userinfo percent-encode set. - m_username = TRY(String::from_byte_string(percent_encode(username, PercentEncodeSet::Userinfo))); - m_valid = compute_validity(); + m_data->username = TRY(String::from_byte_string(percent_encode(username, PercentEncodeSet::Userinfo))); + m_data->valid = compute_validity(); return {}; } @@ -81,76 +81,76 @@ ErrorOr URL::set_username(StringView username) ErrorOr URL::set_password(StringView password) { // To set the password given a url and password, set url’s password to the result of running UTF-8 percent-encode on password using the userinfo percent-encode set. - m_password = TRY(String::from_byte_string(percent_encode(password, PercentEncodeSet::Userinfo))); - m_valid = compute_validity(); + m_data->password = TRY(String::from_byte_string(percent_encode(password, PercentEncodeSet::Userinfo))); + m_data->valid = compute_validity(); return {}; } void URL::set_host(Host host) { - m_host = move(host); - m_valid = compute_validity(); + m_data->host = move(host); + m_data->valid = compute_validity(); } // https://url.spec.whatwg.org/#concept-host-serializer ErrorOr URL::serialized_host() const { - return Parser::serialize_host(m_host); + return Parser::serialize_host(m_data->host); } void URL::set_port(Optional port) { - if (port == default_port_for_scheme(m_scheme)) { - m_port = {}; + if (port == default_port_for_scheme(m_data->scheme)) { + m_data->port = {}; return; } - m_port = move(port); - m_valid = compute_validity(); + m_data->port = move(port); + m_data->valid = compute_validity(); } void URL::set_paths(Vector const& paths) { - m_paths.clear_with_capacity(); - m_paths.ensure_capacity(paths.size()); + m_data->paths.clear_with_capacity(); + m_data->paths.ensure_capacity(paths.size()); for (auto const& segment : paths) - m_paths.unchecked_append(String::from_byte_string(percent_encode(segment, PercentEncodeSet::Path)).release_value_but_fixme_should_propagate_errors()); - m_valid = compute_validity(); + m_data->paths.unchecked_append(String::from_byte_string(percent_encode(segment, PercentEncodeSet::Path)).release_value_but_fixme_should_propagate_errors()); + m_data->valid = compute_validity(); } void URL::append_path(StringView path) { - m_paths.append(String::from_byte_string(percent_encode(path, PercentEncodeSet::Path)).release_value_but_fixme_should_propagate_errors()); + m_data->paths.append(String::from_byte_string(percent_encode(path, PercentEncodeSet::Path)).release_value_but_fixme_should_propagate_errors()); } // https://url.spec.whatwg.org/#cannot-have-a-username-password-port bool URL::cannot_have_a_username_or_password_or_port() const { // A URL cannot have a username/password/port if its host is null or the empty string, or its scheme is "file". - return m_host.has() || m_host == String {} || m_scheme == "file"sv; + return m_data->host.has() || m_data->host == String {} || m_data->scheme == "file"sv; } // FIXME: This is by no means complete. // NOTE: This relies on some assumptions about how the spec-defined URL parser works that may turn out to be wrong. bool URL::compute_validity() const { - if (m_scheme.is_empty()) + if (m_data->scheme.is_empty()) return false; - if (m_cannot_be_a_base_url) { - if (m_paths.size() != 1) + if (m_data->cannot_be_a_base_url) { + if (m_data->paths.size() != 1) return false; - if (m_paths[0].is_empty()) + if (m_data->paths[0].is_empty()) return false; } else { - if (m_scheme.is_one_of("about", "mailto")) + if (m_data->scheme.is_one_of("about", "mailto")) return false; // NOTE: Maybe it is allowed to have a zero-segment path. - if (m_paths.size() == 0) + if (m_data->paths.size() == 0) return false; } // NOTE: A file URL's host should be the empty string for localhost, not null. - if (m_scheme == "file" && m_host.has()) + if (m_data->scheme == "file" && m_data->host.has()) return false; return true; @@ -251,13 +251,13 @@ ByteString URL::serialize_path(ApplyPercentDecoding apply_percent_decoding) cons // 1. If url has an opaque path, then return url’s path. // FIXME: Reimplement this step once we modernize the URL implementation to meet the spec. if (cannot_be_a_base_url()) - return m_paths[0].to_byte_string(); + return m_data->paths[0].to_byte_string(); // 2. Let output be the empty string. StringBuilder output; // 3. For each segment of url’s path: append U+002F (/) followed by segment to output. - for (auto const& segment : m_paths) { + for (auto const& segment : m_data->paths) { output.append('/'); output.append(apply_percent_decoding == ApplyPercentDecoding::Yes ? percent_decode(segment) : segment.to_byte_string()); } @@ -271,23 +271,23 @@ ByteString URL::serialize(ExcludeFragment exclude_fragment) const { // 1. Let output be url’s scheme and U+003A (:) concatenated. StringBuilder output; - output.append(m_scheme); + output.append(m_data->scheme); output.append(':'); // 2. If url’s host is non-null: - if (!m_host.has()) { + if (!m_data->host.has()) { // 1. Append "//" to output. output.append("//"sv); // 2. If url includes credentials, then: if (includes_credentials()) { // 1. Append url’s username to output. - output.append(m_username); + output.append(m_data->username); // 2. If url’s password is not the empty string, then append U+003A (:), followed by url’s password, to output. - if (!m_password.is_empty()) { + if (!m_data->password.is_empty()) { output.append(':'); - output.append(m_password); + output.append(m_data->password); } // 3. Append U+0040 (@) to output. @@ -298,34 +298,34 @@ ByteString URL::serialize(ExcludeFragment exclude_fragment) const output.append(serialized_host().release_value_but_fixme_should_propagate_errors()); // 4. If url’s port is non-null, append U+003A (:) followed by url’s port, serialized, to output. - if (m_port.has_value()) - output.appendff(":{}", *m_port); + if (m_data->port.has_value()) + output.appendff(":{}", *m_data->port); } // 3. If url’s host is null, url does not have an opaque path, url’s path’s size is greater than 1, and url’s path[0] is the empty string, then append U+002F (/) followed by U+002E (.) to output. // 4. Append the result of URL path serializing url to output. // FIXME: Implement this closer to spec steps. if (cannot_be_a_base_url()) { - output.append(m_paths[0]); + output.append(m_data->paths[0]); } else { - if (m_host.has() && m_paths.size() > 1 && m_paths[0].is_empty()) + if (m_data->host.has() && m_data->paths.size() > 1 && m_data->paths[0].is_empty()) output.append("/."sv); - for (auto& segment : m_paths) { + for (auto& segment : m_data->paths) { output.append('/'); output.append(segment); } } // 5. If url’s query is non-null, append U+003F (?), followed by url’s query, to output. - if (m_query.has_value()) { + if (m_data->query.has_value()) { output.append('?'); - output.append(*m_query); + output.append(*m_data->query); } // 6. If exclude fragment is false and url’s fragment is non-null, then append U+0023 (#), followed by url’s fragment, to output. - if (exclude_fragment == ExcludeFragment::No && m_fragment.has_value()) { + if (exclude_fragment == ExcludeFragment::No && m_data->fragment.has_value()) { output.append('#'); - output.append(*m_fragment); + output.append(*m_data->fragment); } // 7. Return output. @@ -338,38 +338,38 @@ ByteString URL::serialize(ExcludeFragment exclude_fragment) const // resulting from percent-decoding those sequences converted to bytes, unless that renders those sequences invisible. ByteString URL::serialize_for_display() const { - VERIFY(m_valid); + VERIFY(m_data->valid); StringBuilder builder; - builder.append(m_scheme); + builder.append(m_data->scheme); builder.append(':'); - if (!m_host.has()) { + if (!m_data->host.has()) { builder.append("//"sv); builder.append(serialized_host().release_value_but_fixme_should_propagate_errors()); - if (m_port.has_value()) - builder.appendff(":{}", *m_port); + if (m_data->port.has_value()) + builder.appendff(":{}", *m_data->port); } if (cannot_be_a_base_url()) { - builder.append(m_paths[0]); + builder.append(m_data->paths[0]); } else { - if (m_host.has() && m_paths.size() > 1 && m_paths[0].is_empty()) + if (m_data->host.has() && m_data->paths.size() > 1 && m_data->paths[0].is_empty()) builder.append("/."sv); - for (auto& segment : m_paths) { + for (auto& segment : m_data->paths) { builder.append('/'); builder.append(segment); } } - if (m_query.has_value()) { + if (m_data->query.has_value()) { builder.append('?'); - builder.append(*m_query); + builder.append(*m_data->query); } - if (m_fragment.has_value()) { + if (m_data->fragment.has_value()) { builder.append('#'); - builder.append(*m_fragment); + builder.append(*m_data->fragment); } return builder.to_byte_string(); @@ -384,27 +384,27 @@ ErrorOr URL::to_string() const // https://url.spec.whatwg.org/#concept-url-origin ByteString URL::serialize_origin() const { - VERIFY(m_valid); + VERIFY(m_data->valid); - if (m_scheme == "blob"sv) { + if (m_data->scheme == "blob"sv) { // TODO: 1. If URL’s blob URL entry is non-null, then return URL’s blob URL entry’s environment’s origin. // 2. Let url be the result of parsing URL’s path[0]. - VERIFY(!m_paths.is_empty()); - URL url = m_paths[0]; + VERIFY(!m_data->paths.is_empty()); + URL url = m_data->paths[0]; // 3. Return a new opaque origin, if url is failure, and url’s origin otherwise. if (!url.is_valid()) return "null"; return url.serialize_origin(); - } else if (!m_scheme.is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) { // file: "Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin." + } else if (!m_data->scheme.is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) { // file: "Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin." return "null"; } StringBuilder builder; - builder.append(m_scheme); + builder.append(m_data->scheme); builder.append("://"sv); builder.append(serialized_host().release_value_but_fixme_should_propagate_errors()); - if (m_port.has_value()) - builder.appendff(":{}", *m_port); + if (m_data->port.has_value()) + builder.appendff(":{}", *m_data->port); return builder.to_byte_string(); } @@ -412,7 +412,7 @@ bool URL::equals(URL const& other, ExcludeFragment exclude_fragments) const { if (this == &other) return true; - if (!m_valid || !other.m_valid) + if (!m_data->valid || !other.m_data->valid) return false; return serialize(exclude_fragments) == other.serialize(exclude_fragments); } diff --git a/Userland/Libraries/LibURL/URL.h b/Userland/Libraries/LibURL/URL.h index 820c1a64917..1f696bc8f3e 100644 --- a/Userland/Libraries/LibURL/URL.h +++ b/Userland/Libraries/LibURL/URL.h @@ -78,6 +78,34 @@ enum class SpaceAsPlus { ByteString percent_encode(StringView input, PercentEncodeSet set = PercentEncodeSet::Userinfo, SpaceAsPlus = SpaceAsPlus::No); ByteString percent_decode(StringView input); +template +class CopyOnWrite { +public: + CopyOnWrite() + : m_value(adopt_ref(*new T)) + { + } + T& mutable_value() + { + if (m_value->ref_count() > 1) + m_value = m_value->clone(); + return *m_value; + } + T const& value() const { return *m_value; } + + operator T const&() const { return value(); } + operator T&() { return mutable_value(); } + + T const* operator->() const { return &value(); } + T* operator->() { return &mutable_value(); } + + T const* ptr() const { return m_value.ptr(); } + T* ptr() { return m_value.ptr(); } + +private: + NonnullRefPtr m_value; +}; + // https://url.spec.whatwg.org/#url-representation // A URL is a struct that represents a universal identifier. To disambiguate from a valid URL string it can also be referred to as a URL record. class URL { @@ -95,26 +123,26 @@ public: { } - bool is_valid() const { return m_valid; } + bool is_valid() const { return m_data->valid; } - String const& scheme() const { return m_scheme; } + String const& scheme() const { return m_data->scheme; } ErrorOr username() const; ErrorOr password() const; - Host const& host() const { return m_host; } + Host const& host() const { return m_data->host; } ErrorOr serialized_host() const; ByteString basename() const; - Optional const& query() const { return m_query; } - Optional const& fragment() const { return m_fragment; } - Optional port() const { return m_port; } + Optional const& query() const { return m_data->query; } + Optional const& fragment() const { return m_data->fragment; } + Optional port() const { return m_data->port; } ByteString path_segment_at_index(size_t index) const; - size_t path_segment_count() const { return m_paths.size(); } + size_t path_segment_count() const { return m_data->paths.size(); } - u16 port_or_default() const { return m_port.value_or(default_port_for_scheme(m_scheme).value_or(0)); } - bool cannot_be_a_base_url() const { return m_cannot_be_a_base_url; } + u16 port_or_default() const { return m_data->port.value_or(default_port_for_scheme(m_data->scheme).value_or(0)); } + bool cannot_be_a_base_url() const { return m_data->cannot_be_a_base_url; } bool cannot_have_a_username_or_password_or_port() const; - bool includes_credentials() const { return !m_username.is_empty() || !m_password.is_empty(); } - bool is_special() const { return is_special_scheme(m_scheme); } + bool includes_credentials() const { return !m_data->username.is_empty() || !m_data->password.is_empty(); } + bool is_special() const { return is_special_scheme(m_data->scheme); } void set_scheme(String); ErrorOr set_username(StringView); @@ -122,14 +150,14 @@ public: void set_host(Host); void set_port(Optional); void set_paths(Vector const&); - void set_query(Optional query) { m_query = move(query); } - void set_fragment(Optional fragment) { m_fragment = move(fragment); } - void set_cannot_be_a_base_url(bool value) { m_cannot_be_a_base_url = value; } + void set_query(Optional query) { m_data->query = move(query); } + void set_fragment(Optional fragment) { m_data->fragment = move(fragment); } + void set_cannot_be_a_base_url(bool value) { m_data->cannot_be_a_base_url = value; } void append_path(StringView); void append_slash() { // NOTE: To indicate that we want to end the path with a slash, we have to append an empty path segment. - m_paths.append(String {}); + m_data->paths.append(String {}); } ByteString serialize_path(ApplyPercentDecoding = ApplyPercentDecoding::Yes) const; @@ -145,49 +173,74 @@ public: URL complete_url(StringView) const; - bool operator==(URL const& other) const { return equals(other, ExcludeFragment::No); } + [[nodiscard]] bool operator==(URL const& other) const + { + if (m_data.ptr() == other.m_data.ptr()) + return true; + return equals(other, ExcludeFragment::No); + } - String const& raw_username() const { return m_username; } - String const& raw_password() const { return m_password; } + String const& raw_username() const { return m_data->username; } + String const& raw_password() const { return m_data->password; } - Optional const& blob_url_entry() const { return m_blob_url_entry; } - void set_blob_url_entry(Optional entry) { m_blob_url_entry = move(entry); } + Optional const& blob_url_entry() const { return m_data->blob_url_entry; } + void set_blob_url_entry(Optional entry) { m_data->blob_url_entry = move(entry); } private: bool compute_validity() const; - bool m_valid { false }; + struct Data : public RefCounted { + NonnullRefPtr clone() + { + auto clone = adopt_ref(*new Data); + clone->valid = valid; + clone->scheme = scheme; + clone->username = username; + clone->password = password; + clone->host = host; + clone->port = port; + clone->paths = paths; + clone->query = query; + clone->fragment = fragment; + clone->cannot_be_a_base_url = cannot_be_a_base_url; + clone->blob_url_entry = blob_url_entry; + return clone; + } - // A URL’s scheme is an ASCII string that identifies the type of URL and can be used to dispatch a URL for further processing after parsing. It is initially the empty string. - String m_scheme; + bool valid { false }; - // A URL’s username is an ASCII string identifying a username. It is initially the empty string. - String m_username; + // A URL’s scheme is an ASCII string that identifies the type of URL and can be used to dispatch a URL for further processing after parsing. It is initially the empty string. + String scheme; - // A URL’s password is an ASCII string identifying a password. It is initially the empty string. - String m_password; + // A URL’s username is an ASCII string identifying a username. It is initially the empty string. + String username; - // A URL’s host is null or a host. It is initially null. - Host m_host; + // A URL’s password is an ASCII string identifying a password. It is initially the empty string. + String password; - // A URL’s port is either null or a 16-bit unsigned integer that identifies a networking port. It is initially null. - Optional m_port; + // A URL’s host is null or a host. It is initially null. + Host host; - // A URL’s path is either a URL path segment or a list of zero or more URL path segments, usually identifying a location. It is initially « ». - // A URL path segment is an ASCII string. It commonly refers to a directory or a file, but has no predefined meaning. - Vector m_paths; + // A URL’s port is either null or a 16-bit unsigned integer that identifies a networking port. It is initially null. + Optional port; - // A URL’s query is either null or an ASCII string. It is initially null. - Optional m_query; + // A URL’s path is either a URL path segment or a list of zero or more URL path segments, usually identifying a location. It is initially « ». + // A URL path segment is an ASCII string. It commonly refers to a directory or a file, but has no predefined meaning. + Vector paths; - // A URL’s fragment is either null or an ASCII string that can be used for further processing on the resource the URL’s other components identify. It is initially null. - Optional m_fragment; + // A URL’s query is either null or an ASCII string. It is initially null. + Optional query; - bool m_cannot_be_a_base_url { false }; + // A URL’s fragment is either null or an ASCII string that can be used for further processing on the resource the URL’s other components identify. It is initially null. + Optional fragment; - // https://url.spec.whatwg.org/#concept-url-blob-entry - // A URL also has an associated blob URL entry that is either null or a blob URL entry. It is initially null. - Optional m_blob_url_entry; + bool cannot_be_a_base_url { false }; + + // https://url.spec.whatwg.org/#concept-url-blob-entry + // A URL also has an associated blob URL entry that is either null or a blob URL entry. It is initially null. + Optional blob_url_entry; + }; + CopyOnWrite m_data; }; URL create_with_url_or_path(ByteString const&);