From 90e763de4c8396a036870e0b803a992c3acb66c8 Mon Sep 17 00:00:00 2001 From: Sam Atkins Date: Wed, 27 Nov 2024 12:48:28 +0000 Subject: [PATCH] LibURL: Replace Host's Empty state with making Url's Host optional A couple of reasons: - Origin's Host (when in the tuple state) can't be null - There's an "empty host" concept in the spec which is NOT the same as a null Host, and that was confusing me. --- Libraries/LibCore/Proxy.h | 4 ++-- Libraries/LibURL/Host.h | 2 +- Libraries/LibURL/Parser.cpp | 2 +- Libraries/LibURL/URL.cpp | 18 +++++++++--------- Libraries/LibURL/URL.h | 4 ++-- Libraries/LibWeb/DOMURL/DOMURL.cpp | 5 +++-- Libraries/LibWeb/Fetch/Fetching/Fetching.cpp | 2 +- Libraries/LibWeb/HTML/BrowsingContext.cpp | 4 ++-- .../LibWeb/HTML/HTMLHyperlinkElementUtils.cpp | 5 +++-- Libraries/LibWeb/HTML/Location.cpp | 4 ++-- Libraries/LibWeb/HTML/WorkerLocation.cpp | 6 +++--- .../LibWeb/MixedContent/AbstractOperations.cpp | 2 +- Libraries/LibWeb/XHR/XMLHttpRequest.cpp | 2 +- Libraries/LibWebView/CookieJar.cpp | 2 +- Libraries/LibWebView/ViewImplementation.cpp | 2 +- Tests/LibURL/TestURL.cpp | 6 +++--- Tests/LibWeb/TestFetchURL.cpp | 16 ++++++++-------- 17 files changed, 44 insertions(+), 42 deletions(-) diff --git a/Libraries/LibCore/Proxy.h b/Libraries/LibCore/Proxy.h index 6a0688dc4cb..c014fb6ef28 100644 --- a/Libraries/LibCore/Proxy.h +++ b/Libraries/LibCore/Proxy.h @@ -36,9 +36,9 @@ struct ProxyData { proxy_data.type = ProxyData::Type::SOCKS5; - if (!url.host().has()) + if (!url.host().has_value() || !url.host()->has()) return Error::from_string_literal("Invalid proxy host, must be an IPv4 address"); - proxy_data.host_ipv4 = url.host().get(); + proxy_data.host_ipv4 = url.host()->get(); auto port = url.port(); if (!port.has_value()) diff --git a/Libraries/LibURL/Host.h b/Libraries/LibURL/Host.h index ee82c868e6b..e615d5fb264 100644 --- a/Libraries/LibURL/Host.h +++ b/Libraries/LibURL/Host.h @@ -26,6 +26,6 @@ using IPv6Address = Array; // https://url.spec.whatwg.org/#concept-host // A host is a domain, an IP address, an opaque host, or an empty host. Typically a host serves as a network address, // but it is sometimes used as opaque identifier in URLs where a network address is not necessary. -using Host = Variant; +using Host = Variant; } diff --git a/Libraries/LibURL/Parser.cpp b/Libraries/LibURL/Parser.cpp index 01f6f821d63..ecb2df72b2f 100644 --- a/Libraries/LibURL/Parser.cpp +++ b/Libraries/LibURL/Parser.cpp @@ -1574,7 +1574,7 @@ URL Parser::basic_parse(StringView raw_input, Optional const& base_url, URL continue; } // 5. Otherwise, if state override is given and url’s host is null, append the empty string to url’s path. - else if (state_override.has_value() && url->host().has()) { + else if (state_override.has_value() && !url->host().has_value()) { url->append_slash(); } break; diff --git a/Libraries/LibURL/URL.cpp b/Libraries/LibURL/URL.cpp index ba96886c857..b36bc83d747 100644 --- a/Libraries/LibURL/URL.cpp +++ b/Libraries/LibURL/URL.cpp @@ -88,7 +88,7 @@ void URL::set_host(Host host) // https://url.spec.whatwg.org/#concept-host-serializer ErrorOr URL::serialized_host() const { - return Parser::serialize_host(m_data->host); + return Parser::serialize_host(m_data->host.value()); } void URL::set_port(Optional port) @@ -119,7 +119,7 @@ void URL::append_path(StringView path) bool URL::cannot_have_a_username_or_password_or_port() const { // A URL cannot have a username/password/port if its host is null or the empty string, or its scheme is "file". - return m_data->host.has() || m_data->host == String {} || m_data->scheme == "file"sv; + return !m_data->host.has_value() || m_data->host == String {} || m_data->scheme == "file"sv; } // FIXME: This is by no means complete. @@ -142,8 +142,8 @@ bool URL::compute_validity() const return false; } - // NOTE: A file URL's host should be the empty string for localhost, not null. - if (m_data->scheme == "file" && m_data->host.has()) + // FIXME: A file URL's host should be the empty string for localhost, not null. + if (m_data->scheme == "file" && !m_data->host.has_value()) return false; return true; @@ -252,7 +252,7 @@ ByteString URL::serialize(ExcludeFragment exclude_fragment) const output.append(':'); // 2. If url’s host is non-null: - if (!m_data->host.has()) { + if (m_data->host.has_value()) { // 1. Append "//" to output. output.append("//"sv); @@ -285,7 +285,7 @@ ByteString URL::serialize(ExcludeFragment exclude_fragment) const if (cannot_be_a_base_url()) { output.append(m_data->paths[0]); } else { - if (m_data->host.has() && m_data->paths.size() > 1 && m_data->paths[0].is_empty()) + if (!m_data->host.has_value() && m_data->paths.size() > 1 && m_data->paths[0].is_empty()) output.append("/."sv); for (auto& segment : m_data->paths) { output.append('/'); @@ -321,7 +321,7 @@ ByteString URL::serialize_for_display() const builder.append(m_data->scheme); builder.append(':'); - if (!m_data->host.has()) { + if (m_data->host.has_value()) { builder.append("//"sv); builder.append(serialized_host().release_value_but_fixme_should_propagate_errors()); if (m_data->port.has_value()) @@ -331,7 +331,7 @@ ByteString URL::serialize_for_display() const if (cannot_be_a_base_url()) { builder.append(m_data->paths[0]); } else { - if (m_data->host.has() && m_data->paths.size() > 1 && m_data->paths[0].is_empty()) + if (!m_data->host.has_value() && m_data->paths.size() > 1 && m_data->paths[0].is_empty()) builder.append("/."sv); for (auto& segment : m_data->paths) { builder.append('/'); @@ -391,7 +391,7 @@ Origin URL::origin() const // -> "wss" if (scheme().is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) { // Return the tuple origin (url’s scheme, url’s host, url’s port, null). - return Origin(scheme().to_byte_string(), host(), port()); + return Origin(scheme().to_byte_string(), host().value(), port()); } // -> "file" diff --git a/Libraries/LibURL/URL.h b/Libraries/LibURL/URL.h index cb111dcc660..337bc64cead 100644 --- a/Libraries/LibURL/URL.h +++ b/Libraries/LibURL/URL.h @@ -83,7 +83,7 @@ public: String const& scheme() const { return m_data->scheme; } String const& username() const { return m_data->username; } String const& password() const { return m_data->password; } - Host const& host() const { return m_data->host; } + Optional const& host() const { return m_data->host; } ErrorOr serialized_host() const; ByteString basename() const; Optional const& query() const { return m_data->query; } @@ -171,7 +171,7 @@ private: String password; // A URL’s host is null or a host. It is initially null. - Host host; + Optional host; // A URL’s port is either null or a 16-bit unsigned integer that identifies a networking port. It is initially null. Optional port; diff --git a/Libraries/LibWeb/DOMURL/DOMURL.cpp b/Libraries/LibWeb/DOMURL/DOMURL.cpp index 2a087ee6fc2..ef7b89f4c88 100644 --- a/Libraries/LibWeb/DOMURL/DOMURL.cpp +++ b/Libraries/LibWeb/DOMURL/DOMURL.cpp @@ -284,7 +284,7 @@ WebIDL::ExceptionOr DOMURL::host() const auto& url = m_url; // 2. If url’s host is null, then return the empty string. - if (url.host().has()) + if (!url.host().has_value()) return String {}; // 3. If url’s port is null, return url’s host, serialized. @@ -312,7 +312,7 @@ WebIDL::ExceptionOr DOMURL::hostname() const auto& vm = realm().vm(); // 1. If this’s URL’s host is null, then return the empty string. - if (m_url.host().has()) + if (!m_url.host().has_value()) return String {}; // 2. Return this’s URL’s host, serialized. @@ -477,6 +477,7 @@ void DOMURL::set_hash(String const& hash) } // https://url.spec.whatwg.org/#concept-domain +// FIXME: Move into URL::Host bool host_is_domain(URL::Host const& host) { // A domain is a non-empty ASCII string that identifies a realm within a network. diff --git a/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp b/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp index ea23873ee81..b439c831a95 100644 --- a/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp +++ b/Libraries/LibWeb/Fetch/Fetching/Fetching.cpp @@ -322,7 +322,7 @@ WebIDL::ExceptionOr> main_fetch(JS::Realm& realm, Infra // - request’s current URL’s scheme is "http" request->current_url().scheme() == "http"sv // - request’s current URL’s host is a domain - && DOMURL::host_is_domain(request->current_url().host()) + && request->current_url().host().has_value() && DOMURL::host_is_domain(request->current_url().host().value()) // FIXME: - Matching request’s current URL’s host per Known HSTS Host Domain Name Matching results in either a // superdomain match with an asserted includeSubDomains directive or a congruent match (with or without an // asserted includeSubDomains directive) [HSTS]; or DNS resolution for the request finds a matching HTTPS RR diff --git a/Libraries/LibWeb/HTML/BrowsingContext.cpp b/Libraries/LibWeb/HTML/BrowsingContext.cpp index c9ca1fb51ee..a7d51c787d6 100644 --- a/Libraries/LibWeb/HTML/BrowsingContext.cpp +++ b/Libraries/LibWeb/HTML/BrowsingContext.cpp @@ -44,7 +44,7 @@ bool url_matches_about_blank(URL::URL const& url) && url.paths().size() == 1 && url.paths()[0] == "blank"sv && url.username().is_empty() && url.password().is_empty() - && url.host().has(); + && !url.host().has_value(); } // https://html.spec.whatwg.org/multipage/urls-and-fetching.html#matches-about:srcdoc @@ -56,7 +56,7 @@ bool url_matches_about_srcdoc(URL::URL const& url) && !url.query().has_value() && url.username().is_empty() && url.password().is_empty() - && url.host().has(); + && !url.host().has_value(); } // https://html.spec.whatwg.org/multipage/document-sequences.html#determining-the-origin diff --git a/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp b/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp index e2d3aee52b4..545839c6f84 100644 --- a/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp +++ b/Libraries/LibWeb/HTML/HTMLHyperlinkElementUtils.cpp @@ -167,7 +167,7 @@ String HTMLHyperlinkElementUtils::host() const auto const& url = m_url; // 3. If url or url's host is null, return the empty string. - if (!url.has_value() || url->host().has()) + if (!url.has_value() || !url->host().has_value()) return String {}; // 4. If url's port is null, return url's host, serialized. @@ -206,7 +206,8 @@ String HTMLHyperlinkElementUtils::hostname() const URL::URL url(href()); // 3. If url or url's host is null, return the empty string. - if (url.host().has()) + // FIXME: How can url be null here? + if (!url.host().has_value()) return String {}; // 4. Return url's host, serialized. diff --git a/Libraries/LibWeb/HTML/Location.cpp b/Libraries/LibWeb/HTML/Location.cpp index 6e0105b546f..3760e61f165 100644 --- a/Libraries/LibWeb/HTML/Location.cpp +++ b/Libraries/LibWeb/HTML/Location.cpp @@ -203,7 +203,7 @@ WebIDL::ExceptionOr Location::host() const auto url = this->url(); // 3. If url's host is null, return the empty string. - if (url.host().has()) + if (!url.host().has_value()) return String {}; // 4. If url's port is null, return url's host, serialized. @@ -233,7 +233,7 @@ WebIDL::ExceptionOr Location::hostname() const auto url = this->url(); // 2. If this's url's host is null, return the empty string. - if (url.host().has()) + if (!url.host().has_value()) return String {}; // 3. Return this's url's host, serialized. diff --git a/Libraries/LibWeb/HTML/WorkerLocation.cpp b/Libraries/LibWeb/HTML/WorkerLocation.cpp index 9a5232ccab5..f4902c64930 100644 --- a/Libraries/LibWeb/HTML/WorkerLocation.cpp +++ b/Libraries/LibWeb/HTML/WorkerLocation.cpp @@ -46,7 +46,7 @@ WebIDL::ExceptionOr WorkerLocation::host() const auto const& url = m_global_scope->url(); // 2. If url's host is null, return the empty string. - if (url.host().has()) + if (!url.host().has_value()) return String {}; // 3. If url's port is null, return url's host, serialized. @@ -67,11 +67,11 @@ WebIDL::ExceptionOr WorkerLocation::hostname() const auto const& host = m_global_scope->url().host(); // 2. If host is null, return the empty string. - if (host.has()) + if (!host.has_value()) return String {}; // 3. Return host, serialized. - return TRY_OR_THROW_OOM(vm, URL::Parser::serialize_host(host)); + return TRY_OR_THROW_OOM(vm, URL::Parser::serialize_host(host.value())); } // https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-port diff --git a/Libraries/LibWeb/MixedContent/AbstractOperations.cpp b/Libraries/LibWeb/MixedContent/AbstractOperations.cpp index e51c88bb628..f31ad8342a8 100644 --- a/Libraries/LibWeb/MixedContent/AbstractOperations.cpp +++ b/Libraries/LibWeb/MixedContent/AbstractOperations.cpp @@ -20,7 +20,7 @@ void upgrade_a_mixed_content_request_to_a_potentially_trustworthy_url_if_appropr SecureContexts::is_url_potentially_trustworthy(request.url()) == SecureContexts::Trustworthiness::PotentiallyTrustworthy // 2. request’s URL’s host is an IP address. - || request.url().host().has() || request.url().host().has() + || (request.url().host().has_value() && (request.url().host()->has() || request.url().host()->has())) // 3. § 4.3 Does settings prohibit mixed security contexts? returns "Does Not Restrict Mixed Security Contents" when applied to request’s client. || does_settings_prohibit_mixed_security_contexts(request.client()) == ProhibitsMixedSecurityContexts::DoesNotRestrictMixedSecurityContexts diff --git a/Libraries/LibWeb/XHR/XMLHttpRequest.cpp b/Libraries/LibWeb/XHR/XMLHttpRequest.cpp index fd84dbf9a6f..026b6a6c9a8 100644 --- a/Libraries/LibWeb/XHR/XMLHttpRequest.cpp +++ b/Libraries/LibWeb/XHR/XMLHttpRequest.cpp @@ -494,7 +494,7 @@ WebIDL::ExceptionOr XMLHttpRequest::open(String const& method_string, Stri // NOTE: This is handled in the overload lacking the async argument. // 8. If parsedURL’s host is non-null, then: - if (!parsed_url.host().has()) { + if (parsed_url.host().has_value()) { // 1. If the username argument is not null, set the username given parsedURL and username. if (username.has_value()) parsed_url.set_username(username.value()); diff --git a/Libraries/LibWebView/CookieJar.cpp b/Libraries/LibWebView/CookieJar.cpp index 9203c78ebac..179d04a97c8 100644 --- a/Libraries/LibWebView/CookieJar.cpp +++ b/Libraries/LibWebView/CookieJar.cpp @@ -220,7 +220,7 @@ void CookieJar::expire_cookies_with_time_offset(AK::Duration offset) // https://www.ietf.org/archive/id/draft-ietf-httpbis-rfc6265bis-15.html#section-5.1.2 Optional CookieJar::canonicalize_domain(const URL::URL& url) { - if (!url.is_valid() || url.host().has()) + if (!url.is_valid() || !url.host().has_value()) return {}; // 1. Convert the host name to a sequence of individual domain name labels. diff --git a/Libraries/LibWebView/ViewImplementation.cpp b/Libraries/LibWebView/ViewImplementation.cpp index 553e15c754f..e2242cb39c3 100644 --- a/Libraries/LibWebView/ViewImplementation.cpp +++ b/Libraries/LibWebView/ViewImplementation.cpp @@ -570,7 +570,7 @@ void ViewImplementation::handle_web_content_process_crash(LoadErrorPage load_err builder.append(escape_html_entities(m_url.to_byte_string())); builder.append(""sv); builder.append("

Web page crashed"sv); - if (!m_url.host().has()) { + if (m_url.host().has_value()) { builder.appendff(" on {}", escape_html_entities(m_url.serialized_host().release_value_but_fixme_should_propagate_errors())); } builder.append("

"sv); diff --git a/Tests/LibURL/TestURL.cpp b/Tests/LibURL/TestURL.cpp index 072c92f64d6..b1b225c4b41 100644 --- a/Tests/LibURL/TestURL.cpp +++ b/Tests/LibURL/TestURL.cpp @@ -206,7 +206,7 @@ TEST_CASE(about_url) URL::URL url("about:blank"sv); EXPECT(url.is_valid()); EXPECT_EQ(url.scheme(), "about"); - EXPECT(url.host().has()); + EXPECT(!url.host().has_value()); EXPECT_EQ(url.serialize_path(), "blank"); EXPECT(!url.query().has_value()); EXPECT(!url.fragment().has_value()); @@ -218,7 +218,7 @@ TEST_CASE(mailto_url) URL::URL url("mailto:mail@example.com"sv); EXPECT(url.is_valid()); EXPECT_EQ(url.scheme(), "mailto"); - EXPECT(url.host().has()); + EXPECT(!url.host().has_value()); EXPECT_EQ(url.port_or_default(), 0); EXPECT_EQ(url.path_segment_count(), 1u); EXPECT_EQ(url.path_segment_at_index(0), "mail@example.com"); @@ -232,7 +232,7 @@ TEST_CASE(mailto_url_with_subject) URL::URL url("mailto:mail@example.com?subject=test"sv); EXPECT(url.is_valid()); EXPECT_EQ(url.scheme(), "mailto"); - EXPECT(url.host().has()); + EXPECT(!url.host().has_value()); EXPECT_EQ(url.port_or_default(), 0); EXPECT_EQ(url.path_segment_count(), 1u); EXPECT_EQ(url.path_segment_at_index(0), "mail@example.com"); diff --git a/Tests/LibWeb/TestFetchURL.cpp b/Tests/LibWeb/TestFetchURL.cpp index 3c373fb82b7..031ab0ed1ac 100644 --- a/Tests/LibWeb/TestFetchURL.cpp +++ b/Tests/LibWeb/TestFetchURL.cpp @@ -14,7 +14,7 @@ TEST_CASE(data_url) URL::URL url("data:text/html,test"sv); EXPECT(url.is_valid()); EXPECT_EQ(url.scheme(), "data"); - EXPECT(url.host().has()); + EXPECT(!url.host().has_value()); EXPECT_EQ(url.serialize(), "data:text/html,test"); auto data_url = TRY_OR_FAIL(Web::Fetch::Infrastructure::process_data_url(url)); @@ -27,7 +27,7 @@ TEST_CASE(data_url_default_mime_type) URL::URL url("data:,test"sv); EXPECT(url.is_valid()); EXPECT_EQ(url.scheme(), "data"); - EXPECT(url.host().has()); + EXPECT(!url.host().has_value()); EXPECT_EQ(url.serialize(), "data:,test"); auto data_url = TRY_OR_FAIL(Web::Fetch::Infrastructure::process_data_url(url)); @@ -40,7 +40,7 @@ TEST_CASE(data_url_encoded) URL::URL url("data:text/html,Hello%20friends%2C%0X%X0"sv); EXPECT(url.is_valid()); EXPECT_EQ(url.scheme(), "data"); - EXPECT(url.host().has()); + EXPECT(!url.host().has_value()); EXPECT_EQ(url.serialize(), "data:text/html,Hello%20friends%2C%0X%X0"); auto data_url = TRY_OR_FAIL(Web::Fetch::Infrastructure::process_data_url(url)); @@ -53,7 +53,7 @@ TEST_CASE(data_url_base64_encoded) URL::URL url("data:text/html;base64,dGVzdA=="sv); EXPECT(url.is_valid()); EXPECT_EQ(url.scheme(), "data"); - EXPECT(url.host().has()); + EXPECT(!url.host().has_value()); EXPECT_EQ(url.serialize(), "data:text/html;base64,dGVzdA=="); auto data_url = TRY_OR_FAIL(Web::Fetch::Infrastructure::process_data_url(url)); @@ -66,7 +66,7 @@ TEST_CASE(data_url_base64_encoded_default_mime_type) URL::URL url("data:;base64,dGVzdA=="sv); EXPECT(url.is_valid()); EXPECT_EQ(url.scheme(), "data"); - EXPECT(url.host().has()); + EXPECT(!url.host().has_value()); EXPECT_EQ(url.serialize(), "data:;base64,dGVzdA=="); auto data_url = TRY_OR_FAIL(Web::Fetch::Infrastructure::process_data_url(url)); @@ -79,7 +79,7 @@ TEST_CASE(data_url_base64_encoded_with_whitespace) URL::URL url("data: text/html ; bAsE64 , dGVz dA== "sv); EXPECT(url.is_valid()); EXPECT_EQ(url.scheme(), "data"); - EXPECT(url.host().has()); + EXPECT(!url.host().has_value()); EXPECT_EQ(url.serialize(), "data: text/html ; bAsE64 , dGVz dA=="); auto data_url = TRY_OR_FAIL(Web::Fetch::Infrastructure::process_data_url(url)); @@ -92,7 +92,7 @@ TEST_CASE(data_url_base64_encoded_with_inline_whitespace) URL::URL url("data:text/javascript;base64,%20ZD%20Qg%0D%0APS%20An%20Zm91cic%0D%0A%207%20"sv); EXPECT(url.is_valid()); EXPECT_EQ(url.scheme(), "data"); - EXPECT(url.host().has()); + EXPECT(!url.host().has_value()); auto data_url = TRY_OR_FAIL(Web::Fetch::Infrastructure::process_data_url(url)); EXPECT_EQ(data_url.mime_type.serialized(), "text/javascript"); @@ -105,7 +105,7 @@ TEST_CASE(data_url_completed_with_fragment) EXPECT(url.is_valid()); EXPECT_EQ(url.scheme(), "data"); EXPECT_EQ(url.fragment(), "a"); - EXPECT(url.host().has()); + EXPECT(!url.host().has_value()); auto data_url = TRY_OR_FAIL(Web::Fetch::Infrastructure::process_data_url(url)); EXPECT_EQ(data_url.mime_type.serialized(), "text/plain");