From 8e37cd2f71171d183856b62be8edd7632756c0b4 Mon Sep 17 00:00:00 2001 From: Shannon Booth Date: Sat, 19 Apr 2025 17:11:24 +1200 Subject: [PATCH] LibURL: Remove URL's valid state No code now relies on using URL's valid state. A URL can still be _technically_ invalid through use of the URL constructor or by directly changing URL fields. However, all URLs should be constructed through the URL parser, and we should ideally be getting rid of the default constructor at some stage. Also, any code which is manually setting URL fields need to be aware that this is full of pitfalls since there are many different forms of canonicalization which is bypassed by not going through the URL parser. --- Libraries/LibURL/Parser.cpp | 1 - Libraries/LibURL/URL.cpp | 43 ------------------------------------- Libraries/LibURL/URL.h | 10 ++------- Tests/LibURL/TestURL.cpp | 5 ----- 4 files changed, 2 insertions(+), 57 deletions(-) diff --git a/Libraries/LibURL/Parser.cpp b/Libraries/LibURL/Parser.cpp index ed42a7854ba..8e075994f13 100644 --- a/Libraries/LibURL/Parser.cpp +++ b/Libraries/LibURL/Parser.cpp @@ -1663,7 +1663,6 @@ Optional Parser::basic_parse(StringView raw_input, Optional bas ++iterator; } - url->m_data->valid = true; dbgln_if(URL_PARSER_DEBUG, "URL::Parser::basic_parse: Parsed URL to be '{}'.", url->serialize()); // 10. Return url. diff --git a/Libraries/LibURL/URL.cpp b/Libraries/LibURL/URL.cpp index d49a58491ce..cd8843dd2ea 100644 --- a/Libraries/LibURL/URL.cpp +++ b/Libraries/LibURL/URL.cpp @@ -24,9 +24,6 @@ namespace URL { Optional URL::complete_url(StringView relative_url) const { - if (!is_valid()) - return {}; - return Parser::basic_parse(relative_url, *this); } @@ -38,8 +35,6 @@ ByteString URL::path_segment_at_index(size_t index) const ByteString URL::basename() const { - if (!m_data->valid) - return {}; if (m_data->paths.is_empty()) return {}; auto& last_segment = m_data->paths.last(); @@ -49,7 +44,6 @@ ByteString URL::basename() const void URL::set_scheme(String scheme) { m_data->scheme = move(scheme); - m_data->valid = compute_validity(); } // https://url.spec.whatwg.org/#set-the-username @@ -57,7 +51,6 @@ void URL::set_username(StringView username) { // To set the username given a url and username, set url’s username to the result of running UTF-8 percent-encode on username using the userinfo percent-encode set. m_data->username = percent_encode(username, PercentEncodeSet::Userinfo); - m_data->valid = compute_validity(); } // https://url.spec.whatwg.org/#set-the-password @@ -65,13 +58,11 @@ void URL::set_password(StringView password) { // To set the password given a url and password, set url’s password to the result of running UTF-8 percent-encode on password using the userinfo percent-encode set. m_data->password = percent_encode(password, PercentEncodeSet::Userinfo); - m_data->valid = compute_validity(); } void URL::set_host(Host host) { m_data->host = move(host); - m_data->valid = compute_validity(); } // https://url.spec.whatwg.org/#concept-host-serializer @@ -87,7 +78,6 @@ void URL::set_port(Optional port) return; } m_data->port = move(port); - m_data->valid = compute_validity(); } void URL::set_paths(Vector const& paths) @@ -96,7 +86,6 @@ void URL::set_paths(Vector const& paths) m_data->paths.ensure_capacity(paths.size()); for (auto const& segment : paths) m_data->paths.unchecked_append(percent_encode(segment, PercentEncodeSet::Path)); - m_data->valid = compute_validity(); } void URL::append_path(StringView path) @@ -112,33 +101,6 @@ bool URL::cannot_have_a_username_or_password_or_port() const return !m_data->host.has_value() || m_data->host->is_empty_host() || m_data->scheme == "file"sv; } -// FIXME: This is by no means complete. -// NOTE: This relies on some assumptions about how the spec-defined URL parser works that may turn out to be wrong. -bool URL::compute_validity() const -{ - if (m_data->scheme.is_empty()) - return false; - - if (m_data->has_an_opaque_path) { - if (m_data->paths.size() != 1) - return false; - if (m_data->paths[0].is_empty()) - return false; - } else { - if (m_data->scheme.is_one_of("about", "mailto")) - return false; - // NOTE: Maybe it is allowed to have a zero-segment path. - if (m_data->paths.size() == 0) - return false; - } - - // FIXME: A file URL's host should be the empty string for localhost, not null. - if (m_data->scheme == "file" && !m_data->host.has_value()) - return false; - - return true; -} - // https://url.spec.whatwg.org/#default-port Optional default_port_for_scheme(StringView scheme) { @@ -333,8 +295,6 @@ String URL::serialize(ExcludeFragment exclude_fragment) const // resulting from percent-decoding those sequences converted to bytes, unless that renders those sequences invisible. ByteString URL::serialize_for_display() const { - VERIFY(m_data->valid); - StringBuilder builder; builder.append(m_data->scheme); builder.append(':'); @@ -422,8 +382,6 @@ bool URL::equals(URL const& other, ExcludeFragment exclude_fragments) const { if (this == &other) return true; - if (!m_data->valid || !other.m_data->valid) - return false; return serialize(exclude_fragments) == other.serialize(exclude_fragments); } @@ -495,7 +453,6 @@ String percent_encode(StringView input, PercentEncodeSet set, SpaceAsPlus space_ URL URL::about(String path) { URL url; - url.m_data->valid = true; url.m_data->scheme = "about"_string; url.m_data->paths = { move(path) }; url.m_data->has_an_opaque_path = true; diff --git a/Libraries/LibURL/URL.h b/Libraries/LibURL/URL.h index 40ae8637ad7..db2ccc04411 100644 --- a/Libraries/LibURL/URL.h +++ b/Libraries/LibURL/URL.h @@ -77,10 +77,9 @@ class URL { friend class Parser; public: + // FIXME: We should get rid of the default constructor, all URLs should be constructed through the Parser. URL() = default; - bool is_valid() const { return m_data->valid; } - String const& scheme() const { return m_data->scheme; } String const& username() const { return m_data->username; } String const& password() const { return m_data->password; } @@ -147,13 +146,10 @@ public: static URL about(String path); private: - bool compute_validity() const; - struct Data : public RefCounted { - NonnullRefPtr clone() + NonnullRefPtr clone() const { auto clone = adopt_ref(*new Data); - clone->valid = valid; clone->scheme = scheme; clone->username = username; clone->password = password; @@ -167,8 +163,6 @@ private: return clone; } - bool valid { false }; - // A URL’s scheme is an ASCII string that identifies the type of URL and can be used to dispatch a URL for further processing after parsing. It is initially the empty string. String scheme; diff --git a/Tests/LibURL/TestURL.cpp b/Tests/LibURL/TestURL.cpp index 81d1279c3ef..7c850964708 100644 --- a/Tests/LibURL/TestURL.cpp +++ b/Tests/LibURL/TestURL.cpp @@ -10,11 +10,6 @@ #include #include -TEST_CASE(construct) -{ - EXPECT_EQ(URL::URL().is_valid(), false); -} - TEST_CASE(basic) { {