LibURL: Remove URL's valid state
Some checks are pending
CI / Lagom (arm64, Sanitizer_CI, false, macos-15, macOS, Clang) (push) Waiting to run
CI / Lagom (x86_64, Fuzzers_CI, false, ubuntu-24.04, Linux, Clang) (push) Waiting to run
CI / Lagom (x86_64, Sanitizer_CI, false, ubuntu-24.04, Linux, GNU) (push) Waiting to run
CI / Lagom (x86_64, Sanitizer_CI, true, ubuntu-24.04, Linux, Clang) (push) Waiting to run
Package the js repl as a binary artifact / build-and-package (arm64, macos-15, macOS, macOS-universal2) (push) Waiting to run
Package the js repl as a binary artifact / build-and-package (x86_64, ubuntu-24.04, Linux, Linux-x86_64) (push) Waiting to run
Run test262 and test-wasm / run_and_update_results (push) Waiting to run
Lint Code / lint (push) Waiting to run
Label PRs with merge conflicts / auto-labeler (push) Waiting to run
Push notes / build (push) Waiting to run

No code now relies on using URL's valid state.

A URL can still be _technically_ invalid through use of the URL
constructor or by directly changing URL fields.

However, all URLs should be constructed through the URL parser,
and we should ideally be getting rid of the default constructor
at some stage.

Also, any code which is manually setting URL fields need to be
aware that this is full of pitfalls since there are many different
forms of canonicalization which is bypassed by not going through
the URL parser.
This commit is contained in:
Shannon Booth 2025-04-19 17:11:24 +12:00 committed by Tim Flynn
parent 8d0fb91450
commit 8e37cd2f71
Notes: github-actions[bot] 2025-04-19 11:19:39 +00:00
4 changed files with 2 additions and 57 deletions

View file

@ -1663,7 +1663,6 @@ Optional<URL> Parser::basic_parse(StringView raw_input, Optional<URL const&> bas
++iterator;
}
url->m_data->valid = true;
dbgln_if(URL_PARSER_DEBUG, "URL::Parser::basic_parse: Parsed URL to be '{}'.", url->serialize());
// 10. Return url.

View file

@ -24,9 +24,6 @@ namespace URL {
Optional<URL> URL::complete_url(StringView relative_url) const
{
if (!is_valid())
return {};
return Parser::basic_parse(relative_url, *this);
}
@ -38,8 +35,6 @@ ByteString URL::path_segment_at_index(size_t index) const
ByteString URL::basename() const
{
if (!m_data->valid)
return {};
if (m_data->paths.is_empty())
return {};
auto& last_segment = m_data->paths.last();
@ -49,7 +44,6 @@ ByteString URL::basename() const
void URL::set_scheme(String scheme)
{
m_data->scheme = move(scheme);
m_data->valid = compute_validity();
}
// https://url.spec.whatwg.org/#set-the-username
@ -57,7 +51,6 @@ void URL::set_username(StringView username)
{
// To set the username given a url and username, set urls username to the result of running UTF-8 percent-encode on username using the userinfo percent-encode set.
m_data->username = percent_encode(username, PercentEncodeSet::Userinfo);
m_data->valid = compute_validity();
}
// https://url.spec.whatwg.org/#set-the-password
@ -65,13 +58,11 @@ void URL::set_password(StringView password)
{
// To set the password given a url and password, set urls password to the result of running UTF-8 percent-encode on password using the userinfo percent-encode set.
m_data->password = percent_encode(password, PercentEncodeSet::Userinfo);
m_data->valid = compute_validity();
}
void URL::set_host(Host host)
{
m_data->host = move(host);
m_data->valid = compute_validity();
}
// https://url.spec.whatwg.org/#concept-host-serializer
@ -87,7 +78,6 @@ void URL::set_port(Optional<u16> port)
return;
}
m_data->port = move(port);
m_data->valid = compute_validity();
}
void URL::set_paths(Vector<ByteString> const& paths)
@ -96,7 +86,6 @@ void URL::set_paths(Vector<ByteString> const& paths)
m_data->paths.ensure_capacity(paths.size());
for (auto const& segment : paths)
m_data->paths.unchecked_append(percent_encode(segment, PercentEncodeSet::Path));
m_data->valid = compute_validity();
}
void URL::append_path(StringView path)
@ -112,33 +101,6 @@ bool URL::cannot_have_a_username_or_password_or_port() const
return !m_data->host.has_value() || m_data->host->is_empty_host() || m_data->scheme == "file"sv;
}
// FIXME: This is by no means complete.
// NOTE: This relies on some assumptions about how the spec-defined URL parser works that may turn out to be wrong.
bool URL::compute_validity() const
{
if (m_data->scheme.is_empty())
return false;
if (m_data->has_an_opaque_path) {
if (m_data->paths.size() != 1)
return false;
if (m_data->paths[0].is_empty())
return false;
} else {
if (m_data->scheme.is_one_of("about", "mailto"))
return false;
// NOTE: Maybe it is allowed to have a zero-segment path.
if (m_data->paths.size() == 0)
return false;
}
// FIXME: A file URL's host should be the empty string for localhost, not null.
if (m_data->scheme == "file" && !m_data->host.has_value())
return false;
return true;
}
// https://url.spec.whatwg.org/#default-port
Optional<u16> default_port_for_scheme(StringView scheme)
{
@ -333,8 +295,6 @@ String URL::serialize(ExcludeFragment exclude_fragment) const
// resulting from percent-decoding those sequences converted to bytes, unless that renders those sequences invisible.
ByteString URL::serialize_for_display() const
{
VERIFY(m_data->valid);
StringBuilder builder;
builder.append(m_data->scheme);
builder.append(':');
@ -422,8 +382,6 @@ bool URL::equals(URL const& other, ExcludeFragment exclude_fragments) const
{
if (this == &other)
return true;
if (!m_data->valid || !other.m_data->valid)
return false;
return serialize(exclude_fragments) == other.serialize(exclude_fragments);
}
@ -495,7 +453,6 @@ String percent_encode(StringView input, PercentEncodeSet set, SpaceAsPlus space_
URL URL::about(String path)
{
URL url;
url.m_data->valid = true;
url.m_data->scheme = "about"_string;
url.m_data->paths = { move(path) };
url.m_data->has_an_opaque_path = true;

View file

@ -77,10 +77,9 @@ class URL {
friend class Parser;
public:
// FIXME: We should get rid of the default constructor, all URLs should be constructed through the Parser.
URL() = default;
bool is_valid() const { return m_data->valid; }
String const& scheme() const { return m_data->scheme; }
String const& username() const { return m_data->username; }
String const& password() const { return m_data->password; }
@ -147,13 +146,10 @@ public:
static URL about(String path);
private:
bool compute_validity() const;
struct Data : public RefCounted<Data> {
NonnullRefPtr<Data> clone()
NonnullRefPtr<Data> clone() const
{
auto clone = adopt_ref(*new Data);
clone->valid = valid;
clone->scheme = scheme;
clone->username = username;
clone->password = password;
@ -167,8 +163,6 @@ private:
return clone;
}
bool valid { false };
// A URLs scheme is an ASCII string that identifies the type of URL and can be used to dispatch a URL for further processing after parsing. It is initially the empty string.
String scheme;

View file

@ -10,11 +10,6 @@
#include <LibURL/Parser.h>
#include <LibURL/URL.h>
TEST_CASE(construct)
{
EXPECT_EQ(URL::URL().is_valid(), false);
}
TEST_CASE(basic)
{
{