ladybird/Libraries/LibURL/Parser.h
Shannon Booth 5bed8f4055 LibURL+LibWeb: Make URL::basic_parse return an Optional<URL>
URL::basic_parse has a subtle bug where the resulting URL is not set
to valid when a state override is provided and the URL parser early returns
a valid URL.

This has not surfaced as a problem so far, as the only users of the
state override API provide an already valid URL buffer and also ignore
the result of basic parsing with a state override.

However, this bug surfaces when implementing the URL pattern spec, which as
part of URL canonicalization:
 * Provides a dummy URL record
 * Basic URL parses that URL with state override
 * Checks the result of the URL parser to validate the URL

While we could set URL validity on every early return of the URL parser
during state override, it has been a long-standing FIXME in the code
to try and remove the awkward validity state of the URL class. So this
commit makes the first stage of this change by migrating the basic
parser API to return Optional, which also happens to make this subtle
issue not a problem any more.
2025-01-11 10:08:29 -05:00

72 lines
2.2 KiB
C++

/*
* Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
* Copyright (c) 2023-2024, Shannon Booth <shannon@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Optional.h>
#include <AK/StringView.h>
#include <LibTextCodec/Encoder.h>
#include <LibURL/URL.h>
namespace URL {
// X-macro listing every state of the URL parser, one STATE(...) entry per state.
// Usage: define STATE(name) to the desired per-state expansion, expand
// ENUMERATE_STATES, then #undef STATE again. Parser expands it twice, once to
// declare the Parser::State enumerators and once to build the switch in
// Parser::state_name(), so both are generated from this single list.
// The enumerators are declared in the order the entries appear here.
// This macro is #undef'd at the end of this header.
#define ENUMERATE_STATES \
STATE(SchemeStart) \
STATE(Scheme) \
STATE(NoScheme) \
STATE(SpecialRelativeOrAuthority) \
STATE(PathOrAuthority) \
STATE(Relative) \
STATE(RelativeSlash) \
STATE(SpecialAuthoritySlashes) \
STATE(SpecialAuthorityIgnoreSlashes) \
STATE(Authority) \
STATE(Host) \
STATE(Hostname) \
STATE(Port) \
STATE(File) \
STATE(FileSlash) \
STATE(FileHost) \
STATE(PathStart) \
STATE(Path) \
STATE(CannotBeABaseUrlPath) \
STATE(Query) \
STATE(Fragment)
// Static entry points for URL parsing and related helpers. Each out-of-line
// function below links to the URL Standard algorithm it corresponds to.
class Parser {
public:
    // Parser states, generated from ENUMERATE_STATES so that the enumerators and
    // the strings returned by state_name() come from the same list.
    enum class State {
#define STATE(name) name,
        ENUMERATE_STATES
#undef STATE
    };

    // Returns the identifier of `state` as a string literal (e.g. "SchemeStart"
    // for State::SchemeStart). A value that matches no enumerator falls out of
    // the switch and reaches VERIFY_NOT_REACHED().
    static char const* state_name(State const& state)
    {
        switch (state) {
#define STATE(name)   \
    case State::name: \
        return #name;
            ENUMERATE_STATES
#undef STATE
        }
        VERIFY_NOT_REACHED();
    }

    // https://url.spec.whatwg.org/#concept-basic-url-parser
    // Parses `input`, optionally against `base_url`, and returns the result as an
    // Optional<URL>. `url`, `state_override` and `encoding` are all optional and
    // default to "not provided".
    // NOTE(review): presumably an empty Optional signals a parse failure, and `url`
    // is the existing record that is modified when `state_override` is supplied, as
    // in the linked spec algorithm - confirm against the implementation.
    static Optional<URL> basic_parse(StringView input, Optional<URL const&> base_url = {}, URL* url = nullptr, Optional<State> state_override = {}, Optional<StringView> encoding = {});

    // https://url.spec.whatwg.org/#string-percent-encode-after-encoding
    // Percent-encodes `input` using the given encoder and percent-encode set and
    // returns the resulting String. `space_as_plus` defaults to false.
    // NOTE(review): presumably `space_as_plus` makes spaces encode as '+', matching
    // the spec's spaceAsPlus flag - confirm against the implementation.
    static String percent_encode_after_encoding(TextCodec::Encoder&, StringView input, PercentEncodeSet percent_encode_set, bool space_as_plus = false);

    // https://url.spec.whatwg.org/#shorten-a-urls-path
    // Applies the linked "shorten a URL's path" operation to the given URL, which
    // is taken by mutable reference.
    static void shorten_urls_path(URL&);
};
#undef ENUMERATE_STATES
}