LibWeb+LibURL: Consolidate Origin parsing and serialization into LibURL

Because of the previous awkward factoring of Origin we had two
implementations of Origin serializing and creation. Move the
implementation of DOMURL::url_origin into URL::origin, and
instead use the implemenation of URL::Origin::serialize for
serialization (replacing URL::serialize_origin).

This happens to fix 8 URL subtests as the two implemenations had
diverged, and URL::serialize_origin was previously missing the spec
changes of: whatwg/url@eee49fd and whatwg/url@fff33c3
This commit is contained in:
Shannon Booth 2024-10-05 17:03:51 +13:00 committed by Andreas Kling
commit 501f92b54e
Notes: github-actions[bot] 2024-10-05 08:47:31 +00:00
21 changed files with 77 additions and 101 deletions

View file

@ -0,0 +1,7 @@
<a id="a" href='blob:ftp://host/path'>
<script src="../include.js"></script>
<script>
test(() => {
println(document.getElementById("a").origin);
})
</script>

View file

@ -352,32 +352,54 @@ ErrorOr<String> URL::to_string() const
return String::from_byte_string(serialize());
}
// https://html.spec.whatwg.org/multipage/origin.html#ascii-serialisation-of-an-origin
// https://url.spec.whatwg.org/#concept-url-origin
ByteString URL::serialize_origin() const
Origin URL::origin() const
{
VERIFY(m_data->valid);
// The origin of a URL url is the origin returned by running these steps, switching on urls scheme:
// -> "blob"
if (scheme() == "blob"sv) {
auto url_string = to_string().release_value_but_fixme_should_propagate_errors();
if (m_data->scheme == "blob"sv) {
// TODO: 1. If URLs blob URL entry is non-null, then return URLs blob URL entrys environments origin.
// 2. Let url be the result of parsing URLs path[0].
VERIFY(!m_data->paths.is_empty());
URL url = m_data->paths[0];
// 3. Return a new opaque origin, if url is failure, and urls origin otherwise.
if (!url.is_valid())
return "null";
return url.serialize_origin();
} else if (!m_data->scheme.is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) { // file: "Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin."
return "null";
// 1. If urls blob URL entry is non-null, then return urls blob URL entrys environments origin.
if (blob_url_entry().has_value())
return blob_url_entry()->environment_origin;
// 2. Let pathURL be the result of parsing the result of URL path serializing url.
auto path_url = Parser::basic_parse(serialize_path());
// 3. If pathURL is failure, then return a new opaque origin.
if (!path_url.is_valid())
return Origin {};
// 4. If pathURLs scheme is "http", "https", or "file", then return pathURLs origin.
if (path_url.scheme().is_one_of("http"sv, "https"sv, "file"sv))
return path_url.origin();
// 5. Return a new opaque origin.
return Origin {};
}
StringBuilder builder;
builder.append(m_data->scheme);
builder.append("://"sv);
builder.append(serialized_host().release_value_but_fixme_should_propagate_errors());
if (m_data->port.has_value())
builder.appendff(":{}", *m_data->port);
return builder.to_byte_string();
// -> "ftp"
// -> "http"
// -> "https"
// -> "ws"
// -> "wss"
if (scheme().is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) {
// Return the tuple origin (urls scheme, urls host, urls port, null).
return Origin(scheme().to_byte_string(), host(), port().value_or(0));
}
// -> "file"
// AD-HOC: Our resource:// is basically an alias to file://
if (scheme() == "file"sv || scheme() == "resource"sv) {
// Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin.
// Note: We must return an origin with the `file://' protocol for `file://' iframes to work from `file://' pages.
return Origin(scheme().to_byte_string(), String {}, 0);
}
// -> Otherwise
// Return a new opaque origin.
return Origin {};
}
bool URL::equals(URL const& other, ExcludeFragment exclude_fragments) const

View file

@ -122,8 +122,7 @@ public:
ByteString to_byte_string() const { return serialize(); }
ErrorOr<String> to_string() const;
// HTML origin
ByteString serialize_origin() const;
Origin origin() const;
bool equals(URL const& other, ExcludeFragment = ExcludeFragment::No) const;

View file

@ -141,7 +141,7 @@ WebIDL::ExceptionOr<void> DOMURL::revoke_object_url(JS::VM& vm, StringView url)
return {};
// 3. Let origin be the origin of url record.
auto origin = url_origin(url_record);
auto origin = url_record.origin();
// 4. Let settings be the current settings object.
auto& settings = HTML::current_settings_object();
@ -218,7 +218,7 @@ WebIDL::ExceptionOr<String> DOMURL::origin() const
auto& vm = realm().vm();
// The origin getter steps are to return the serialization of thiss URLs origin. [HTML]
return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_url.serialize_origin()));
return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_url.origin().serialize()));
}
// https://url.spec.whatwg.org/#dom-url-protocol
@ -478,58 +478,6 @@ void DOMURL::set_hash(String const& hash)
(void)URL::Parser::basic_parse(input, {}, &m_url, URL::Parser::State::Fragment);
}
// https://url.spec.whatwg.org/#concept-url-origin
URL::Origin url_origin(URL::URL const& url)
{
// FIXME: We should probably have an extended version of URL::URL for LibWeb instead of standalone functions like this.
// The origin of a URL url is the origin returned by running these steps, switching on urls scheme:
// -> "blob"
if (url.scheme() == "blob"sv) {
auto url_string = url.to_string().release_value_but_fixme_should_propagate_errors();
// 1. If urls blob URL entry is non-null, then return urls blob URL entrys environments origin.
if (url.blob_url_entry().has_value())
return url.blob_url_entry()->environment_origin;
// 2. Let pathURL be the result of parsing the result of URL path serializing url.
auto path_url = parse(url.serialize_path());
// 3. If pathURL is failure, then return a new opaque origin.
if (!path_url.is_valid())
return URL::Origin {};
// 4. If pathURLs scheme is "http", "https", or "file", then return pathURLs origin.
if (path_url.scheme().is_one_of("http"sv, "https"sv, "file"sv))
return url_origin(path_url);
// 5. Return a new opaque origin.
return URL::Origin {};
}
// -> "ftp"
// -> "http"
// -> "https"
// -> "ws"
// -> "wss"
if (url.scheme().is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) {
// Return the tuple origin (urls scheme, urls host, urls port, null).
return URL::Origin(url.scheme().to_byte_string(), url.host(), url.port().value_or(0));
}
// -> "file"
// AD-HOC: Our resource:// is basically an alias to file://
if (url.scheme() == "file"sv || url.scheme() == "resource"sv) {
// Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin.
// Note: We must return an origin with the `file://' protocol for `file://' iframes to work from `file://' pages.
return URL::Origin(url.scheme().to_byte_string(), String {}, 0);
}
// -> Otherwise
// Return a new opaque origin.
return URL::Origin {};
}
// https://url.spec.whatwg.org/#concept-domain
bool host_is_domain(URL::Host const& host)
{

View file

@ -92,7 +92,6 @@ private:
JS::NonnullGCPtr<URLSearchParams> m_query;
};
URL::Origin url_origin(URL::URL const&);
bool host_is_domain(URL::Host const&);
// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path

View file

@ -70,7 +70,7 @@ bool tao_check(Infrastructure::Request const& request, Infrastructure::Response
// information, but the container document would not.
if (request.mode() == Infrastructure::Request::Mode::Navigate
&& request.origin().has<URL::Origin>()
&& !DOMURL::url_origin(request.current_url()).is_same_origin(request.origin().get<URL::Origin>())) {
&& !request.current_url().origin().is_same_origin(request.origin().get<URL::Origin>())) {
return false;
}

View file

@ -353,7 +353,7 @@ WebIDL::ExceptionOr<JS::GCPtr<PendingResponse>> main_fetch(JS::Realm& realm, Inf
// -> requests current URLs scheme is "data"
// -> requests mode is "navigate" or "websocket"
else if (
(request->origin().has<URL::Origin>() && DOMURL::url_origin(request->current_url()).is_same_origin(request->origin().get<URL::Origin>()) && request->response_tainting() == Infrastructure::Request::ResponseTainting::Basic)
(request->origin().has<URL::Origin>() && request->current_url().origin().is_same_origin(request->origin().get<URL::Origin>()) && request->response_tainting() == Infrastructure::Request::ResponseTainting::Basic)
|| request->current_url().scheme() == "data"sv
|| (request->mode() == Infrastructure::Request::Mode::Navigate || request->mode() == Infrastructure::Request::Mode::WebSocket)) {
// 1. Set requests response tainting to "basic".
@ -1201,7 +1201,7 @@ WebIDL::ExceptionOr<JS::GCPtr<PendingResponse>> http_redirect_fetch(JS::Realm& r
if (request->mode() == Infrastructure::Request::Mode::CORS
&& location_url.includes_credentials()
&& request->origin().has<URL::Origin>()
&& !request->origin().get<URL::Origin>().is_same_origin(DOMURL::url_origin(location_url))) {
&& !request->origin().get<URL::Origin>().is_same_origin(location_url.origin())) {
return PendingResponse::create(vm, request, Infrastructure::Response::network_error(vm, "Request with 'cors' mode and different URL and request origin must not include credentials in redirect URL"sv));
}
@ -1244,7 +1244,7 @@ WebIDL::ExceptionOr<JS::GCPtr<PendingResponse>> http_redirect_fetch(JS::Realm& r
// 13. If requests current URLs origin is not same origin with locationURLs origin, then for each headerName of
// CORS non-wildcard request-header name, delete headerName from requests header list.
// NOTE: I.e., the moment another origin is seen after the initial request, the `Authorization` header is removed.
if (!DOMURL::url_origin(request->current_url()).is_same_origin(DOMURL::url_origin(location_url))) {
if (!request->current_url().origin().is_same_origin(location_url.origin())) {
static constexpr Array cors_non_wildcard_request_header_names {
"Authorization"sv
};
@ -2578,7 +2578,7 @@ void set_sec_fetch_site_header(Infrastructure::Request& request)
if (!header_value.equals_ignoring_ascii_case("none"sv)) {
for (auto& url : request.url_list()) {
// 1. If url is same origin with rs origin, continue.
if (DOMURL::url_origin(url).is_same_origin(DOMURL::url_origin(request.current_url())))
if (url.origin().is_same_origin(request.current_url().origin()))
continue;
// 2. Set headers value to cross-site.

View file

@ -173,8 +173,8 @@ bool Request::has_redirect_tainted_origin() const
// 2. If urls origin is not same origin with lastURLs origin and requests origin is not same origin with lastURLs origin, then return true.
auto const* request_origin = m_origin.get_pointer<URL::Origin>();
if (!DOMURL::url_origin(url).is_same_origin(DOMURL::url_origin(*last_url))
&& (request_origin == nullptr || !request_origin->is_same_origin(DOMURL::url_origin(*last_url)))) {
if (!url.origin().is_same_origin(last_url->origin())
&& (request_origin == nullptr || !request_origin->is_same_origin(last_url->origin()))) {
return true;
}
@ -328,7 +328,7 @@ void Request::add_origin_header()
case ReferrerPolicy::ReferrerPolicy::SameOrigin:
// If requests origin is not same origin with requests current URLs origin, then set serializedOrigin
// to `null`.
if (m_origin.has<URL::Origin>() && !m_origin.get<URL::Origin>().is_same_origin(DOMURL::url_origin(current_url())))
if (m_origin.has<URL::Origin>() && !m_origin.get<URL::Origin>().is_same_origin(current_url().origin()))
serialized_origin = MUST(ByteBuffer::copy("null"sv.bytes()));
break;
// -> Otherwise
@ -368,7 +368,7 @@ bool Request::cross_origin_embedder_policy_allows_credentials() const
if (request_origin == nullptr)
return false;
return request_origin->is_same_origin(DOMURL::url_origin(current_url())) && !has_redirect_tainted_origin();
return request_origin->is_same_origin(current_url().origin()) && !has_redirect_tainted_origin();
}
StringView request_destination_to_string(Request::Destination destination)

View file

@ -309,7 +309,7 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<Request>> Request::construct_impl(JS::Realm
// - parsedReferrers scheme is "about" and path is the string "client"
// - parsedReferrers origin is not same origin with origin
// then set requests referrer to "client".
auto parsed_referrer_origin = DOMURL::url_origin(parsed_referrer);
auto parsed_referrer_origin = parsed_referrer.origin();
if ((parsed_referrer.scheme() == "about"sv && parsed_referrer.paths().size() == 1 && parsed_referrer.paths()[0] == "client"sv)
|| !parsed_referrer_origin.is_same_origin(origin)) {
request->set_referrer(Infrastructure::Request::Referrer::Client);

View file

@ -85,7 +85,7 @@ URL::Origin determine_the_origin(Optional<URL::URL> const& url, SandboxingFlagSe
return source_origin.release_value();
// 5. Return url's origin.
return DOMURL::url_origin(*url);
return url->origin();
}
// https://html.spec.whatwg.org/multipage/document-sequences.html#creating-a-new-auxiliary-browsing-context

View file

@ -449,7 +449,7 @@ void EventSource::dispatch_the_event()
// the value of the event type buffer.
MessageEventInit init {};
init.data = JS::PrimitiveString::create(vm(), data_buffer);
init.origin = MUST(String::from_byte_string(m_url.serialize_origin()));
init.origin = MUST(String::from_byte_string(m_url.origin().serialize()));
init.last_event_id = last_event_id;
auto type = m_event_type.is_empty() ? HTML::EventNames::message : m_event_type;

View file

@ -50,7 +50,7 @@ String HTMLHyperlinkElementUtils::origin() const
return String {};
// 3. Return the serialization of this element's url's origin.
return MUST(String::from_byte_string(m_url->serialize_origin()));
return MUST(String::from_byte_string(m_url->origin().serialize()));
}
// https://html.spec.whatwg.org/multipage/links.html#dom-hyperlink-protocol

View file

@ -139,7 +139,7 @@ WebIDL::ExceptionOr<String> Location::origin() const
return WebIDL::SecurityError::create(realm(), "Location's relevant document is not same origin-domain with the entry settings object's origin"_fly_string);
// 2. Return the serialization of this's url's origin.
return TRY_OR_THROW_OOM(vm, String::from_byte_string(url().serialize_origin()));
return TRY_OR_THROW_OOM(vm, String::from_byte_string(url().origin().serialize()));
}
// https://html.spec.whatwg.org/multipage/history.html#dom-location-protocol

View file

@ -1033,7 +1033,7 @@ WebIDL::ExceptionOr<void> Window::window_post_message_steps(JS::Value message, W
return WebIDL::SyntaxError::create(target_realm, MUST(String::formatted("Invalid URL for targetOrigin: '{}'", options.target_origin)));
// 3. Set targetOrigin to parsedURL's origin.
target_origin = DOMURL::url_origin(parsed_url);
target_origin = parsed_url.origin();
}
// 6. Let transfer be options["transfer"].

View file

@ -26,7 +26,7 @@ WebIDL::ExceptionOr<String> WorkerLocation::origin() const
{
auto& vm = realm().vm();
// The origin getter steps are to return the serialization of this's WorkerGlobalScope object's url's origin.
return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_global_scope->url().serialize_origin()));
return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_global_scope->url().origin().serialize()));
}
// https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-protocol

View file

@ -143,7 +143,7 @@ void Internals::spoof_current_url(String const& url_string)
VERIFY(url.is_valid());
auto origin = DOMURL::url_origin(url);
auto origin = url.origin();
auto& window = internals_window();
window.associated_document().set_url(url);

View file

@ -82,7 +82,7 @@ ErrorOr<void> AutoplayAllowlist::enable_for_origins(ReadonlySpan<String> origins
continue;
}
TRY(allowlist.try_append(DOMURL::url_origin(url)));
TRY(allowlist.try_append(url.origin()));
}
return {};

View file

@ -146,7 +146,7 @@ Optional<URL::URL> determine_requests_referrer(Fetch::Infrastructure::Request co
case ReferrerPolicy::StrictOriginWhenCrossOrigin:
// 1. If the origin of referrerURL and the origin of requests current URL are the same, then return
// referrerURL.
if (referrer_url.has_value() && DOMURL::url_origin(*referrer_url).is_same_origin(DOMURL::url_origin(request.current_url())))
if (referrer_url.has_value() && referrer_url->origin().is_same_origin(request.current_url().origin()))
return referrer_url;
// 2. If referrerURL is a potentially trustworthy URL and requests current URL is not a potentially
@ -164,7 +164,7 @@ Optional<URL::URL> determine_requests_referrer(Fetch::Infrastructure::Request co
// 1. If the origin of referrerURL and the origin of requests current URL are the same, then return
// referrerURL.
if (referrer_url.has_value()
&& DOMURL::url_origin(*referrer_url).is_same_origin(DOMURL::url_origin(request.current_url()))) {
&& referrer_url->origin().is_same_origin(request.current_url().origin())) {
return referrer_url;
}
@ -175,7 +175,7 @@ Optional<URL::URL> determine_requests_referrer(Fetch::Infrastructure::Request co
// 1. If the origin of referrerURL and the origin of requests current URL are the same, then return
// referrerURL.
if (referrer_url.has_value()
&& DOMURL::url_origin(*referrer_url).is_same_origin(DOMURL::url_origin(request.current_url()))) {
&& referrer_url->origin().is_same_origin(request.current_url().origin())) {
return referrer_url;
}

View file

@ -80,7 +80,7 @@ Trustworthiness is_url_potentially_trustworthy(URL::URL const& url)
return Trustworthiness::PotentiallyTrustworthy;
// 3. Return the result of executing §3.1 Is origin potentially trustworthy? on urls origin.
return is_origin_potentially_trustworthy(DOMURL::url_origin(url));
return is_origin_potentially_trustworthy(url.origin());
}
}

View file

@ -36,7 +36,7 @@ StorageKey obtain_a_storage_key_for_non_storage_purposes(HTML::Environment const
auto& mutable_settings = const_cast<HTML::EnvironmentSettingsObject&>(settings);
return { mutable_settings.origin() };
}
return { DOMURL::url_origin(environment.creation_url) };
return { environment.creation_url.origin() };
}
}