LibWeb+LibURL: Consolidate Origin parsing and serialization into LibURL

Because of the previous awkward factoring of Origin we had two
implementations of Origin serializing and creation. Move the
implementation of DOMURL::url_origin into URL::origin, and
instead use the implementation of URL::Origin::serialize for
serialization (replacing URL::serialize_origin).

This happens to fix 8 URL subtests as the two implementations had
diverged, and URL::serialize_origin was previously missing the spec
changes of: whatwg/url@eee49fd and whatwg/url@fff33c3
This commit is contained in:
Shannon Booth 2024-10-05 17:03:51 +13:00 committed by Andreas Kling
commit 501f92b54e
Notes: github-actions[bot] 2024-10-05 08:47:31 +00:00
21 changed files with 77 additions and 101 deletions

View file

@ -0,0 +1,7 @@
<a id="a" href='blob:ftp://host/path'>
<script src="../include.js"></script>
<script>
test(() => {
println(document.getElementById("a").origin);
})
</script>

View file

@ -352,32 +352,54 @@ ErrorOr<String> URL::to_string() const
return String::from_byte_string(serialize()); return String::from_byte_string(serialize());
} }
// https://html.spec.whatwg.org/multipage/origin.html#ascii-serialisation-of-an-origin
// https://url.spec.whatwg.org/#concept-url-origin // https://url.spec.whatwg.org/#concept-url-origin
ByteString URL::serialize_origin() const Origin URL::origin() const
{ {
VERIFY(m_data->valid); // The origin of a URL url is the origin returned by running these steps, switching on urls scheme:
// -> "blob"
if (scheme() == "blob"sv) {
auto url_string = to_string().release_value_but_fixme_should_propagate_errors();
if (m_data->scheme == "blob"sv) { // 1. If urls blob URL entry is non-null, then return urls blob URL entrys environments origin.
// TODO: 1. If URLs blob URL entry is non-null, then return URLs blob URL entrys environments origin. if (blob_url_entry().has_value())
// 2. Let url be the result of parsing URLs path[0]. return blob_url_entry()->environment_origin;
VERIFY(!m_data->paths.is_empty());
URL url = m_data->paths[0]; // 2. Let pathURL be the result of parsing the result of URL path serializing url.
// 3. Return a new opaque origin, if url is failure, and urls origin otherwise. auto path_url = Parser::basic_parse(serialize_path());
if (!url.is_valid())
return "null"; // 3. If pathURL is failure, then return a new opaque origin.
return url.serialize_origin(); if (!path_url.is_valid())
} else if (!m_data->scheme.is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) { // file: "Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin." return Origin {};
return "null";
// 4. If pathURLs scheme is "http", "https", or "file", then return pathURLs origin.
if (path_url.scheme().is_one_of("http"sv, "https"sv, "file"sv))
return path_url.origin();
// 5. Return a new opaque origin.
return Origin {};
} }
StringBuilder builder; // -> "ftp"
builder.append(m_data->scheme); // -> "http"
builder.append("://"sv); // -> "https"
builder.append(serialized_host().release_value_but_fixme_should_propagate_errors()); // -> "ws"
if (m_data->port.has_value()) // -> "wss"
builder.appendff(":{}", *m_data->port); if (scheme().is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) {
return builder.to_byte_string(); // Return the tuple origin (urls scheme, urls host, urls port, null).
return Origin(scheme().to_byte_string(), host(), port().value_or(0));
}
// -> "file"
// AD-HOC: Our resource:// is basically an alias to file://
if (scheme() == "file"sv || scheme() == "resource"sv) {
// Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin.
// Note: We must return an origin with the `file://' protocol for `file://' iframes to work from `file://' pages.
return Origin(scheme().to_byte_string(), String {}, 0);
}
// -> Otherwise
// Return a new opaque origin.
return Origin {};
} }
bool URL::equals(URL const& other, ExcludeFragment exclude_fragments) const bool URL::equals(URL const& other, ExcludeFragment exclude_fragments) const

View file

@ -122,8 +122,7 @@ public:
ByteString to_byte_string() const { return serialize(); } ByteString to_byte_string() const { return serialize(); }
ErrorOr<String> to_string() const; ErrorOr<String> to_string() const;
// HTML origin Origin origin() const;
ByteString serialize_origin() const;
bool equals(URL const& other, ExcludeFragment = ExcludeFragment::No) const; bool equals(URL const& other, ExcludeFragment = ExcludeFragment::No) const;

View file

@ -141,7 +141,7 @@ WebIDL::ExceptionOr<void> DOMURL::revoke_object_url(JS::VM& vm, StringView url)
return {}; return {};
// 3. Let origin be the origin of url record. // 3. Let origin be the origin of url record.
auto origin = url_origin(url_record); auto origin = url_record.origin();
// 4. Let settings be the current settings object. // 4. Let settings be the current settings object.
auto& settings = HTML::current_settings_object(); auto& settings = HTML::current_settings_object();
@ -218,7 +218,7 @@ WebIDL::ExceptionOr<String> DOMURL::origin() const
auto& vm = realm().vm(); auto& vm = realm().vm();
// The origin getter steps are to return the serialization of thiss URLs origin. [HTML] // The origin getter steps are to return the serialization of thiss URLs origin. [HTML]
return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_url.serialize_origin())); return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_url.origin().serialize()));
} }
// https://url.spec.whatwg.org/#dom-url-protocol // https://url.spec.whatwg.org/#dom-url-protocol
@ -478,58 +478,6 @@ void DOMURL::set_hash(String const& hash)
(void)URL::Parser::basic_parse(input, {}, &m_url, URL::Parser::State::Fragment); (void)URL::Parser::basic_parse(input, {}, &m_url, URL::Parser::State::Fragment);
} }
// https://url.spec.whatwg.org/#concept-url-origin
URL::Origin url_origin(URL::URL const& url)
{
// FIXME: We should probably have an extended version of URL::URL for LibWeb instead of standalone functions like this.
// The origin of a URL url is the origin returned by running these steps, switching on urls scheme:
// -> "blob"
if (url.scheme() == "blob"sv) {
auto url_string = url.to_string().release_value_but_fixme_should_propagate_errors();
// 1. If urls blob URL entry is non-null, then return urls blob URL entrys environments origin.
if (url.blob_url_entry().has_value())
return url.blob_url_entry()->environment_origin;
// 2. Let pathURL be the result of parsing the result of URL path serializing url.
auto path_url = parse(url.serialize_path());
// 3. If pathURL is failure, then return a new opaque origin.
if (!path_url.is_valid())
return URL::Origin {};
// 4. If pathURLs scheme is "http", "https", or "file", then return pathURLs origin.
if (path_url.scheme().is_one_of("http"sv, "https"sv, "file"sv))
return url_origin(path_url);
// 5. Return a new opaque origin.
return URL::Origin {};
}
// -> "ftp"
// -> "http"
// -> "https"
// -> "ws"
// -> "wss"
if (url.scheme().is_one_of("ftp"sv, "http"sv, "https"sv, "ws"sv, "wss"sv)) {
// Return the tuple origin (urls scheme, urls host, urls port, null).
return URL::Origin(url.scheme().to_byte_string(), url.host(), url.port().value_or(0));
}
// -> "file"
// AD-HOC: Our resource:// is basically an alias to file://
if (url.scheme() == "file"sv || url.scheme() == "resource"sv) {
// Unfortunate as it is, this is left as an exercise to the reader. When in doubt, return a new opaque origin.
// Note: We must return an origin with the `file://' protocol for `file://' iframes to work from `file://' pages.
return URL::Origin(url.scheme().to_byte_string(), String {}, 0);
}
// -> Otherwise
// Return a new opaque origin.
return URL::Origin {};
}
// https://url.spec.whatwg.org/#concept-domain // https://url.spec.whatwg.org/#concept-domain
bool host_is_domain(URL::Host const& host) bool host_is_domain(URL::Host const& host)
{ {

View file

@ -92,7 +92,6 @@ private:
JS::NonnullGCPtr<URLSearchParams> m_query; JS::NonnullGCPtr<URLSearchParams> m_query;
}; };
URL::Origin url_origin(URL::URL const&);
bool host_is_domain(URL::Host const&); bool host_is_domain(URL::Host const&);
// https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path // https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path

View file

@ -70,7 +70,7 @@ bool tao_check(Infrastructure::Request const& request, Infrastructure::Response
// information, but the container document would not. // information, but the container document would not.
if (request.mode() == Infrastructure::Request::Mode::Navigate if (request.mode() == Infrastructure::Request::Mode::Navigate
&& request.origin().has<URL::Origin>() && request.origin().has<URL::Origin>()
&& !DOMURL::url_origin(request.current_url()).is_same_origin(request.origin().get<URL::Origin>())) { && !request.current_url().origin().is_same_origin(request.origin().get<URL::Origin>())) {
return false; return false;
} }

View file

@ -353,7 +353,7 @@ WebIDL::ExceptionOr<JS::GCPtr<PendingResponse>> main_fetch(JS::Realm& realm, Inf
// -> requests current URLs scheme is "data" // -> requests current URLs scheme is "data"
// -> requests mode is "navigate" or "websocket" // -> requests mode is "navigate" or "websocket"
else if ( else if (
(request->origin().has<URL::Origin>() && DOMURL::url_origin(request->current_url()).is_same_origin(request->origin().get<URL::Origin>()) && request->response_tainting() == Infrastructure::Request::ResponseTainting::Basic) (request->origin().has<URL::Origin>() && request->current_url().origin().is_same_origin(request->origin().get<URL::Origin>()) && request->response_tainting() == Infrastructure::Request::ResponseTainting::Basic)
|| request->current_url().scheme() == "data"sv || request->current_url().scheme() == "data"sv
|| (request->mode() == Infrastructure::Request::Mode::Navigate || request->mode() == Infrastructure::Request::Mode::WebSocket)) { || (request->mode() == Infrastructure::Request::Mode::Navigate || request->mode() == Infrastructure::Request::Mode::WebSocket)) {
// 1. Set requests response tainting to "basic". // 1. Set requests response tainting to "basic".
@ -1201,7 +1201,7 @@ WebIDL::ExceptionOr<JS::GCPtr<PendingResponse>> http_redirect_fetch(JS::Realm& r
if (request->mode() == Infrastructure::Request::Mode::CORS if (request->mode() == Infrastructure::Request::Mode::CORS
&& location_url.includes_credentials() && location_url.includes_credentials()
&& request->origin().has<URL::Origin>() && request->origin().has<URL::Origin>()
&& !request->origin().get<URL::Origin>().is_same_origin(DOMURL::url_origin(location_url))) { && !request->origin().get<URL::Origin>().is_same_origin(location_url.origin())) {
return PendingResponse::create(vm, request, Infrastructure::Response::network_error(vm, "Request with 'cors' mode and different URL and request origin must not include credentials in redirect URL"sv)); return PendingResponse::create(vm, request, Infrastructure::Response::network_error(vm, "Request with 'cors' mode and different URL and request origin must not include credentials in redirect URL"sv));
} }
@ -1244,7 +1244,7 @@ WebIDL::ExceptionOr<JS::GCPtr<PendingResponse>> http_redirect_fetch(JS::Realm& r
// 13. If requests current URLs origin is not same origin with locationURLs origin, then for each headerName of // 13. If requests current URLs origin is not same origin with locationURLs origin, then for each headerName of
// CORS non-wildcard request-header name, delete headerName from requests header list. // CORS non-wildcard request-header name, delete headerName from requests header list.
// NOTE: I.e., the moment another origin is seen after the initial request, the `Authorization` header is removed. // NOTE: I.e., the moment another origin is seen after the initial request, the `Authorization` header is removed.
if (!DOMURL::url_origin(request->current_url()).is_same_origin(DOMURL::url_origin(location_url))) { if (!request->current_url().origin().is_same_origin(location_url.origin())) {
static constexpr Array cors_non_wildcard_request_header_names { static constexpr Array cors_non_wildcard_request_header_names {
"Authorization"sv "Authorization"sv
}; };
@ -2578,7 +2578,7 @@ void set_sec_fetch_site_header(Infrastructure::Request& request)
if (!header_value.equals_ignoring_ascii_case("none"sv)) { if (!header_value.equals_ignoring_ascii_case("none"sv)) {
for (auto& url : request.url_list()) { for (auto& url : request.url_list()) {
// 1. If url is same origin with rs origin, continue. // 1. If url is same origin with rs origin, continue.
if (DOMURL::url_origin(url).is_same_origin(DOMURL::url_origin(request.current_url()))) if (url.origin().is_same_origin(request.current_url().origin()))
continue; continue;
// 2. Set headers value to cross-site. // 2. Set headers value to cross-site.

View file

@ -173,8 +173,8 @@ bool Request::has_redirect_tainted_origin() const
// 2. If urls origin is not same origin with lastURLs origin and requests origin is not same origin with lastURLs origin, then return true. // 2. If urls origin is not same origin with lastURLs origin and requests origin is not same origin with lastURLs origin, then return true.
auto const* request_origin = m_origin.get_pointer<URL::Origin>(); auto const* request_origin = m_origin.get_pointer<URL::Origin>();
if (!DOMURL::url_origin(url).is_same_origin(DOMURL::url_origin(*last_url)) if (!url.origin().is_same_origin(last_url->origin())
&& (request_origin == nullptr || !request_origin->is_same_origin(DOMURL::url_origin(*last_url)))) { && (request_origin == nullptr || !request_origin->is_same_origin(last_url->origin()))) {
return true; return true;
} }
@ -328,7 +328,7 @@ void Request::add_origin_header()
case ReferrerPolicy::ReferrerPolicy::SameOrigin: case ReferrerPolicy::ReferrerPolicy::SameOrigin:
// If requests origin is not same origin with requests current URLs origin, then set serializedOrigin // If requests origin is not same origin with requests current URLs origin, then set serializedOrigin
// to `null`. // to `null`.
if (m_origin.has<URL::Origin>() && !m_origin.get<URL::Origin>().is_same_origin(DOMURL::url_origin(current_url()))) if (m_origin.has<URL::Origin>() && !m_origin.get<URL::Origin>().is_same_origin(current_url().origin()))
serialized_origin = MUST(ByteBuffer::copy("null"sv.bytes())); serialized_origin = MUST(ByteBuffer::copy("null"sv.bytes()));
break; break;
// -> Otherwise // -> Otherwise
@ -368,7 +368,7 @@ bool Request::cross_origin_embedder_policy_allows_credentials() const
if (request_origin == nullptr) if (request_origin == nullptr)
return false; return false;
return request_origin->is_same_origin(DOMURL::url_origin(current_url())) && !has_redirect_tainted_origin(); return request_origin->is_same_origin(current_url().origin()) && !has_redirect_tainted_origin();
} }
StringView request_destination_to_string(Request::Destination destination) StringView request_destination_to_string(Request::Destination destination)

View file

@ -309,7 +309,7 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<Request>> Request::construct_impl(JS::Realm
// - parsedReferrers scheme is "about" and path is the string "client" // - parsedReferrers scheme is "about" and path is the string "client"
// - parsedReferrers origin is not same origin with origin // - parsedReferrers origin is not same origin with origin
// then set requests referrer to "client". // then set requests referrer to "client".
auto parsed_referrer_origin = DOMURL::url_origin(parsed_referrer); auto parsed_referrer_origin = parsed_referrer.origin();
if ((parsed_referrer.scheme() == "about"sv && parsed_referrer.paths().size() == 1 && parsed_referrer.paths()[0] == "client"sv) if ((parsed_referrer.scheme() == "about"sv && parsed_referrer.paths().size() == 1 && parsed_referrer.paths()[0] == "client"sv)
|| !parsed_referrer_origin.is_same_origin(origin)) { || !parsed_referrer_origin.is_same_origin(origin)) {
request->set_referrer(Infrastructure::Request::Referrer::Client); request->set_referrer(Infrastructure::Request::Referrer::Client);

View file

@ -85,7 +85,7 @@ URL::Origin determine_the_origin(Optional<URL::URL> const& url, SandboxingFlagSe
return source_origin.release_value(); return source_origin.release_value();
// 5. Return url's origin. // 5. Return url's origin.
return DOMURL::url_origin(*url); return url->origin();
} }
// https://html.spec.whatwg.org/multipage/document-sequences.html#creating-a-new-auxiliary-browsing-context // https://html.spec.whatwg.org/multipage/document-sequences.html#creating-a-new-auxiliary-browsing-context

View file

@ -449,7 +449,7 @@ void EventSource::dispatch_the_event()
// the value of the event type buffer. // the value of the event type buffer.
MessageEventInit init {}; MessageEventInit init {};
init.data = JS::PrimitiveString::create(vm(), data_buffer); init.data = JS::PrimitiveString::create(vm(), data_buffer);
init.origin = MUST(String::from_byte_string(m_url.serialize_origin())); init.origin = MUST(String::from_byte_string(m_url.origin().serialize()));
init.last_event_id = last_event_id; init.last_event_id = last_event_id;
auto type = m_event_type.is_empty() ? HTML::EventNames::message : m_event_type; auto type = m_event_type.is_empty() ? HTML::EventNames::message : m_event_type;

View file

@ -50,7 +50,7 @@ String HTMLHyperlinkElementUtils::origin() const
return String {}; return String {};
// 3. Return the serialization of this element's url's origin. // 3. Return the serialization of this element's url's origin.
return MUST(String::from_byte_string(m_url->serialize_origin())); return MUST(String::from_byte_string(m_url->origin().serialize()));
} }
// https://html.spec.whatwg.org/multipage/links.html#dom-hyperlink-protocol // https://html.spec.whatwg.org/multipage/links.html#dom-hyperlink-protocol

View file

@ -139,7 +139,7 @@ WebIDL::ExceptionOr<String> Location::origin() const
return WebIDL::SecurityError::create(realm(), "Location's relevant document is not same origin-domain with the entry settings object's origin"_fly_string); return WebIDL::SecurityError::create(realm(), "Location's relevant document is not same origin-domain with the entry settings object's origin"_fly_string);
// 2. Return the serialization of this's url's origin. // 2. Return the serialization of this's url's origin.
return TRY_OR_THROW_OOM(vm, String::from_byte_string(url().serialize_origin())); return TRY_OR_THROW_OOM(vm, String::from_byte_string(url().origin().serialize()));
} }
// https://html.spec.whatwg.org/multipage/history.html#dom-location-protocol // https://html.spec.whatwg.org/multipage/history.html#dom-location-protocol

View file

@ -1033,7 +1033,7 @@ WebIDL::ExceptionOr<void> Window::window_post_message_steps(JS::Value message, W
return WebIDL::SyntaxError::create(target_realm, MUST(String::formatted("Invalid URL for targetOrigin: '{}'", options.target_origin))); return WebIDL::SyntaxError::create(target_realm, MUST(String::formatted("Invalid URL for targetOrigin: '{}'", options.target_origin)));
// 3. Set targetOrigin to parsedURL's origin. // 3. Set targetOrigin to parsedURL's origin.
target_origin = DOMURL::url_origin(parsed_url); target_origin = parsed_url.origin();
} }
// 6. Let transfer be options["transfer"]. // 6. Let transfer be options["transfer"].

View file

@ -26,7 +26,7 @@ WebIDL::ExceptionOr<String> WorkerLocation::origin() const
{ {
auto& vm = realm().vm(); auto& vm = realm().vm();
// The origin getter steps are to return the serialization of this's WorkerGlobalScope object's url's origin. // The origin getter steps are to return the serialization of this's WorkerGlobalScope object's url's origin.
return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_global_scope->url().serialize_origin())); return TRY_OR_THROW_OOM(vm, String::from_byte_string(m_global_scope->url().origin().serialize()));
} }
// https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-protocol // https://html.spec.whatwg.org/multipage/workers.html#dom-workerlocation-protocol

View file

@ -143,7 +143,7 @@ void Internals::spoof_current_url(String const& url_string)
VERIFY(url.is_valid()); VERIFY(url.is_valid());
auto origin = DOMURL::url_origin(url); auto origin = url.origin();
auto& window = internals_window(); auto& window = internals_window();
window.associated_document().set_url(url); window.associated_document().set_url(url);

View file

@ -82,7 +82,7 @@ ErrorOr<void> AutoplayAllowlist::enable_for_origins(ReadonlySpan<String> origins
continue; continue;
} }
TRY(allowlist.try_append(DOMURL::url_origin(url))); TRY(allowlist.try_append(url.origin()));
} }
return {}; return {};

View file

@ -146,7 +146,7 @@ Optional<URL::URL> determine_requests_referrer(Fetch::Infrastructure::Request co
case ReferrerPolicy::StrictOriginWhenCrossOrigin: case ReferrerPolicy::StrictOriginWhenCrossOrigin:
// 1. If the origin of referrerURL and the origin of requests current URL are the same, then return // 1. If the origin of referrerURL and the origin of requests current URL are the same, then return
// referrerURL. // referrerURL.
if (referrer_url.has_value() && DOMURL::url_origin(*referrer_url).is_same_origin(DOMURL::url_origin(request.current_url()))) if (referrer_url.has_value() && referrer_url->origin().is_same_origin(request.current_url().origin()))
return referrer_url; return referrer_url;
// 2. If referrerURL is a potentially trustworthy URL and requests current URL is not a potentially // 2. If referrerURL is a potentially trustworthy URL and requests current URL is not a potentially
@ -164,7 +164,7 @@ Optional<URL::URL> determine_requests_referrer(Fetch::Infrastructure::Request co
// 1. If the origin of referrerURL and the origin of requests current URL are the same, then return // 1. If the origin of referrerURL and the origin of requests current URL are the same, then return
// referrerURL. // referrerURL.
if (referrer_url.has_value() if (referrer_url.has_value()
&& DOMURL::url_origin(*referrer_url).is_same_origin(DOMURL::url_origin(request.current_url()))) { && referrer_url->origin().is_same_origin(request.current_url().origin())) {
return referrer_url; return referrer_url;
} }
@ -175,7 +175,7 @@ Optional<URL::URL> determine_requests_referrer(Fetch::Infrastructure::Request co
// 1. If the origin of referrerURL and the origin of requests current URL are the same, then return // 1. If the origin of referrerURL and the origin of requests current URL are the same, then return
// referrerURL. // referrerURL.
if (referrer_url.has_value() if (referrer_url.has_value()
&& DOMURL::url_origin(*referrer_url).is_same_origin(DOMURL::url_origin(request.current_url()))) { && referrer_url->origin().is_same_origin(request.current_url().origin())) {
return referrer_url; return referrer_url;
} }

View file

@ -80,7 +80,7 @@ Trustworthiness is_url_potentially_trustworthy(URL::URL const& url)
return Trustworthiness::PotentiallyTrustworthy; return Trustworthiness::PotentiallyTrustworthy;
// 3. Return the result of executing §3.1 Is origin potentially trustworthy? on urls origin. // 3. Return the result of executing §3.1 Is origin potentially trustworthy? on urls origin.
return is_origin_potentially_trustworthy(DOMURL::url_origin(url)); return is_origin_potentially_trustworthy(url.origin());
} }
} }

View file

@ -36,7 +36,7 @@ StorageKey obtain_a_storage_key_for_non_storage_purposes(HTML::Environment const
auto& mutable_settings = const_cast<HTML::EnvironmentSettingsObject&>(settings); auto& mutable_settings = const_cast<HTML::EnvironmentSettingsObject&>(settings);
return { mutable_settings.origin() }; return { mutable_settings.origin() };
} }
return { DOMURL::url_origin(environment.creation_url) }; return { environment.creation_url.origin() };
} }
} }