From d835a00bee393cb73eec233e5728fd883ba0f7d1 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Sat, 7 Dec 2024 08:49:29 -0500 Subject: [PATCH] LibWeb: Use Fetch to retrieve HTMLObjectElement data URLs This eliminates the use of ResourceLoader in HTMLObjectElement. The spec steps around fetching have been slightly updated since we've last looked at this, so those are updated here. Regarding the text test change: we cannot rely on the data: URL being fetched synchronously. It will occur on a deferred task now. This does match the behavior of other browsers, as they also will not have run the fallback representation steps as of DOMContentLoaded. --- Libraries/LibWeb/HTML/HTMLObjectElement.cpp | 319 +++++++++++------- Libraries/LibWeb/HTML/HTMLObjectElement.h | 16 +- ...th-unsupported-type-in-data-attribute.html | 17 +- 3 files changed, 216 insertions(+), 136 deletions(-) diff --git a/Libraries/LibWeb/HTML/HTMLObjectElement.cpp b/Libraries/LibWeb/HTML/HTMLObjectElement.cpp index 64681cf0a85..708ac29a620 100644 --- a/Libraries/LibWeb/HTML/HTMLObjectElement.cpp +++ b/Libraries/LibWeb/HTML/HTMLObjectElement.cpp @@ -11,8 +11,12 @@ #include #include #include +#include #include #include +#include +#include +#include #include #include #include @@ -39,8 +43,8 @@ HTMLObjectElement::HTMLObjectElement(DOM::Document& document, DOM::QualifiedName // https://html.spec.whatwg.org/multipage/iframe-embed-object.html#the-object-element // Whenever one of the following conditions occur: // - the element is created, - // ...the user agent must queue an element task on the DOM manipulation task source given - // the object element to run the following steps to (re)determine what the object element represents. + // ...the user agent must queue an element task on the DOM manipulation task source given the object element to run + // the following steps to (re)determine what the object element represents. queue_element_task_to_run_object_representation_steps(); } @@ -69,10 +73,8 @@ void HTMLObjectElement::form_associated_element_attribute_changed(FlyString cons (!has_attribute(HTML::AttributeNames::classid) && name == HTML::AttributeNames::data) || // - neither the element's classid attribute nor its data attribute are present, and its type attribute is set, changed, or removed, (!has_attribute(HTML::AttributeNames::classid) && !has_attribute(HTML::AttributeNames::data) && name == HTML::AttributeNames::type)) { - - // ...the user agent must queue an element task on the DOM manipulation task source given - // the object element to run the following steps to (re)determine what the object element represents. - // This task being queued or actively running must delay the load event of the element's node document. + // ...the user agent must queue an element task on the DOM manipulation task source given the object element to run + // the following steps to (re)determine what the object element represents. queue_element_task_to_run_object_representation_steps(); } } @@ -145,7 +147,7 @@ GC::Ptr HTMLObjectElement::create_layout_node(CSS::StyleProperties switch (m_representation) { case Representation::Children: return NavigableContainer::create_layout_node(move(style)); - case Representation::NestedBrowsingContext: + case Representation::ContentNavigable: return heap().allocate(document(), *this, move(style)); case Representation::Image: if (image_data()) @@ -184,152 +186,213 @@ bool HTMLObjectElement::has_ancestor_media_element_or_object_element_not_showing // https://html.spec.whatwg.org/multipage/iframe-embed-object.html#the-object-element:queue-an-element-task void HTMLObjectElement::queue_element_task_to_run_object_representation_steps() { - queue_an_element_task(HTML::Task::Source::DOMManipulation, [&]() { - // FIXME: 1. If the user has indicated a preference that this object element's fallback content be shown instead of the element's usual behavior, then jump to the step below labeled fallback. + // This task being queued or actively running must delay the load event of the element's node document. + m_document_load_event_delayer_for_object_representation_task.emplace(document()); - // 2. If the element has an ancestor media element, or has an ancestor object element that is not showing its fallback content, or if the element is not in a document whose browsing context is non-null, or if the element's node document is not fully active, or if the element is still in the stack of open elements of an HTML parser or XML parser, or if the element is not being rendered, then jump to the step below labeled fallback. - if (!document().browsing_context() || !document().is_fully_active()) - return run_object_representation_fallback_steps(); - if (has_ancestor_media_element_or_object_element_not_showing_fallback_content()) - return run_object_representation_fallback_steps(); + queue_an_element_task(HTML::Task::Source::DOMManipulation, [this]() { + ScopeGuard guard { [&]() { m_document_load_event_delayer_for_object_representation_task.clear(); } }; - // FIXME: 3. If the classid attribute is present, and has a value that isn't the empty string, then: if the user agent can find a plugin suitable according to the value of the classid attribute, and plugins aren't being sandboxed, then that plugin should be used, and the value of the data attribute, if any, should be passed to the plugin. If no suitable plugin can be found, or if the plugin reports an error, jump to the step below labeled fallback. + auto& realm = this->realm(); + auto& vm = realm.vm(); - // 4. If the data attribute is present and its value is not the empty string, then: - if (auto maybe_data = get_attribute(HTML::AttributeNames::data); maybe_data.has_value() && !maybe_data->is_empty()) { - // 1. If the type attribute is present and its value is not a type that the user agent supports, and is not a type that the user agent can find a plugin for, then the user agent may jump to the step below labeled fallback without fetching the content to examine its real type. + // FIXME: 1. If the user has indicated a preference that this object element's fallback content be shown instead of the + // element's usual behavior, then jump to the step below labeled fallback. - // 2. Parse a URL given the data attribute, relative to the element's node document. - auto url = document().parse_url(*maybe_data); - - // 3. If that failed, fire an event named error at the element, then jump to the step below labeled fallback. - if (!url.is_valid()) { - dispatch_event(DOM::Event::create(realm(), HTML::EventNames::error)); - return run_object_representation_fallback_steps(); - } - - // 4. Let request be a new request whose URL is the resulting URL record, client is the element's node document's relevant settings object, destination is "object", credentials mode is "include", mode is "navigate", and whose use-URL-credentials flag is set. - auto request = LoadRequest::create_for_url_on_page(url, &document().page()); - - // 5. Fetch request, with processResponseEndOfBody given response res set to finalize and report timing with res, the element's node document's relevant global object, and "object". - // Fetching the resource must delay the load event of the element's node document until the task that is queued by the networking task source once the resource has been fetched (defined next) has been run. - set_resource(ResourceLoader::the().load_resource(Resource::Type::Generic, request)); - m_document_load_event_delayer_for_resource_load.emplace(document()); - - // 6. If the resource is not yet available (e.g. because the resource was not available in the cache, so that loading the resource required making a request over the network), then jump to the step below labeled fallback. The task that is queued by the networking task source once the resource is available must restart this algorithm from this step. Resources can load incrementally; user agents may opt to consider a resource "available" whenever enough data has been obtained to begin processing the resource. - - // NOTE: The request is always asynchronous, even if it is cached or succeeded/failed immediately. Allow the callbacks below to invoke - // the fallback steps. This prevents the fallback layout from flashing very briefly between here and the resource loading. + // 2. If the element has an ancestor media element, or has an ancestor object element that is not showing its + // fallback content, or if the element is not in a document whose browsing context is non-null, or if the + // element's node document is not fully active, or if the element is still in the stack of open elements of + // an HTML parser or XML parser, or if the element is not being rendered, then jump to the step below labeled + // fallback. + // FIXME: Handle the element being in the stack of open elements. + // FIXME: Handle the element not being rendered. + if (!document().browsing_context() || !document().is_fully_active()) { + run_object_representation_fallback_steps(); + return; + } + if (has_ancestor_media_element_or_object_element_not_showing_fallback_content()) { + run_object_representation_fallback_steps(); return; } - // 5. If the data attribute is absent but the type attribute is present, and the user agent can find a plugin suitable according to the value of the type attribute, and plugins aren't being sandboxed, then that plugin should be used. If these conditions cannot be met, or if the plugin reports an error, jump to the step below labeled fallback. Otherwise return; once the plugin is completely loaded, queue an element task on the DOM manipulation task source given the object element to fire an event named load at the element. - run_object_representation_fallback_steps(); + // 3. If the data attribute is present and its value is not the empty string, then: + if (auto data = get_attribute(HTML::AttributeNames::data); data.has_value() && !data->is_empty()) { + // 1. If the type attribute is present and its value is not a type that the user agent supports, then the user + // agent may jump to the step below labeled fallback without fetching the content to examine its real type. + + // 2. Let url be the result of encoding-parsing a URL given the data attribute's value, relative to the element's node document. + auto url = document().encoding_parse_url(*data); + + // 3. If url is failure, then fire an event named error at the element and jump to the step below labeled fallback. + if (!url.is_valid()) { + dispatch_event(DOM::Event::create(realm, HTML::EventNames::error)); + run_object_representation_fallback_steps(); + return; + } + + // 4. Let request be a new request whose URL is url, client is the element's node document's relevant settings + // object, destination is "object", credentials mode is "include", mode is "navigate", initiator type is + // "object", and whose use-URL-credentials flag is set. + auto request = Fetch::Infrastructure::Request::create(vm); + request->set_url(move(url)); + request->set_client(&document().relevant_settings_object()); + request->set_destination(Fetch::Infrastructure::Request::Destination::Object); + request->set_credentials_mode(Fetch::Infrastructure::Request::CredentialsMode::Include); + request->set_mode(Fetch::Infrastructure::Request::Mode::Navigate); + request->set_initiator_type(Fetch::Infrastructure::Request::InitiatorType::Object); + request->set_use_url_credentials(true); + + Fetch::Infrastructure::FetchAlgorithms::Input fetch_algorithms_input {}; + fetch_algorithms_input.process_response = [this](GC::Ref response) { + auto& realm = this->realm(); + auto& global = document().realm().global_object(); + + if (response->is_network_error()) { + resource_did_fail(); + return; + } + + if (response->type() == Fetch::Infrastructure::Response::Type::Opaque || response->type() == Fetch::Infrastructure::Response::Type::OpaqueRedirect) { + auto& filtered_response = static_cast(*response); + response = filtered_response.internal_response(); + } + + auto on_data_read = GC::create_function(realm.heap(), [this, response](ByteBuffer data) { + resource_did_load(response, data); + }); + auto on_error = GC::create_function(realm.heap(), [this](JS::Value) { + resource_did_fail(); + }); + + VERIFY(response->body()); + response->body()->fully_read(realm, on_data_read, on_error, GC::Ref { global }); + }; + + // 5. Fetch request. + auto result = Fetch::Fetching::fetch(realm, request, Fetch::Infrastructure::FetchAlgorithms::create(vm, move(fetch_algorithms_input))); + if (result.is_error()) { + resource_did_fail(); + return; + } + + // Fetching the resource must delay the load event of the element's node document until the task that is + // queued by the networking task source once the resource has been fetched (defined next) has been run. + m_document_load_event_delayer_for_resource_load.emplace(document()); + + // 6. If the resource is not yet available (e.g. because the resource was not available in the cache, so that + // loading the resource required making a request over the network), then jump to the step below labeled + // fallback. The task that is queued by the networking task source once the resource is available must + // restart this algorithm from this step. Resources can load incrementally; user agents may opt to consider + // a resource "available" whenever enough data has been obtained to begin processing the resource. + + // NOTE: The request is always asynchronous, even if it is cached or succeeded/failed immediately. Allow the + // response callback to invoke the fallback steps. This prevents the fallback layout from flashing very + // briefly between here and the resource loading. + } }); } // https://html.spec.whatwg.org/multipage/iframe-embed-object.html#the-object-element:concept-event-fire-2 void HTMLObjectElement::resource_did_fail() { - // 4.7. If the load failed (e.g. there was an HTTP 404 error, there was a DNS error), fire an event named error at the element, then jump to the step below labeled fallback. + ScopeGuard guard { [&]() { m_document_load_event_delayer_for_resource_load.clear(); } }; + + // 3.7. If the load failed (e.g. there was an HTTP 404 error, there was a DNS error), fire an event named error at + // the element, then jump to the step below labeled fallback. dispatch_event(DOM::Event::create(realm(), HTML::EventNames::error)); run_object_representation_fallback_steps(); - m_document_load_event_delayer_for_resource_load.clear(); } // https://html.spec.whatwg.org/multipage/iframe-embed-object.html#object-type-detection -void HTMLObjectElement::resource_did_load() +void HTMLObjectElement::resource_did_load(Fetch::Infrastructure::Response const& response, ReadonlyBytes data) { - ScopeGuard load_event_delayer_guard = [&] { - m_document_load_event_delayer_for_resource_load.clear(); - }; + ScopeGuard guard { [&]() { m_document_load_event_delayer_for_resource_load.clear(); } }; - // 4.8. Determine the resource type, as follows: + // 3.8. Determine the resource type, as follows: // 1. Let the resource type be unknown. - Optional resource_type; + Optional resource_type; - // FIXME: 2. If the user agent is configured to strictly obey Content-Type headers for this resource, and the resource has associated Content-Type metadata, then let the resource type be the type specified in the resource's Content-Type metadata, and jump to the step below labeled handler. - // FIXME: 3. If there is a type attribute present on the object element, and that attribute's value is not a type that the user agent supports, but it is a type that a plugin supports, then let the resource type be the type specified in that type attribute, and jump to the step below labeled handler. - - // 4. Run the appropriate set of steps from the following list: - // * If the resource has associated Content-Type metadata - if (auto maybe_content_type = resource()->response_headers().get("Content-Type"sv); maybe_content_type.has_value()) { - auto& content_type = maybe_content_type.value(); + // FIXME: 3. If the user agent is configured to strictly obey Content-Type headers for this resource, and the resource has + // associated Content-Type metadata, then let the resource type be the type specified in the resource's Content-Type + // metadata, and jump to the step below labeled handler. + // 3. Run the appropriate set of steps from the following list: + // -> If the resource has associated Content-Type metadata + if (auto content_type = response.header_list()->extract_mime_type(); content_type.has_value()) { // 1. Let binary be false. bool binary = false; - // 2. If the type specified in the resource's Content-Type metadata is "text/plain", and the result of applying the rules for distinguishing if a resource is text or binary to the resource is that the resource is not text/plain, then set binary to true. - if (content_type == "text/plain"sv) { - auto supplied_type = MimeSniff::MimeType::parse(content_type); - auto computed_type = MimeSniff::Resource::sniff(resource()->encoded_data(), MimeSniff::SniffingConfiguration { - .sniffing_context = MimeSniff::SniffingContext::TextOrBinary, - .supplied_type = move(supplied_type), - }); + // 2. If the type specified in the resource's Content-Type metadata is "text/plain", and the result of applying + // the rules for distinguishing if a resource is text or binary to the resource is that the resource is not + // text/plain, then set binary to true. + if (content_type->essence() == "text/plain"sv) { + auto computed_type = MimeSniff::Resource::sniff( + data, + MimeSniff::SniffingConfiguration { + .sniffing_context = MimeSniff::SniffingContext::TextOrBinary, + .supplied_type = content_type, + }); + if (computed_type.essence() != "text/plain"sv) binary = true; } // 3. If the type specified in the resource's Content-Type metadata is "application/octet-stream", then set binary to true. - if (content_type == "application/octet-stream"sv) + else if (content_type->essence() == "application/octet-stream"sv) { binary = true; + } - // 4. If binary is false, then let the resource type be the type specified in the resource's Content-Type metadata, and jump to the step below labeled handler. - if (!binary) - return run_object_representation_handler_steps(content_type); + // 4. If binary is false, then let the resource type be the type specified in the resource's Content-Type metadata, + // and jump to the step below labeled handler. + if (!binary) { + resource_type = move(content_type); + } - // 5. If there is a type attribute present on the object element, and its value is not application/octet-stream, then run the following steps: - if (auto type = this->type(); !type.is_empty() && (type != "application/octet-stream"sv)) { - // 1. If the attribute's value is a type that a plugin supports, or the attribute's value is a type that starts with "image/" that is not also an XML MIME type, then let the resource type be the type specified in that type attribute. - // FIXME: This only partially implements this step. - if (type.starts_with_bytes("image/"sv)) - resource_type = move(type); + // 5. If there is a type attribute present on the object element, and its value is not application/octet-stream, + // then run the following steps: + else if (auto type = this->type(); !type.is_empty() && (type != "application/octet-stream"sv)) { + // 1. If the attribute's value is a type that starts with "image/" that is not also an XML MIME type, then + // let the resource type be the type specified in that type attribute. + if (type.starts_with_bytes("image/"sv)) { + auto parsed_type = MimeSniff::MimeType::parse(type); + + if (parsed_type.has_value() && !parsed_type->is_xml()) + resource_type = move(parsed_type); + } // 2. Jump to the step below labeled handler. } } - // * Otherwise, if the resource does not have associated Content-Type metadata + // -> Otherwise, if the resource does not have associated Content-Type metadata else { - Optional tentative_type; + Optional tentative_type; // 1. If there is a type attribute present on the object element, then let the tentative type be the type specified in that type attribute. // Otherwise, let tentative type be the computed type of the resource. if (auto type = this->type(); !type.is_empty()) - tentative_type = move(type); + tentative_type = MimeSniff::MimeType::parse(type); + else + tentative_type = MimeSniff::Resource::sniff(data); - // FIXME: For now, ignore application/ MIME types as we cannot render yet them anyways. We will need to implement the MIME type sniffing - // algorithm in order to map all unknown MIME types to "application/octet-stream". - else if (auto type = resource()->mime_type(); !type.starts_with("application/"sv)) - tentative_type = MUST(String::from_byte_string(type)); - - // 2. If tentative type is not application/octet-stream, then let resource type be tentative type and jump to the step below labeled handler. - if (tentative_type.has_value() && tentative_type != "application/octet-stream"sv) + // 2. If tentative type is not application/octet-stream, then let resource type be tentative type and jump to the + // step below labeled handler. + if (tentative_type.has_value() && tentative_type->essence() != "application/octet-stream"sv) resource_type = move(tentative_type); } - // FIXME: 5. If applying the URL parser algorithm to the URL of the specified resource (after any redirects) results in a URL record whose path component matches a pattern that a plugin supports, then let resource type be the type that that plugin can handle. - run_object_representation_handler_steps(resource_type.has_value() ? resource_type->to_byte_string() : ByteString::empty()); + if (resource_type.has_value()) + run_object_representation_handler_steps(response, *resource_type, data); + else + run_object_representation_fallback_steps(); } // https://html.spec.whatwg.org/multipage/iframe-embed-object.html#the-object-element:plugin-11 -void HTMLObjectElement::run_object_representation_handler_steps(Optional resource_type) +void HTMLObjectElement::run_object_representation_handler_steps(Fetch::Infrastructure::Response const& response, MimeSniff::MimeType const& resource_type, ReadonlyBytes data) { - // 4.9. Handler: Handle the content as given by the first of the following cases that matches: + // 3.9. Handler: Handle the content as given by the first of the following cases that matches: - // * FIXME: If the resource type is not a type that the user agent supports, but it is a type that a plugin supports - // If the object element's nested browsing context is non-null, then it must be discarded and then set to null. - // If plugins are being sandboxed, then jump to the step below labeled fallback. - // Otherwise, the user agent should use the plugin that supports resource type and pass the content of the resource to that plugin. If the plugin reports an error, then jump to the step below labeled fallback. - - if (!resource_type.has_value()) { - run_object_representation_fallback_steps(); - return; - } - auto mime_type = MimeSniff::MimeType::parse(*resource_type); - - // * If the resource type is an XML MIME type, or if the resource type does not start with "image/" - if (mime_type.has_value() && can_load_document_with_type(*mime_type) && (mime_type->is_xml() || !mime_type->is_image())) { + // -> If the resource type is an XML MIME type, or if the resource type does not start with "image/" + if (can_load_document_with_type(resource_type) && (resource_type.is_xml() || !resource_type.is_image())) { // If the object element's content navigable is null, then create a new child navigable for the element. if (!m_content_navigable && in_a_document_tree()) { MUST(create_new_child_navigable()); @@ -340,32 +403,41 @@ void HTMLObjectElement::run_object_representation_handler_steps(Optionalurl(); url != "about:blank"sv) - MUST(m_content_navigable->navigate({ .url = url, - .source_document = document(), - .history_handling = Bindings::NavigationHistoryBehavior::Replace })); + // Let response be the response from fetch. - // The object element represents its nested browsing context. - run_object_representation_completed_steps(Representation::NestedBrowsingContext); + // If response's URL does not match about:blank, then navigate the element's content navigable to response's URL + // using the element's node document, with historyHandling set to "replace". + if (response.url().has_value() && !url_matches_about_blank(*response.url())) { + MUST(m_content_navigable->navigate({ + .url = *response.url(), + .source_document = document(), + .history_handling = Bindings::NavigationHistoryBehavior::Replace, + })); + } + + // The object element represents its content navigable. + run_object_representation_completed_steps(Representation::ContentNavigable); } - // * If the resource type starts with "image/", and support for images has not been disabled + // -> If the resource type starts with "image/", and support for images has not been disabled // FIXME: Handle disabling image support. - else if (resource_type.has_value() && resource_type->starts_with("image/"sv)) { - // Destroy the child navigable of the object element. + else if (resource_type.is_image()) { + // Destroy a child navigable given the object element. destroy_the_child_navigable(); // Apply the image sniffing rules to determine the type of the image. // The object element represents the specified image. - // If the image cannot be rendered, e.g. because it is malformed or in an unsupported format, jump to the step below labeled fallback. - if (!resource()->has_encoded_data()) - return run_object_representation_fallback_steps(); + // If the image cannot be rendered, e.g. because it is malformed or in an unsupported format, jump to the step + // below labeled fallback. + if (data.is_empty()) { + run_object_representation_fallback_steps(); + return; + } load_image(); } - // * Otherwise + // -> Otherwise else { // The given resource type is not supported. Jump to the step below labeled fallback. run_object_representation_fallback_steps(); @@ -375,9 +447,12 @@ void HTMLObjectElement::run_object_representation_handler_steps(Optional([](auto& object) { object.queue_element_task_to_run_object_representation_steps(); return IterationDecision::Continue; diff --git a/Libraries/LibWeb/HTML/HTMLObjectElement.h b/Libraries/LibWeb/HTML/HTMLObjectElement.h index 960ac1f6bab..4617ef7a4d5 100644 --- a/Libraries/LibWeb/HTML/HTMLObjectElement.h +++ b/Libraries/LibWeb/HTML/HTMLObjectElement.h @@ -12,14 +12,12 @@ #include #include #include -#include namespace Web::HTML { class HTMLObjectElement final : public NavigableContainer , public FormAssociatedElement - , public ResourceClient , public Layout::ImageProvider { WEB_PLATFORM_OBJECT(HTMLObjectElement, NavigableContainer) GC_DECLARE_ALLOCATOR(HTMLObjectElement); @@ -28,7 +26,7 @@ class HTMLObjectElement final enum class Representation { Unknown, Image, - NestedBrowsingContext, + ContentNavigable, Children, }; @@ -64,16 +62,15 @@ private: bool has_ancestor_media_element_or_object_element_not_showing_fallback_content() const; void queue_element_task_to_run_object_representation_steps(); - void run_object_representation_handler_steps(Optional resource_type); + void run_object_representation_handler_steps(Fetch::Infrastructure::Response const&, MimeSniff::MimeType const&, ReadonlyBytes); void run_object_representation_completed_steps(Representation); void run_object_representation_fallback_steps(); void load_image(); void update_layout_and_child_objects(Representation); - // ^ResourceClient - virtual void resource_did_load() override; - virtual void resource_did_fail() override; + void resource_did_load(Fetch::Infrastructure::Response const&, ReadonlyBytes); + void resource_did_fail(); // ^DOM::Element virtual i32 default_tab_index_value() const override; @@ -87,13 +84,12 @@ private: virtual void set_visible_in_viewport(bool) override; virtual GC::Ref to_html_element() const override { return *this; } - Representation m_representation { Representation::Unknown }; - GC::Ptr image_data() const; + Representation m_representation { Representation::Unknown }; + GC::Ptr m_resource_request; -public: Optional m_document_load_event_delayer_for_object_representation_task; Optional m_document_load_event_delayer_for_resource_load; }; diff --git a/Tests/LibWeb/Text/input/object-with-unsupported-type-in-data-attribute.html b/Tests/LibWeb/Text/input/object-with-unsupported-type-in-data-attribute.html index aeffa145300..c1e29db00a9 100644 --- a/Tests/LibWeb/Text/input/object-with-unsupported-type-in-data-attribute.html +++ b/Tests/LibWeb/Text/input/object-with-unsupported-type-in-data-attribute.html @@ -1,10 +1,19 @@ -
Fallback
+
Fallback