Mirror of https://github.com/LadybirdBrowser/ladybird.git
LibWeb: Support unbuffered fetch requests
Supporting unbuffered fetches is actually part of the fetch spec in its HTTP-network-fetch algorithm. We had previously implemented this method in a very ad-hoc manner as a simple wrapper around ResourceLoader. This is still the case, but we now implement a good amount of these steps according to spec, using ResourceLoader's unbuffered API.

The response data is forwarded through to the fetch response using streams. This will eventually let us remove the use of ResourceLoader's buffered API, as all responses should just be streamed this way. The streams spec then supplies ways to wait for completion, thus allowing fully buffered responses. However, we have more work to do to make the other parts of our fetch implementation (namely, Body::fully_read) use streams before we can do this.
Parent: 1e97ae66e5
Commit: 6056428cb5

Notes (sideshowbarker, 2024-07-18 05:01:22 +09:00):
  Author: https://github.com/trflynn89
  Commit: https://github.com/SerenityOS/serenity/commit/6056428cb5
  Pull-request: https://github.com/SerenityOS/serenity/pull/24452
  Issue: https://github.com/SerenityOS/serenity/issues/23847
8 changed files with 266 additions and 8 deletions
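The heart of the diff below is the new FetchedDataReceiver class, which bridges ResourceLoader's unbuffered callbacks and the response's ReadableStream: bytes that arrive from the network before the stream has registered a pending pull are parked in a buffer, and flushed as soon as the pull's promise is set. The following is a minimal standalone sketch of that hand-off pattern only, using plain std::function callbacks in place of the LibWeb stream and promise types; the class and function names here are illustrative, not the actual LibWeb API.

// Standalone sketch (illustrative names, not the LibWeb API): buffer bytes that
// arrive before anyone is waiting for them, and flush as soon as a pull is registered.
#include <cstdio>
#include <functional>
#include <string>
#include <utility>

class DataReceiver {
public:
    using PullCallback = std::function<void(std::string const&)>;

    // Analogous to FetchedDataReceiver::set_pending_promise(): remember the waiting
    // consumer, and immediately hand over anything that was buffered in the meantime.
    void set_pending_pull(PullCallback callback)
    {
        bool had_pending_pull = static_cast<bool>(m_pending_pull);
        m_pending_pull = std::move(callback);

        if (!had_pending_pull && !m_buffer.empty()) {
            m_pending_pull(m_buffer);
            m_buffer.clear();
        }
    }

    // Analogous to FetchedDataReceiver::on_data_received(): if no consumer is waiting
    // yet, park the bytes; otherwise deliver them right away.
    void on_data_received(std::string const& bytes)
    {
        if (!m_pending_pull) {
            m_buffer += bytes;
            return;
        }
        m_pending_pull(bytes);
    }

private:
    PullCallback m_pending_pull;
    std::string m_buffer;
};

int main()
{
    DataReceiver receiver;

    // The network can deliver data before the stream machinery has registered its pull.
    receiver.on_data_received("early ");

    // Registering the pull flushes the buffered data immediately...
    receiver.set_pending_pull([](std::string const& chunk) {
        std::printf("pulled chunk: '%s'\n", chunk.c_str());
    });

    // ...and later chunks are delivered as they arrive.
    receiver.on_data_received("late");
    return 0;
}

In the real implementation the pull is a WebIDL promise resolved on the HTML event loop via queue_fetch_task, rather than a synchronous callback, but the buffering logic has the same shape.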
@@ -3,6 +3,7 @@ source_set("Fetching") {
  deps = [ "//Userland/Libraries/LibWeb:all_generated" ]
  sources = [
    "Checks.cpp",
    "FetchedDataReceiver.cpp",
    "Fetching.cpp",
    "PendingResponse.cpp",
    "RefCountedFlag.cpp",
@@ -204,6 +204,7 @@ set(SOURCES
    Fetch/BodyInit.cpp
    Fetch/Enums.cpp
    Fetch/Fetching/Checks.cpp
    Fetch/Fetching/FetchedDataReceiver.cpp
    Fetch/Fetching/Fetching.cpp
    Fetch/Fetching/PendingResponse.cpp
    Fetch/Fetching/RefCountedFlag.cpp
@@ -0,0 +1,90 @@
/*
 * Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <LibJS/Heap/HeapFunction.h>
#include <LibWeb/Bindings/ExceptionOrUtils.h>
#include <LibWeb/Bindings/HostDefined.h>
#include <LibWeb/Fetch/Fetching/FetchedDataReceiver.h>
#include <LibWeb/Fetch/Infrastructure/FetchParams.h>
#include <LibWeb/Fetch/Infrastructure/Task.h>
#include <LibWeb/HTML/Scripting/ExceptionReporter.h>
#include <LibWeb/HTML/Scripting/TemporaryExecutionContext.h>
#include <LibWeb/Streams/AbstractOperations.h>
#include <LibWeb/WebIDL/Promise.h>

namespace Web::Fetch::Fetching {

JS_DEFINE_ALLOCATOR(FetchedDataReceiver);

FetchedDataReceiver::FetchedDataReceiver(JS::NonnullGCPtr<Infrastructure::FetchParams const> fetch_params, JS::NonnullGCPtr<Streams::ReadableStream> stream)
    : m_fetch_params(fetch_params)
    , m_stream(stream)
{
}

FetchedDataReceiver::~FetchedDataReceiver() = default;

void FetchedDataReceiver::visit_edges(Visitor& visitor)
{
    Base::visit_edges(visitor);
    visitor.visit(m_fetch_params);
    visitor.visit(m_stream);
    visitor.visit(m_pending_promise);
}

void FetchedDataReceiver::set_pending_promise(JS::NonnullGCPtr<WebIDL::Promise> promise)
{
    auto had_pending_promise = m_pending_promise != nullptr;
    m_pending_promise = promise;

    if (!had_pending_promise && !m_buffer.is_empty()) {
        on_data_received(m_buffer);
        m_buffer.clear();
    }
}

// This implements the parallel steps of the pullAlgorithm in HTTP-network-fetch.
// https://fetch.spec.whatwg.org/#ref-for-in-parallel④
void FetchedDataReceiver::on_data_received(ReadonlyBytes bytes)
{
    // FIXME: 1. If the size of buffer is smaller than a lower limit chosen by the user agent and the ongoing fetch
    //           is suspended, resume the fetch.
    // FIXME: 2. Wait until buffer is not empty.

    // If the remote end sends data immediately after we receive headers, we will often get that data here before the
    // stream tasks have all been queued internally. Just hold onto that data.
    if (!m_pending_promise) {
        m_buffer.append(bytes);
        return;
    }

    // 3. Queue a fetch task to run the following steps, with fetchParams’s task destination.
    Infrastructure::queue_fetch_task(
        m_fetch_params->controller(),
        m_fetch_params->task_destination().get<JS::NonnullGCPtr<JS::Object>>(),
        JS::create_heap_function(heap(), [this, bytes = MUST(ByteBuffer::copy(bytes))]() mutable {
            HTML::TemporaryExecutionContext execution_context { Bindings::host_defined_environment_settings_object(m_stream->realm()), HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };

            // 1. Pull from bytes buffer into stream.
            if (auto result = Streams::readable_stream_pull_from_bytes(m_stream, move(bytes)); result.is_error()) {
                auto throw_completion = Bindings::dom_exception_to_throw_completion(m_stream->vm(), result.release_error());

                dbgln("FetchedDataReceiver: Stream error pulling bytes");
                HTML::report_exception(throw_completion, m_stream->realm());

                return;
            }

            // 2. If stream is errored, then terminate fetchParams’s controller.
            if (m_stream->is_errored())
                m_fetch_params->controller()->terminate();

            // 3. Resolve promise with undefined.
            WebIDL::resolve_promise(m_stream->realm(), *m_pending_promise, JS::js_undefined());
        }));
}

}
@@ -0,0 +1,37 @@
/*
 * Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#pragma once

#include <AK/ByteBuffer.h>
#include <LibJS/Heap/Cell.h>
#include <LibJS/Heap/CellAllocator.h>
#include <LibWeb/Forward.h>

namespace Web::Fetch::Fetching {

class FetchedDataReceiver final : public JS::Cell {
    JS_CELL(FetchedDataReceiver, JS::Cell);
    JS_DECLARE_ALLOCATOR(FetchedDataReceiver);

public:
    virtual ~FetchedDataReceiver() override;

    void set_pending_promise(JS::NonnullGCPtr<WebIDL::Promise>);
    void on_data_received(ReadonlyBytes);

private:
    FetchedDataReceiver(JS::NonnullGCPtr<Infrastructure::FetchParams const>, JS::NonnullGCPtr<Streams::ReadableStream>);

    virtual void visit_edges(Visitor& visitor) override;

    JS::NonnullGCPtr<Infrastructure::FetchParams const> m_fetch_params;
    JS::NonnullGCPtr<Streams::ReadableStream> m_stream;
    JS::GCPtr<WebIDL::Promise> m_pending_promise;
    ByteBuffer m_buffer;
};

}
@@ -17,6 +17,7 @@
#include <LibWeb/DOMURL/DOMURL.h>
#include <LibWeb/Fetch/BodyInit.h>
#include <LibWeb/Fetch/Fetching/Checks.h>
#include <LibWeb/Fetch/Fetching/FetchedDataReceiver.h>
#include <LibWeb/Fetch/Fetching/Fetching.h>
#include <LibWeb/Fetch/Fetching/PendingResponse.h>
#include <LibWeb/Fetch/Fetching/RefCountedFlag.h>
@@ -1962,8 +1963,10 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<PendingResponse>> nonstandard_resource_load
    load_request.set_url(request->current_url());
    load_request.set_page(page);
    load_request.set_method(ByteString::copy(request->method()));

    for (auto const& header : *request->header_list())
        load_request.set_header(ByteString::copy(header.name), ByteString::copy(header.value));

    if (auto const* body = request->body().get_pointer<JS::NonnullGCPtr<Infrastructure::Body>>()) {
        TRY((*body)->source().visit(
            [&](ByteBuffer const& byte_buffer) -> WebIDL::ExceptionOr<void> {
@@ -1981,13 +1984,121 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<PendingResponse>> nonstandard_resource_load

    auto pending_response = PendingResponse::create(vm, request);

    dbgln_if(WEB_FETCH_DEBUG, "Fetch: Invoking ResourceLoader");
    if constexpr (WEB_FETCH_DEBUG)
    if constexpr (WEB_FETCH_DEBUG) {
        dbgln("Fetch: Invoking ResourceLoader");
        log_load_request(load_request);
    }

    ResourceLoader::the().load(
        load_request,
        [&realm, &vm, request, pending_response](auto data, auto& response_headers, auto status_code) {
    // FIXME: This check should be removed and all HTTP requests should go through the `ResourceLoader::load_unbuffered`
    //        path. The buffer option should then be supplied to the steps below that allow us to buffer data up to a
    //        user-agent-defined limit (or not). However, we will need to fully use stream operations throughout the
    //        fetch process to enable this (e.g. Body::fully_read must use streams for this to work).
    if (request->buffer_policy() == Infrastructure::Request::BufferPolicy::DoNotBufferResponse) {
        HTML::TemporaryExecutionContext execution_context { Bindings::host_defined_environment_settings_object(realm), HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };

        // 12. Let stream be a new ReadableStream.
        auto stream = realm.heap().allocate<Streams::ReadableStream>(realm, realm);
        auto fetched_data_receiver = realm.heap().allocate<FetchedDataReceiver>(realm, fetch_params, stream);

        // 10. Let pullAlgorithm be the followings steps:
        auto pull_algorithm = JS::create_heap_function(realm.heap(), [&realm, fetched_data_receiver]() {
            // 1. Let promise be a new promise.
            auto promise = WebIDL::create_promise(realm);

            // 2. Run the following steps in parallel:
            // NOTE: This is handled by FetchedDataReceiver.
            fetched_data_receiver->set_pending_promise(promise);

            // 3. Return promise.
            return promise;
        });

        // 11. Let cancelAlgorithm be an algorithm that aborts fetchParams’s controller with reason, given reason.
        auto cancel_algorithm = JS::create_heap_function(realm.heap(), [&realm, &fetch_params](JS::Value reason) {
            fetch_params.controller()->abort(realm, reason);
            return WebIDL::create_resolved_promise(realm, JS::js_undefined());
        });

        // 13. Set up stream with byte reading support with pullAlgorithm set to pullAlgorithm, cancelAlgorithm set to cancelAlgorithm.
        Streams::set_up_readable_stream_controller_with_byte_reading_support(stream, pull_algorithm, cancel_algorithm);

        auto on_headers_received = [&vm, request, pending_response, stream](auto const& response_headers, Optional<u32> status_code) {
            if (pending_response->is_resolved()) {
                // RequestServer will send us the response headers twice, the second time being for HTTP trailers. This
                // fetch algorithm is not interested in trailers, so just drop them here.
                return;
            }

            auto response = Infrastructure::Response::create(vm);
            response->set_status(status_code.value_or(200));
            // FIXME: Set response status message

            if constexpr (WEB_FETCH_DEBUG) {
                dbgln("Fetch: ResourceLoader load for '{}' {}: (status {})",
                    request->url(),
                    Infrastructure::is_ok_status(response->status()) ? "complete"sv : "failed"sv,
                    response->status());
                log_response(status_code, response_headers, ReadonlyBytes {});
            }

            for (auto const& [name, value] : response_headers) {
                auto header = Infrastructure::Header::from_string_pair(name, value);
                response->header_list()->append(move(header));
            }

            // 14. Set response’s body to a new body whose stream is stream.
            response->set_body(Infrastructure::Body::create(vm, stream));

            // 17. Return response.
            // NOTE: Typically response’s body’s stream is still being enqueued to after returning.
            pending_response->resolve(response);
        };

        // 16. Run these steps in parallel:
        // FIXME: 1. Run these steps, but abort when fetchParams is canceled:
        auto on_data_received = [fetched_data_receiver](auto bytes) {
            // 1. If one or more bytes have been transmitted from response’s message body, then:
            if (!bytes.is_empty()) {
                // 1. Let bytes be the transmitted bytes.

                // FIXME: 2. Let codings be the result of extracting header list values given `Content-Encoding` and response’s header list.
                // FIXME: 3. Increase response’s body info’s encoded size by bytes’s length.
                // FIXME: 4. Set bytes to the result of handling content codings given codings and bytes.
                // FIXME: 5. Increase response’s body info’s decoded size by bytes’s length.
                // FIXME: 6. If bytes is failure, then terminate fetchParams’s controller.

                // 7. Append bytes to buffer.
                fetched_data_receiver->on_data_received(bytes);

                // FIXME: 8. If the size of buffer is larger than an upper limit chosen by the user agent, ask the user agent
                //           to suspend the ongoing fetch.
            }
        };

        auto on_complete = [&vm, &realm, pending_response, stream](auto success, auto error_message) {
            HTML::TemporaryExecutionContext execution_context { Bindings::host_defined_environment_settings_object(realm), HTML::TemporaryExecutionContext::CallbacksEnabled::Yes };

            // 16.1.1.2. Otherwise, if the bytes transmission for response’s message body is done normally and stream is readable,
            //           then close stream, and abort these in-parallel steps.
            if (success) {
                if (stream->is_readable())
                    stream->close();
            }
            // 16.1.2.2. Otherwise, if stream is readable, error stream with a TypeError.
            else {
                auto error = MUST(String::formatted("Load failed: {}", error_message));

                if (stream->is_readable())
                    stream->error(JS::TypeError::create(realm, error));

                if (!pending_response->is_resolved())
                    pending_response->resolve(Infrastructure::Response::network_error(vm, error));
            }
        };

        ResourceLoader::the().load_unbuffered(load_request, move(on_headers_received), move(on_data_received), move(on_complete));
    } else {
        auto on_load_success = [&realm, &vm, request, pending_response](auto data, auto& response_headers, auto status_code) {
            dbgln_if(WEB_FETCH_DEBUG, "Fetch: ResourceLoader load for '{}' complete", request->url());
            if constexpr (WEB_FETCH_DEBUG)
                log_response(status_code, response_headers, data);
@@ -2001,8 +2112,9 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<PendingResponse>> nonstandard_resource_load
            }
            // FIXME: Set response status message
            pending_response->resolve(response);
        },
        [&realm, &vm, request, pending_response](auto& error, auto status_code, auto data, auto& response_headers) {
        };

        auto on_load_error = [&realm, &vm, request, pending_response](auto& error, auto status_code, auto data, auto& response_headers) {
            dbgln_if(WEB_FETCH_DEBUG, "Fetch: ResourceLoader load for '{}' failed: {} (status {})", request->url(), error, status_code.value_or(0));
            if constexpr (WEB_FETCH_DEBUG)
                log_response(status_code, response_headers, data);
@@ -2022,7 +2134,10 @@ WebIDL::ExceptionOr<JS::NonnullGCPtr<PendingResponse>> nonstandard_resource_load
                // FIXME: Set response status message
            }
            pending_response->resolve(response);
        });
        };

        ResourceLoader::the().load(load_request, move(on_load_success), move(on_load_error));
    }

    return pending_response;
}
@@ -30,6 +30,7 @@ public:

    void when_loaded(Callback);
    void resolve(JS::NonnullGCPtr<Infrastructure::Response>);
    bool is_resolved() const { return m_response != nullptr; }

private:
    PendingResponse(JS::NonnullGCPtr<Infrastructure::Request>, JS::GCPtr<Infrastructure::Response> = {});
@@ -250,6 +250,7 @@ JS::NonnullGCPtr<Request> Request::clone(JS::Realm& realm) const
    new_request->set_prevent_no_cache_cache_control_header_modification(m_prevent_no_cache_cache_control_header_modification);
    new_request->set_done(m_done);
    new_request->set_timing_allow_failed(m_timing_allow_failed);
    new_request->set_buffer_policy(m_buffer_policy);

    // 2. If request’s body is non-null, set newRequest’s body to the result of cloning request’s body.
    if (auto const* body = m_body.get_pointer<JS::NonnullGCPtr<Body>>())
@@ -159,6 +159,13 @@ public:
        Auto
    };

    // AD-HOC: Some web features need to receive data as it arrives, rather than when the response is fully complete
    //         or when enough data has been buffered. Use this buffer policy to inform fetch of that requirement.
    enum class BufferPolicy {
        BufferResponse,
        DoNotBufferResponse,
    };

    // Members are implementation-defined
    struct InternalPriority { };
@@ -325,6 +332,9 @@ public:
        m_pending_responses.remove_first_matching([&](auto gc_ptr) { return gc_ptr == pending_response; });
    }

    [[nodiscard]] BufferPolicy buffer_policy() const { return m_buffer_policy; }
    void set_buffer_policy(BufferPolicy buffer_policy) { m_buffer_policy = buffer_policy; }

private:
    explicit Request(JS::NonnullGCPtr<HeaderList>);
@@ -515,6 +525,8 @@ private:

    // Non-standard
    Vector<JS::NonnullGCPtr<Fetching::PendingResponse>> m_pending_responses;

    BufferPolicy m_buffer_policy { BufferPolicy::BufferResponse };
};

StringView request_destination_to_string(Request::Destination);