mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-09 02:21:53 +00:00
LibWeb: Implement TextEncoderStream
Required by the server-side rendering mode of React Router, used by https://chatgpt.com/ Note that the imported tests do not have the worker variants to prevent freezing on macOS.
This commit is contained in:
parent
24d5f24749
commit
cae0ee6fa7
Notes:
github-actions[bot]
2025-02-07 16:05:51 +00:00
Author: https://github.com/Lubrsi
Commit: cae0ee6fa7
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/3481
Reviewed-by: https://github.com/trflynn89 ✅
36 changed files with 1375 additions and 0 deletions
215
Libraries/LibWeb/Encoding/TextEncoderStream.cpp
Normal file
215
Libraries/LibWeb/Encoding/TextEncoderStream.cpp
Normal file
|
@ -0,0 +1,215 @@
|
|||
/*
|
||||
* Copyright (c) 2025, Luke Wilde <luke@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/UnicodeUtils.h>
|
||||
#include <LibJS/Runtime/ArrayBuffer.h>
|
||||
#include <LibJS/Runtime/Realm.h>
|
||||
#include <LibJS/Runtime/TypedArray.h>
|
||||
#include <LibWeb/Bindings/ExceptionOrUtils.h>
|
||||
#include <LibWeb/Bindings/Intrinsics.h>
|
||||
#include <LibWeb/Bindings/TextEncoderStreamPrototype.h>
|
||||
#include <LibWeb/Encoding/TextEncoderStream.h>
|
||||
#include <LibWeb/Streams/AbstractOperations.h>
|
||||
#include <LibWeb/Streams/TransformStream.h>
|
||||
#include <LibWeb/WebIDL/Promise.h>
|
||||
|
||||
namespace Web::Encoding {
|
||||
|
||||
GC_DEFINE_ALLOCATOR(TextEncoderStream);
|
||||
|
||||
// https://encoding.spec.whatwg.org/#dom-textencoderstream
|
||||
WebIDL::ExceptionOr<GC::Ref<TextEncoderStream>> TextEncoderStream::construct_impl(JS::Realm& realm)
|
||||
{
|
||||
// 1. Set this’s encoder to an instance of the UTF-8 encoder.
|
||||
// NOTE: No-op, as AK::String is already in UTF-8 format.
|
||||
|
||||
// NOTE: We do these steps first so that we may store it as nonnull in the GenericTransformStream.
|
||||
// 4. Let transformStream be a new TransformStream.
|
||||
auto transform_stream = realm.create<Streams::TransformStream>(realm);
|
||||
|
||||
// 6. Set this's transform to a new TransformStream.
|
||||
auto stream = realm.create<TextEncoderStream>(realm, transform_stream);
|
||||
|
||||
// 2. Let transformAlgorithm be an algorithm which takes a chunk argument and runs the encode and enqueue a chunk
|
||||
// algorithm with this and chunk.
|
||||
auto transform_algorithm = GC::create_function(realm.heap(), [stream](JS::Value chunk) -> GC::Ref<WebIDL::Promise> {
|
||||
auto& realm = stream->realm();
|
||||
auto& vm = realm.vm();
|
||||
|
||||
if (auto result = stream->encode_and_enqueue_chunk(chunk); result.is_error()) {
|
||||
auto throw_completion = Bindings::exception_to_throw_completion(vm, result.exception());
|
||||
return WebIDL::create_rejected_promise(realm, *throw_completion.release_value());
|
||||
}
|
||||
|
||||
return WebIDL::create_resolved_promise(realm, JS::js_undefined());
|
||||
});
|
||||
|
||||
// 3. Let flushAlgorithm be an algorithm which runs the encode and flush algorithm with this.
|
||||
auto flush_algorithm = GC::create_function(realm.heap(), [stream]() -> GC::Ref<WebIDL::Promise> {
|
||||
auto& realm = stream->realm();
|
||||
auto& vm = realm.vm();
|
||||
|
||||
if (auto result = stream->encode_and_flush(); result.is_error()) {
|
||||
auto throw_completion = Bindings::exception_to_throw_completion(vm, result.exception());
|
||||
return WebIDL::create_rejected_promise(realm, *throw_completion.release_value());
|
||||
}
|
||||
|
||||
return WebIDL::create_resolved_promise(realm, JS::js_undefined());
|
||||
});
|
||||
|
||||
// 5. Set up transformStream with transformAlgorithm set to transformAlgorithm and flushAlgorithm set to flushAlgorithm.
|
||||
transform_stream->set_up(transform_algorithm, flush_algorithm);
|
||||
|
||||
return stream;
|
||||
}
|
||||
|
||||
TextEncoderStream::TextEncoderStream(JS::Realm& realm, GC::Ref<Streams::TransformStream> transform)
|
||||
: Bindings::PlatformObject(realm)
|
||||
, Streams::GenericTransformStreamMixin(transform)
|
||||
{
|
||||
}
|
||||
|
||||
TextEncoderStream::~TextEncoderStream() = default;
|
||||
|
||||
void TextEncoderStream::initialize(JS::Realm& realm)
|
||||
{
|
||||
Base::initialize(realm);
|
||||
WEB_SET_PROTOTYPE_FOR_INTERFACE(TextEncoderStream);
|
||||
}
|
||||
|
||||
void TextEncoderStream::visit_edges(JS::Cell::Visitor& visitor)
|
||||
{
|
||||
Base::visit_edges(visitor);
|
||||
Streams::GenericTransformStreamMixin::visit_edges(visitor);
|
||||
}
|
||||
|
||||
// https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk
|
||||
WebIDL::ExceptionOr<void> TextEncoderStream::encode_and_enqueue_chunk(JS::Value chunk)
|
||||
{
|
||||
// Spec Note: This is equivalent to the "convert a string into a scalar value string" algorithm from the Infra
|
||||
// Standard, but allows for surrogate pairs that are split between strings. [INFRA]
|
||||
|
||||
auto& realm = this->realm();
|
||||
auto& vm = this->vm();
|
||||
|
||||
// 1. Let input be the result of converting chunk to a DOMString.
|
||||
auto input = TRY(chunk.to_string(vm));
|
||||
|
||||
// 2. Convert input to an I/O queue of code units.
|
||||
// Spec Note: DOMString, as well as an I/O queue of code units rather than scalar values, are used here so that a
|
||||
// surrogate pair that is split between chunks can be reassembled into the appropriate scalar value.
|
||||
// The behavior is otherwise identical to USVString. In particular, lone surrogates will be replaced
|
||||
// with U+FFFD.
|
||||
auto code_points = input.code_points();
|
||||
auto it = code_points.begin();
|
||||
|
||||
// 3. Let output be the I/O queue of bytes « end-of-queue ».
|
||||
ByteBuffer output;
|
||||
|
||||
// 4. While true:
|
||||
while (true) {
|
||||
// 2. If item is end-of-queue, then:
|
||||
// NOTE: This is done out-of-order so that we're not dereferencing a code point iterator that points to the end.
|
||||
if (it.done()) {
|
||||
// 1. Convert output into a byte sequence.
|
||||
// Note: No-op.
|
||||
|
||||
// 2. If output is non-empty, then:
|
||||
if (!output.is_empty()) {
|
||||
// 1. Let chunk be a Uint8Array object wrapping an ArrayBuffer containing output.
|
||||
auto array_buffer = JS::ArrayBuffer::create(realm, move(output));
|
||||
auto array = JS::Uint8Array::create(realm, array_buffer->byte_length(), *array_buffer);
|
||||
|
||||
// 2. Enqueue chunk into encoder’s transform.
|
||||
TRY(Streams::transform_stream_default_controller_enqueue(*m_transform->controller(), array));
|
||||
}
|
||||
|
||||
// 3. Return.
|
||||
return {};
|
||||
}
|
||||
|
||||
// 1. Let item be the result of reading from input.
|
||||
auto item = *it;
|
||||
|
||||
// 3. Let result be the result of executing the convert code unit to scalar value algorithm with encoder, item and input.
|
||||
auto result = convert_code_unit_to_scalar_value(item, it);
|
||||
|
||||
// 4. If result is not continue, then process an item with result, encoder’s encoder, input, output, and "fatal".
|
||||
if (result.has_value()) {
|
||||
(void)AK::UnicodeUtils::code_point_to_utf8(result.value(), [&output](char utf8_byte) {
|
||||
output.append(static_cast<u8>(utf8_byte));
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// https://encoding.spec.whatwg.org/#encode-and-flush
|
||||
WebIDL::ExceptionOr<void> TextEncoderStream::encode_and_flush()
|
||||
{
|
||||
auto& realm = this->realm();
|
||||
|
||||
// 1. If encoder’s leading surrogate is non-null, then:
|
||||
if (m_leading_surrogate.has_value()) {
|
||||
// 1. Let chunk be a Uint8Array object wrapping an ArrayBuffer containing 0xEF 0xBF 0xBD.
|
||||
// Spec Note: This is U+FFFD (<28>) in UTF-8 bytes.
|
||||
constexpr static u8 replacement_character_utf8_bytes[3] = { 0xEF, 0xBF, 0xBD };
|
||||
auto bytes = MUST(ByteBuffer::copy(replacement_character_utf8_bytes, sizeof(replacement_character_utf8_bytes)));
|
||||
auto array_buffer = JS::ArrayBuffer::create(realm, bytes);
|
||||
auto chunk = JS::Uint8Array::create(realm, array_buffer->byte_length(), *array_buffer);
|
||||
|
||||
// 2. Enqueue chunk into encoder’s transform.
|
||||
TRY(Streams::transform_stream_default_controller_enqueue(*m_transform->controller(), chunk));
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
// https://encoding.spec.whatwg.org/#convert-code-unit-to-scalar-value
|
||||
Optional<u32> TextEncoderStream::convert_code_unit_to_scalar_value(u32 item, Utf8CodePointIterator& code_point_iterator)
|
||||
{
|
||||
ArmedScopeGuard move_to_next_code_point_guard = [&] {
|
||||
++code_point_iterator;
|
||||
};
|
||||
|
||||
// 1. If encoder’s leading surrogate is non-null, then:
|
||||
if (m_leading_surrogate.has_value()) {
|
||||
// 1. Let leadingSurrogate be encoder’s leading surrogate.
|
||||
auto leading_surrogate = m_leading_surrogate.value();
|
||||
|
||||
// 2. Set encoder’s leading surrogate to null.
|
||||
m_leading_surrogate.clear();
|
||||
|
||||
// 3. If item is a trailing surrogate, then return a scalar value from surrogates given leadingSurrogate
|
||||
// and item.
|
||||
if (Utf16View::is_low_surrogate(item)) {
|
||||
// https://encoding.spec.whatwg.org/#scalar-value-from-surrogates
|
||||
// To obtain a scalar value from surrogates, given a leading surrogate leading and a trailing surrogate
|
||||
// trailing, return 0x10000 + ((leading − 0xD800) << 10) + (trailing − 0xDC00).
|
||||
return Utf16View::decode_surrogate_pair(leading_surrogate, item);
|
||||
}
|
||||
|
||||
// 4. Restore item to input.
|
||||
move_to_next_code_point_guard.disarm();
|
||||
|
||||
// 5. Return U+FFFD.
|
||||
return 0xFFFD;
|
||||
}
|
||||
|
||||
// 2. If item is a leading surrogate, then set encoder’s leading surrogate to item and return continue.
|
||||
if (Utf16View::is_high_surrogate(item)) {
|
||||
m_leading_surrogate = item;
|
||||
return OptionalNone {};
|
||||
}
|
||||
|
||||
// 3. If item is a trailing surrogate, then return U+FFFD.
|
||||
if (Utf16View::is_low_surrogate(item))
|
||||
return 0xFFFD;
|
||||
|
||||
// 4. Return item.
|
||||
return item;
|
||||
}
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue