LibWeb: Add TextEncoder encodeInto

This commit is contained in:
Bastiaan van der Plaat 2023-10-06 07:03:44 +02:00 committed by Andreas Kling
parent f1ead552ce
commit 0104225d9b
Notes: sideshowbarker 2024-07-17 11:34:34 +09:00
9 changed files with 123 additions and 10 deletions

View file

@ -30,10 +30,9 @@ void TextEncoder::initialize(JS::Realm& realm)
}
// https://encoding.spec.whatwg.org/#dom-textencoder-encode
JS::Uint8Array* TextEncoder::encode(String const& input) const
JS::NonnullGCPtr<JS::Uint8Array> TextEncoder::encode(String const& input) const
{
// NOTE: The AK::DeprecatedString returned from PrimitiveString::string() is always UTF-8, regardless of the internal string type, so most of these steps are no-ops.
// NOTE: The AK::String is always UTF-8, so most of these steps are no-ops.
// 1. Convert input to an I/O queue of scalar values.
// 2. Let output be the I/O queue of bytes « end-of-queue ».
// 3. While true:
@ -48,10 +47,67 @@ JS::Uint8Array* TextEncoder::encode(String const& input) const
return JS::Uint8Array::create(realm(), array_length, *array_buffer);
}
// https://encoding.spec.whatwg.org/#dom-textencoder-encodeinto
// Copies the UTF-8 bytes of `source` into the byte storage of `destination`, one whole code point
// at a time, stopping at the first code point whose bytes no longer fit.
// Returns { read, written }: `read` advances by 2 for code points above U+FFFF and by 1 otherwise,
// `written` is the number of bytes stored into `destination`.
// NOTE(review): `destination` is downcast to JS::Uint8Array without a runtime check here;
// presumably the caller guarantees the type - confirm.
TextEncoderEncodeIntoResult TextEncoder::encode_into(String const& source, JS::Handle<JS::Object> const& destination) const
{
    auto& target_array = static_cast<JS::Uint8Array&>(*destination);
    auto target_bytes = target_array.data();

    // 1. Let read be 0.
    // 2. Let written be 0.
    unsigned long long read = 0;
    unsigned long long written = 0;

    // NOTE: The AK::String is always UTF-8, so most of these steps are no-ops.
    // 3. Let encoder be an instance of the UTF-8 encoder.
    // 4. Let unused be the I/O queue of scalar values « end-of-queue ».
    // 5. Convert source to an I/O queue of scalar values.
    auto code_points = source.code_points();

    // 6. While true:
    //    6.1. Let item be the result of reading from source.
    //    6.2. Let result be the result of running encoder's handler on unused and item.
    //    6.3. If result is finished, then break.
    //         (Expressed here as the loop condition: stop once the iterator is exhausted.)
    for (auto it = code_points.begin(); !it.done(); ++it) {
        auto const code_point = *it;
        // The string is already UTF-8, so the encoder output for this code point is simply
        // the bytes the iterator is currently positioned on.
        auto encoded = it.underlying_code_point_bytes();

        // 6.4. Otherwise:
        //    6.4.1. If destination's byte length - written is greater than or equal to the number of bytes in result, then:
        //    6.4.2. Otherwise, break.
        //           (Checked first as a guard so the success path below is not nested.)
        if (target_bytes.size() - written < encoded.size())
            break;

        // 6.4.1.1. If item is greater than U+FFFF, then increment read by 2.
        // 6.4.1.2. Otherwise, increment read by 1.
        read += (code_point > 0xffff) ? 2 : 1;

        // 6.4.1.3. Write the bytes in result into destination, with startingOffset set to written.
        // 6.4.1.4. Increment written by the number of bytes in result.
        for (auto byte : encoded)
            target_bytes[written++] = byte;
    }

    // 7. Return «[ "read" → read, "written" → written ]».
    return { read, written };
}
// https://encoding.spec.whatwg.org/#dom-textencoder-encoding
// Returns the encoding name reported by this encoder, which is always "utf-8".
FlyString const& TextEncoder::encoding()
{
    // Function-local static: built once on first call and kept alive afterwards,
    // so handing out a reference to it is safe.
    // Only a single declaration may exist here; a second `encoding` in the same
    // scope would be a redefinition.
    static FlyString const encoding = "utf-8"_fly_string;
    return encoding;
}