mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-09-03 08:08:43 +00:00
AK+LibIPC: Implement an encoder/decoder for UTF-16 strings
This commit is contained in:
parent
e36df5ea0a
commit
13ed6aba71
Notes:
github-actions[bot]
2025-08-02 17:11:30 +00:00
Author: https://github.com/trflynn89
Commit: 13ed6aba71
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5633
Reviewed-by: https://github.com/gmta
10 changed files with 183 additions and 11 deletions
|
@ -4,6 +4,7 @@
|
|||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/Stream.h>
|
||||
#include <AK/Utf16String.h>
|
||||
#include <AK/Utf32View.h>
|
||||
|
||||
|
@ -85,6 +86,24 @@ Utf16String Utf16String::from_utf32(Utf32View const& utf32_string)
|
|||
return Utf16String { Detail::Utf16StringData::from_utf32(utf32_string) };
|
||||
}
|
||||
|
||||
// Reads `length_in_code_units` code units from `stream` and builds a Utf16String from them.
//
// ASCII payloads that are no longer than Detail::MAX_SHORT_STRING_BYTE_COUNT are read straight into the inline
// short-string storage. Every other payload is handed to Detail::Utf16StringData::from_ipc_stream.
//
// Returns an error if the stream read fails or if a payload flagged as ASCII contains non-ASCII bytes.
ErrorOr<Utf16String> Utf16String::from_ipc_stream(Stream& stream, size_t length_in_code_units, bool is_ascii)
{
    bool const fits_in_short_storage = is_ascii && length_in_code_units <= Detail::MAX_SHORT_STRING_BYTE_COUNT;

    // Anything that cannot live inline goes through the reference-counted string data.
    if (!fits_in_short_storage)
        return Utf16String { TRY(Detail::Utf16StringData::from_ipc_stream(stream, length_in_code_units, is_ascii)) };

    Utf16String result;
    result.m_value.short_ascii_string = Detail::ShortString::create_with_byte_count(length_in_code_units);

    // Fill the inline storage directly from the stream; one byte per code unit.
    Bytes destination { result.m_value.short_ascii_string.storage, length_in_code_units };
    TRY(stream.read_until_filled(destination));

    // The ASCII flag is supplied by the caller, so check the bytes that were actually read.
    if (!StringView { destination }.is_ascii())
        return Error::from_string_literal("Stream contains invalid ASCII data");

    return result;
}
|
||||
|
||||
Utf16String Utf16String::from_string_builder_without_validation(StringBuilder& builder)
|
||||
{
|
||||
return Utf16String { Detail::Utf16StringData::from_string_builder(builder) };
|
||||
|
|
|
@ -138,6 +138,8 @@ public:
|
|||
return from_string_builder_without_validation(builder);
|
||||
}
|
||||
|
||||
// Builds a string from `length_in_code_units` code units read from the stream. With `is_ascii` set, the payload is
// read as one byte per code unit and validated as ASCII; otherwise it is read as raw UTF-16 code units and validated.
// Returns an error if the read or the validation fails.
static ErrorOr<Utf16String> from_ipc_stream(Stream&, size_t length_in_code_units, bool is_ascii);
|
||||
|
||||
Utf16String to_well_formed() const;
|
||||
String to_well_formed_utf8() const;
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/Stream.h>
|
||||
#include <AK/TypedTransfer.h>
|
||||
#include <AK/Utf16StringData.h>
|
||||
#include <AK/Utf32View.h>
|
||||
|
@ -158,6 +159,31 @@ NonnullRefPtr<Utf16StringData> Utf16StringData::from_string_builder(StringBuilde
|
|||
return adopt_ref(*new (buffer->buffer.data()) Utf16StringData { storage_type, code_unit_length });
|
||||
}
|
||||
|
||||
// Builds string data from a payload read off `stream`.
//
// `length_in_code_units` is the number of code units to read. When `is_ascii` is true the payload is read as one byte
// per code unit and must be entirely ASCII. Otherwise it is read as `sizeof(char16_t)` bytes per code unit and must
// pass Utf16View::validate().
//
// Returns an error if the stream cannot fill the requested bytes, if the payload fails validation, or if the UTF-16
// byte count for `length_in_code_units` is not representable in a size_t.
ErrorOr<NonnullRefPtr<Utf16StringData>> Utf16StringData::from_ipc_stream(Stream& stream, size_t length_in_code_units, bool is_ascii)
{
    RefPtr<Utf16StringData> string;

    if (is_ascii) {
        string = create_uninitialized(StorageType::ASCII, length_in_code_units);

        Bytes bytes { string->m_ascii_data, length_in_code_units };
        TRY(stream.read_until_filled(bytes));

        // The ASCII flag is caller-supplied, so verify the bytes that were actually read.
        if (!string->ascii_view().is_ascii())
            return Error::from_string_literal("Stream contains invalid ASCII data");
    } else {
        // The length is caller-supplied, so make sure the byte count computed below cannot wrap around. A wrapped
        // count would fill fewer bytes than the number of code units this string is created with.
        constexpr size_t max_length_in_code_units = static_cast<size_t>(-1) / sizeof(char16_t);
        if (length_in_code_units > max_length_in_code_units)
            return Error::from_string_literal("Stream contains an invalid UTF-16 string length");

        string = create_uninitialized(StorageType::UTF16, length_in_code_units);

        // The code units are read as raw bytes directly into the string's UTF-16 storage.
        Bytes bytes { reinterpret_cast<u8*>(string->m_utf16_data), length_in_code_units * sizeof(char16_t) };
        TRY(stream.read_until_filled(bytes));

        if (!string->utf16_view().validate())
            return Error::from_string_literal("Stream contains invalid UTF-16 data");
    }

    return string.release_nonnull();
}
|
||||
|
||||
NonnullRefPtr<Utf16StringData> Utf16StringData::to_well_formed(Utf16View const& utf16_string)
|
||||
{
|
||||
VERIFY(!utf16_string.has_ascii_storage());
|
||||
|
|
|
@ -34,6 +34,7 @@ public:
|
|||
static NonnullRefPtr<Utf16StringData> from_utf16(Utf16View const&);
|
||||
static NonnullRefPtr<Utf16StringData> from_utf32(Utf32View const&);
|
||||
static NonnullRefPtr<Utf16StringData> from_string_builder(StringBuilder&);
|
||||
// Reads `length_in_code_units` code units from the stream into newly created string data. With `is_ascii` set, the
// payload is one byte per code unit and must be ASCII; otherwise it is raw UTF-16 code units and must validate.
// Returns an error if the read or the validation fails.
static ErrorOr<NonnullRefPtr<Utf16StringData>> from_ipc_stream(Stream&, size_t length_in_code_units, bool is_ascii);
|
||||
|
||||
static NonnullRefPtr<Utf16StringData> to_well_formed(Utf16View const&);
|
||||
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <andreas@ladybird.org>
|
||||
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
|
||||
* Copyright (c) 2023-2025, Tim Flynn <trflynn89@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/JsonValue.h>
|
||||
#include <AK/NumericLimits.h>
|
||||
#include <AK/Utf16String.h>
|
||||
#include <LibCore/AnonymousBuffer.h>
|
||||
#include <LibCore/DateTime.h>
|
||||
#include <LibCore/Proxy.h>
|
||||
|
@ -30,6 +31,15 @@ ErrorOr<String> decode(Decoder& decoder)
|
|||
return String::from_stream(decoder.stream(), length);
|
||||
}
|
||||
|
||||
template<>
|
||||
ErrorOr<Utf16String> decode(Decoder& decoder)
|
||||
{
|
||||
auto is_ascii = TRY(decoder.decode<bool>());
|
||||
auto length_in_code_units = TRY(decoder.decode_size());
|
||||
|
||||
return Utf16String::from_ipc_stream(decoder.stream(), length_in_code_units, is_ascii);
|
||||
}
|
||||
|
||||
template<>
|
||||
ErrorOr<ByteString> decode(Decoder& decoder)
|
||||
{
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <andreas@ladybird.org>
|
||||
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
|
||||
* Copyright (c) 2023-2025, Tim Flynn <trflynn89@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -88,6 +88,9 @@ ErrorOr<T> decode(Decoder& decoder)
|
|||
template<>
|
||||
ErrorOr<String> decode(Decoder&);
|
||||
|
||||
template<>
|
||||
ErrorOr<Utf16String> decode(Decoder&);
|
||||
|
||||
template<>
|
||||
ErrorOr<ByteString> decode(Decoder&);
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <andreas@ladybird.org>
|
||||
* Copyright (c) 2021, kleines Filmröllchen <filmroellchen@serenityos.org>
|
||||
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
|
||||
* Copyright (c) 2023-2025, Tim Flynn <trflynn89@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -14,6 +14,8 @@
|
|||
#include <AK/NumericLimits.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/Time.h>
|
||||
#include <AK/Utf16String.h>
|
||||
#include <AK/Utf16View.h>
|
||||
#include <LibCore/AnonymousBuffer.h>
|
||||
#include <LibCore/DateTime.h>
|
||||
#include <LibCore/Proxy.h>
|
||||
|
@ -58,6 +60,26 @@ ErrorOr<void> encode(Encoder& encoder, StringView const& value)
|
|||
return {};
|
||||
}
|
||||
|
||||
// Encodes a Utf16String by encoding a view over it; the resulting bytes are whatever encoding value.utf16_view()
// produces.
template<>
|
||||
ErrorOr<void> encode(Encoder& encoder, Utf16String const& value)
|
||||
{
|
||||
return encoder.encode(value.utf16_view());
|
||||
}
|
||||
|
||||
template<>
|
||||
ErrorOr<void> encode(Encoder& encoder, Utf16View const& value)
|
||||
{
|
||||
TRY(encoder.encode(value.has_ascii_storage()));
|
||||
TRY(encoder.encode_size(value.length_in_code_units()));
|
||||
|
||||
if (value.has_ascii_storage())
|
||||
TRY(encoder.append(value.bytes().data(), value.length_in_code_units()));
|
||||
else
|
||||
TRY(encoder.append(reinterpret_cast<u8 const*>(value.utf16_span().data()), value.length_in_code_units() * sizeof(char16_t)));
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
template<>
|
||||
ErrorOr<void> encode(Encoder& encoder, ByteString const& value)
|
||||
{
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2018-2021, Andreas Kling <andreas@ladybird.org>
|
||||
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
|
||||
* Copyright (c) 2023-2025, Tim Flynn <trflynn89@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -87,6 +87,12 @@ ErrorOr<void> encode(Encoder&, String const&);
|
|||
template<>
|
||||
ErrorOr<void> encode(Encoder&, StringView const&);
|
||||
|
||||
template<>
|
||||
ErrorOr<void> encode(Encoder&, Utf16String const&);
|
||||
|
||||
template<>
|
||||
ErrorOr<void> encode(Encoder&, Utf16View const&);
|
||||
|
||||
template<>
|
||||
ErrorOr<void> encode(Encoder&, ByteString const&);
|
||||
|
||||
|
|
|
@ -194,6 +194,8 @@ Vector<Endpoint> parse(ByteBuffer const& file_contents)
|
|||
parameter.type_for_encoding = parameter.type.replace("Vector"sv, "ReadonlySpan"sv, ReplaceMode::FirstOnly);
|
||||
} else if (parameter.type.is_one_of("String"sv, "ByteString"sv)) {
|
||||
parameter.type_for_encoding = "StringView"sv;
|
||||
} else if (parameter.type == "Utf16String"sv) {
|
||||
parameter.type_for_encoding = "Utf16View"sv;
|
||||
} else if (parameter.type == "ByteBuffer"sv) {
|
||||
parameter.type_for_encoding = "ReadonlyBytes"sv;
|
||||
} else {
|
||||
|
@ -518,7 +520,7 @@ private:)~~~");
|
|||
message_generator.appendln("\n};");
|
||||
}
|
||||
|
||||
void generate_proxy_method(SourceGenerator& message_generator, Endpoint const& endpoint, Message const& message, ByteString const& name, Vector<Parameter> const& parameters, bool is_synchronous, bool is_try, bool is_utf8_string_overload = false)
|
||||
void generate_proxy_method(SourceGenerator& message_generator, Endpoint const& endpoint, Message const& message, ByteString const& name, Vector<Parameter> const& parameters, bool is_synchronous, bool is_try, bool is_unicode_string_overload = false)
|
||||
{
|
||||
// FIXME: For String parameters, we want to retain the property that all transferred String objects are strictly UTF-8.
|
||||
// So instead of generating a single proxy method that accepts StringView parameters, we generate two overloads.
|
||||
|
@ -527,7 +529,7 @@ void generate_proxy_method(SourceGenerator& message_generator, Endpoint const& e
|
|||
//
|
||||
// Ideally, we will eventually have separate StringView types for each of String and ByteString, where String's
|
||||
// view internally provides UTF-8 guarantees. Then we won't need these overloads.
|
||||
bool generate_utf8_string_overload = false;
|
||||
bool generate_unicode_string_overload = false;
|
||||
|
||||
ByteString return_type = "void";
|
||||
if (is_synchronous) {
|
||||
|
@ -554,7 +556,7 @@ void generate_proxy_method(SourceGenerator& message_generator, Endpoint const& e
|
|||
ByteString type;
|
||||
if (is_synchronous || is_try)
|
||||
type = parameter.type;
|
||||
else if (is_utf8_string_overload)
|
||||
else if (is_unicode_string_overload)
|
||||
type = make_argument_type(parameter.type);
|
||||
else
|
||||
type = make_argument_type(parameter.type_for_encoding);
|
||||
|
@ -569,7 +571,7 @@ void generate_proxy_method(SourceGenerator& message_generator, Endpoint const& e
|
|||
|
||||
message_generator.append(") {");
|
||||
|
||||
if (!is_synchronous && !is_try && !is_utf8_string_overload) {
|
||||
if (!is_synchronous && !is_try && !is_unicode_string_overload) {
|
||||
for (auto const& parameter : parameters) {
|
||||
auto const& type = is_synchronous || is_try ? parameter.type : parameter.type_for_encoding;
|
||||
|
||||
|
@ -579,7 +581,14 @@ void generate_proxy_method(SourceGenerator& message_generator, Endpoint const& e
|
|||
argument_generator.append(R"~~~(
|
||||
VERIFY(Utf8View { @argument.name@ }.validate());)~~~");
|
||||
|
||||
generate_utf8_string_overload = true;
|
||||
generate_unicode_string_overload = true;
|
||||
} else if (parameter.type == "Utf16String"sv && type == "Utf16View"sv) {
|
||||
auto argument_generator = message_generator.fork();
|
||||
argument_generator.set("argument.name", parameter.name);
|
||||
argument_generator.append(R"~~~(
|
||||
VERIFY(@argument.name@.validate());)~~~");
|
||||
|
||||
generate_unicode_string_overload = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -655,8 +664,8 @@ void generate_proxy_method(SourceGenerator& message_generator, Endpoint const& e
|
|||
message_generator.appendln(R"~~~(
|
||||
})~~~");
|
||||
|
||||
if (generate_utf8_string_overload)
|
||||
generate_proxy_method(message_generator, endpoint, message, message.name, message.inputs, is_synchronous, is_try, generate_utf8_string_overload);
|
||||
if (generate_unicode_string_overload)
|
||||
generate_proxy_method(message_generator, endpoint, message, message.name, message.inputs, is_synchronous, is_try, generate_unicode_string_overload);
|
||||
}
|
||||
|
||||
void do_message_for_proxy(SourceGenerator message_generator, Endpoint const& endpoint, Message const& message)
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include <AK/Array.h>
|
||||
#include <AK/CharacterTypes.h>
|
||||
#include <AK/Enumerate.h>
|
||||
#include <AK/MemoryStream.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/Utf16String.h>
|
||||
#include <AK/Utf32View.h>
|
||||
|
@ -411,6 +412,79 @@ TEST_CASE(repeated)
|
|||
EXPECT_DEATH("Creating a string from an invalid code point", (void)Utf16String::repeated(0xffffffff, 1));
|
||||
}
|
||||
|
||||
// Exercises Utf16String::from_ipc_stream with ASCII and UTF-16 payloads, covering both successful reads and
// input that is expected to be rejected.
TEST_CASE(from_ipc_stream)
|
||||
{
|
||||
// A 3-byte ASCII payload read with is_ascii=true: expected to end up in short ASCII storage.
{
|
||||
auto data = "abc"sv;
|
||||
FixedMemoryStream stream { data.bytes() };
|
||||
|
||||
auto string = TRY_OR_FAIL(Utf16String::from_ipc_stream(stream, data.length(), true));
|
||||
EXPECT(string.is_ascii());
|
||||
EXPECT(!string.has_long_ascii_storage());
|
||||
EXPECT(string.has_short_ascii_storage());
|
||||
EXPECT_EQ(string.length_in_code_units(), 3uz);
|
||||
EXPECT_EQ(string, data);
|
||||
}
|
||||
// A 26-byte ASCII payload read with is_ascii=true: expected to end up in long ASCII storage.
{
|
||||
auto data = "abcdefghijklmnopqrstuvwxyz"sv;
|
||||
FixedMemoryStream stream { data.bytes() };
|
||||
|
||||
auto string = TRY_OR_FAIL(Utf16String::from_ipc_stream(stream, data.length(), true));
|
||||
EXPECT(string.is_ascii());
|
||||
EXPECT(string.has_long_ascii_storage());
|
||||
EXPECT(!string.has_short_ascii_storage());
|
||||
EXPECT_EQ(string.length_in_code_units(), 26uz);
|
||||
EXPECT_EQ(string, data);
|
||||
}
|
||||
// A UTF-16 payload containing an emoji, read with is_ascii=false: expected to be non-ASCII with 15 code units.
{
|
||||
auto data = u"hello 😀 there!"sv;
|
||||
|
||||
StringBuilder builder(StringBuilder::Mode::UTF16);
|
||||
builder.append(data);
|
||||
|
||||
auto buffer = MUST(builder.to_byte_buffer());
|
||||
FixedMemoryStream stream { buffer.bytes() };
|
||||
|
||||
auto string = TRY_OR_FAIL(Utf16String::from_ipc_stream(stream, data.length_in_code_units(), false));
|
||||
EXPECT(!string.is_ascii());
|
||||
EXPECT(!string.has_long_ascii_storage());
|
||||
EXPECT(!string.has_short_ascii_storage());
|
||||
EXPECT_EQ(string.length_in_code_units(), 15uz);
|
||||
EXPECT_EQ(string, data);
|
||||
}
|
||||
// Requesting one more code unit than the stream holds: expected to fail.
{
|
||||
auto data = "abc"sv;
|
||||
FixedMemoryStream stream { data.bytes() };
|
||||
|
||||
auto result = Utf16String::from_ipc_stream(stream, data.length() + 1, true);
|
||||
EXPECT(result.is_error());
|
||||
}
|
||||
// UTF-16 encoded bytes of a single emoji read with is_ascii=true: expected to fail.
{
|
||||
auto data = u"😀"sv;
|
||||
|
||||
StringBuilder builder(StringBuilder::Mode::UTF16);
|
||||
builder.append(data);
|
||||
|
||||
auto buffer = MUST(builder.to_byte_buffer());
|
||||
FixedMemoryStream stream { buffer.bytes() };
|
||||
|
||||
auto result = Utf16String::from_ipc_stream(stream, data.length_in_code_units(), true);
|
||||
EXPECT(result.is_error());
|
||||
}
|
||||
// The same ASCII/UTF-16 mismatch with a longer payload: expected to fail.
{
|
||||
auto data = u"hello 😀 there!"sv;
|
||||
|
||||
StringBuilder builder(StringBuilder::Mode::UTF16);
|
||||
builder.append(data);
|
||||
|
||||
auto buffer = MUST(builder.to_byte_buffer());
|
||||
FixedMemoryStream stream { buffer.bytes() };
|
||||
|
||||
auto result = Utf16String::from_ipc_stream(stream, data.length_in_code_units(), true);
|
||||
EXPECT(result.is_error());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(to_lowercase_unconditional_special_casing)
|
||||
{
|
||||
// LATIN SMALL LETTER SHARP S
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue