diff --git a/AK/Utf16String.cpp b/AK/Utf16String.cpp index 4accbcbb905..1ae6405a5c2 100644 --- a/AK/Utf16String.cpp +++ b/AK/Utf16String.cpp @@ -4,6 +4,7 @@ * SPDX-License-Identifier: BSD-2-Clause */ +#include #include #include @@ -85,6 +86,24 @@ Utf16String Utf16String::from_utf32(Utf32View const& utf32_string) return Utf16String { Detail::Utf16StringData::from_utf32(utf32_string) }; } +ErrorOr Utf16String::from_ipc_stream(Stream& stream, size_t length_in_code_units, bool is_ascii) +{ + if (is_ascii && length_in_code_units <= Detail::MAX_SHORT_STRING_BYTE_COUNT) { + Utf16String string; + string.m_value.short_ascii_string = Detail::ShortString::create_with_byte_count(length_in_code_units); + + Bytes bytes { string.m_value.short_ascii_string.storage, length_in_code_units }; + TRY(stream.read_until_filled(bytes)); + + if (!StringView { bytes }.is_ascii()) + return Error::from_string_literal("Stream contains invalid ASCII data"); + + return string; + } + + return Utf16String { TRY(Detail::Utf16StringData::from_ipc_stream(stream, length_in_code_units, is_ascii)) }; +} + Utf16String Utf16String::from_string_builder_without_validation(StringBuilder& builder) { return Utf16String { Detail::Utf16StringData::from_string_builder(builder) }; diff --git a/AK/Utf16String.h b/AK/Utf16String.h index 6b7644691a0..eb611ad8c69 100644 --- a/AK/Utf16String.h +++ b/AK/Utf16String.h @@ -138,6 +138,8 @@ public: return from_string_builder_without_validation(builder); } + static ErrorOr from_ipc_stream(Stream&, size_t length_in_code_units, bool is_ascii); + Utf16String to_well_formed() const; String to_well_formed_utf8() const; diff --git a/AK/Utf16StringData.cpp b/AK/Utf16StringData.cpp index 437551d57a9..e2d1102dee0 100644 --- a/AK/Utf16StringData.cpp +++ b/AK/Utf16StringData.cpp @@ -4,6 +4,7 @@ * SPDX-License-Identifier: BSD-2-Clause */ +#include #include #include #include @@ -158,6 +159,31 @@ NonnullRefPtr Utf16StringData::from_string_builder(StringBuilde return adopt_ref(*new (buffer->buffer.data()) Utf16StringData { storage_type, code_unit_length }); } +ErrorOr> Utf16StringData::from_ipc_stream(Stream& stream, size_t length_in_code_units, bool is_ascii) +{ + RefPtr string; + + if (is_ascii) { + string = create_uninitialized(StorageType::ASCII, length_in_code_units); + + Bytes bytes { string->m_ascii_data, length_in_code_units }; + TRY(stream.read_until_filled(bytes)); + + if (!string->ascii_view().is_ascii()) + return Error::from_string_literal("Stream contains invalid ASCII data"); + } else { + string = create_uninitialized(StorageType::UTF16, length_in_code_units); + + Bytes bytes { reinterpret_cast(string->m_utf16_data), length_in_code_units * sizeof(char16_t) }; + TRY(stream.read_until_filled(bytes)); + + if (!string->utf16_view().validate()) + return Error::from_string_literal("Stream contains invalid UTF-16 data"); + } + + return string.release_nonnull(); +} + NonnullRefPtr Utf16StringData::to_well_formed(Utf16View const& utf16_string) { VERIFY(!utf16_string.has_ascii_storage()); diff --git a/AK/Utf16StringData.h b/AK/Utf16StringData.h index 85924b0aec1..dc2f0902ace 100644 --- a/AK/Utf16StringData.h +++ b/AK/Utf16StringData.h @@ -34,6 +34,7 @@ public: static NonnullRefPtr from_utf16(Utf16View const&); static NonnullRefPtr from_utf32(Utf32View const&); static NonnullRefPtr from_string_builder(StringBuilder&); + static ErrorOr> from_ipc_stream(Stream&, size_t length_in_code_units, bool is_ascii); static NonnullRefPtr to_well_formed(Utf16View const&); diff --git a/Libraries/LibIPC/Decoder.cpp b/Libraries/LibIPC/Decoder.cpp index 9e8df3711b1..6d8a9f315e8 100644 --- a/Libraries/LibIPC/Decoder.cpp +++ b/Libraries/LibIPC/Decoder.cpp @@ -1,12 +1,13 @@ /* * Copyright (c) 2020, Andreas Kling - * Copyright (c) 2023, Tim Flynn + * Copyright (c) 2023-2025, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ #include #include +#include #include #include #include @@ -30,6 +31,15 @@ ErrorOr decode(Decoder& decoder) return String::from_stream(decoder.stream(), length); } +template<> +ErrorOr decode(Decoder& decoder) +{ + auto is_ascii = TRY(decoder.decode()); + auto length_in_code_units = TRY(decoder.decode_size()); + + return Utf16String::from_ipc_stream(decoder.stream(), length_in_code_units, is_ascii); +} + template<> ErrorOr decode(Decoder& decoder) { diff --git a/Libraries/LibIPC/Decoder.h b/Libraries/LibIPC/Decoder.h index 3666285990e..909b8d479d7 100644 --- a/Libraries/LibIPC/Decoder.h +++ b/Libraries/LibIPC/Decoder.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2020, Andreas Kling - * Copyright (c) 2023, Tim Flynn + * Copyright (c) 2023-2025, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -88,6 +88,9 @@ ErrorOr decode(Decoder& decoder) template<> ErrorOr decode(Decoder&); +template<> +ErrorOr decode(Decoder&); + template<> ErrorOr decode(Decoder&); diff --git a/Libraries/LibIPC/Encoder.cpp b/Libraries/LibIPC/Encoder.cpp index a39bd882a37..bef7a86d3ed 100644 --- a/Libraries/LibIPC/Encoder.cpp +++ b/Libraries/LibIPC/Encoder.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2020, Andreas Kling * Copyright (c) 2021, kleines Filmröllchen - * Copyright (c) 2023, Tim Flynn + * Copyright (c) 2023-2025, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include #include @@ -58,6 +60,26 @@ ErrorOr encode(Encoder& encoder, StringView const& value) return {}; } +template<> +ErrorOr encode(Encoder& encoder, Utf16String const& value) +{ + return encoder.encode(value.utf16_view()); +} + +template<> +ErrorOr encode(Encoder& encoder, Utf16View const& value) +{ + TRY(encoder.encode(value.has_ascii_storage())); + TRY(encoder.encode_size(value.length_in_code_units())); + + if (value.has_ascii_storage()) + TRY(encoder.append(value.bytes().data(), value.length_in_code_units())); + else + TRY(encoder.append(reinterpret_cast(value.utf16_span().data()), value.length_in_code_units() * sizeof(char16_t))); + + return {}; +} + template<> ErrorOr encode(Encoder& encoder, ByteString const& value) { diff --git a/Libraries/LibIPC/Encoder.h b/Libraries/LibIPC/Encoder.h index b4ebf037d09..ea1dc30a139 100644 --- a/Libraries/LibIPC/Encoder.h +++ b/Libraries/LibIPC/Encoder.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2018-2021, Andreas Kling - * Copyright (c) 2023, Tim Flynn + * Copyright (c) 2023-2025, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -87,6 +87,12 @@ ErrorOr encode(Encoder&, String const&); template<> ErrorOr encode(Encoder&, StringView const&); +template<> +ErrorOr encode(Encoder&, Utf16String const&); + +template<> +ErrorOr encode(Encoder&, Utf16View const&); + template<> ErrorOr encode(Encoder&, ByteString const&); diff --git a/Meta/Lagom/Tools/CodeGenerators/IPCCompiler/main.cpp b/Meta/Lagom/Tools/CodeGenerators/IPCCompiler/main.cpp index 3081454bd78..6ceedb1d39a 100644 --- a/Meta/Lagom/Tools/CodeGenerators/IPCCompiler/main.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/IPCCompiler/main.cpp @@ -194,6 +194,8 @@ Vector parse(ByteBuffer const& file_contents) parameter.type_for_encoding = parameter.type.replace("Vector"sv, "ReadonlySpan"sv, ReplaceMode::FirstOnly); } else if (parameter.type.is_one_of("String"sv, "ByteString"sv)) { parameter.type_for_encoding = "StringView"sv; + } else if (parameter.type == "Utf16String"sv) { + parameter.type_for_encoding = "Utf16View"sv; } else if (parameter.type == "ByteBuffer"sv) { parameter.type_for_encoding = "ReadonlyBytes"sv; } else { @@ -518,7 +520,7 @@ private:)~~~"); message_generator.appendln("\n};"); } -void generate_proxy_method(SourceGenerator& message_generator, Endpoint const& endpoint, Message const& message, ByteString const& name, Vector const& parameters, bool is_synchronous, bool is_try, bool is_utf8_string_overload = false) +void generate_proxy_method(SourceGenerator& message_generator, Endpoint const& endpoint, Message const& message, ByteString const& name, Vector const& parameters, bool is_synchronous, bool is_try, bool is_unicode_string_overload = false) { // FIXME: For String parameters, we want to retain the property that all tranferred String objects are strictly UTF-8. // So instead of generating a single proxy method that accepts StringView parameters, we generate two overloads. @@ -527,7 +529,7 @@ void generate_proxy_method(SourceGenerator& message_generator, Endpoint const& e // // Ideally, we will eventually have separate StringView types for each of String and ByteString, where String's // view internally provides UTF-8 guarantees. Then we won't need these overloads. - bool generate_utf8_string_overload = false; + bool generate_unicode_string_overload = false; ByteString return_type = "void"; if (is_synchronous) { @@ -554,7 +556,7 @@ void generate_proxy_method(SourceGenerator& message_generator, Endpoint const& e ByteString type; if (is_synchronous || is_try) type = parameter.type; - else if (is_utf8_string_overload) + else if (is_unicode_string_overload) type = make_argument_type(parameter.type); else type = make_argument_type(parameter.type_for_encoding); @@ -569,7 +571,7 @@ void generate_proxy_method(SourceGenerator& message_generator, Endpoint const& e message_generator.append(") {"); - if (!is_synchronous && !is_try && !is_utf8_string_overload) { + if (!is_synchronous && !is_try && !is_unicode_string_overload) { for (auto const& parameter : parameters) { auto const& type = is_synchronous || is_try ? parameter.type : parameter.type_for_encoding; @@ -579,7 +581,14 @@ void generate_proxy_method(SourceGenerator& message_generator, Endpoint const& e argument_generator.append(R"~~~( VERIFY(Utf8View { @argument.name@ }.validate());)~~~"); - generate_utf8_string_overload = true; + generate_unicode_string_overload = true; + } else if (parameter.type == "Utf16String"sv && type == "Utf16View"sv) { + auto argument_generator = message_generator.fork(); + argument_generator.set("argument.name", parameter.name); + argument_generator.append(R"~~~( + VERIFY(@argument.name@.validate());)~~~"); + + generate_unicode_string_overload = true; } } } @@ -655,8 +664,8 @@ void generate_proxy_method(SourceGenerator& message_generator, Endpoint const& e message_generator.appendln(R"~~~( })~~~"); - if (generate_utf8_string_overload) - generate_proxy_method(message_generator, endpoint, message, message.name, message.inputs, is_synchronous, is_try, generate_utf8_string_overload); + if (generate_unicode_string_overload) + generate_proxy_method(message_generator, endpoint, message, message.name, message.inputs, is_synchronous, is_try, generate_unicode_string_overload); } void do_message_for_proxy(SourceGenerator message_generator, Endpoint const& endpoint, Message const& message) diff --git a/Tests/AK/TestUtf16String.cpp b/Tests/AK/TestUtf16String.cpp index c4e35ca719d..c5b5ba0e69b 100644 --- a/Tests/AK/TestUtf16String.cpp +++ b/Tests/AK/TestUtf16String.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -411,6 +412,79 @@ TEST_CASE(repeated) EXPECT_DEATH("Creating a string from an invalid code point", (void)Utf16String::repeated(0xffffffff, 1)); } +TEST_CASE(from_ipc_stream) +{ + { + auto data = "abc"sv; + FixedMemoryStream stream { data.bytes() }; + + auto string = TRY_OR_FAIL(Utf16String::from_ipc_stream(stream, data.length(), true)); + EXPECT(string.is_ascii()); + EXPECT(!string.has_long_ascii_storage()); + EXPECT(string.has_short_ascii_storage()); + EXPECT_EQ(string.length_in_code_units(), 3uz); + EXPECT_EQ(string, data); + } + { + auto data = "abcdefghijklmnopqrstuvwxyz"sv; + FixedMemoryStream stream { data.bytes() }; + + auto string = TRY_OR_FAIL(Utf16String::from_ipc_stream(stream, data.length(), true)); + EXPECT(string.is_ascii()); + EXPECT(string.has_long_ascii_storage()); + EXPECT(!string.has_short_ascii_storage()); + EXPECT_EQ(string.length_in_code_units(), 26uz); + EXPECT_EQ(string, data); + } + { + auto data = u"hello 😀 there!"sv; + + StringBuilder builder(StringBuilder::Mode::UTF16); + builder.append(data); + + auto buffer = MUST(builder.to_byte_buffer()); + FixedMemoryStream stream { buffer.bytes() }; + + auto string = TRY_OR_FAIL(Utf16String::from_ipc_stream(stream, data.length_in_code_units(), false)); + EXPECT(!string.is_ascii()); + EXPECT(!string.has_long_ascii_storage()); + EXPECT(!string.has_short_ascii_storage()); + EXPECT_EQ(string.length_in_code_units(), 15uz); + EXPECT_EQ(string, data); + } + { + auto data = "abc"sv; + FixedMemoryStream stream { data.bytes() }; + + auto result = Utf16String::from_ipc_stream(stream, data.length() + 1, true); + EXPECT(result.is_error()); + } + { + auto data = u"😀"sv; + + StringBuilder builder(StringBuilder::Mode::UTF16); + builder.append(data); + + auto buffer = MUST(builder.to_byte_buffer()); + FixedMemoryStream stream { buffer.bytes() }; + + auto result = Utf16String::from_ipc_stream(stream, data.length_in_code_units(), true); + EXPECT(result.is_error()); + } + { + auto data = u"hello 😀 there!"sv; + + StringBuilder builder(StringBuilder::Mode::UTF16); + builder.append(data); + + auto buffer = MUST(builder.to_byte_buffer()); + FixedMemoryStream stream { buffer.bytes() }; + + auto result = Utf16String::from_ipc_stream(stream, data.length_in_code_units(), true); + EXPECT(result.is_error()); + } +} + TEST_CASE(to_lowercase_unconditional_special_casing) { // LATIN SMALL LETTER SHARP S