From 298ec6a12ad5c743e74e6744a00681720cc1fa5b Mon Sep 17 00:00:00 2001
From: Timothy Flynn
Date: Wed, 6 Aug 2025 06:57:58 -0400
Subject: [PATCH] AK: Ensure StringBuilder encodes U+10000 as 2 UTF-16 code units

---
Note: in UTF-16 mode, StringBuilder::append_code_point() has a fast path that
casts the code point to one code unit and appends it. The old `<=` comparison
against UnicodeUtils::FIRST_SUPPLEMENTARY_PLANE_CODE_POINT let the boundary
value take that fast path as well; `<` excludes it. The tests below pin
U+10000 to the surrogate pair 0xd800 0xdc00, both through the formatted()
test and through a StringBuilder driven directly in UTF-16 mode.

 AK/StringBuilder.cpp         |  2 +-
 Tests/AK/TestUtf16String.cpp | 24 +++++++++++++++++++++---
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/AK/StringBuilder.cpp b/AK/StringBuilder.cpp
index 622060acfa2..8b0eed5b44d 100644
--- a/AK/StringBuilder.cpp
+++ b/AK/StringBuilder.cpp
@@ -370,7 +370,7 @@ void StringBuilder::append_code_point(u32 code_point)
     if (m_mode == Mode::UTF16) {
         (void)(will_append(2));
 
-        if (code_point <= UnicodeUtils::FIRST_SUPPLEMENTARY_PLANE_CODE_POINT) {
+        if (code_point < UnicodeUtils::FIRST_SUPPLEMENTARY_PLANE_CODE_POINT) {
             auto code_unit = static_cast<char16_t>(code_point);
             m_buffer.append(&code_unit, sizeof(code_unit));
             return;
diff --git a/Tests/AK/TestUtf16String.cpp b/Tests/AK/TestUtf16String.cpp
index 94f1081eb91..3d21098c859 100644
--- a/Tests/AK/TestUtf16String.cpp
+++ b/Tests/AK/TestUtf16String.cpp
@@ -343,6 +343,7 @@ TEST_CASE(formatted)
             u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv,
             u"\xd83d\xde00"sv,
             u"abcdefghijklmnopqrstuvwxyz"sv,
+            u"\xd800\xdc00"sv,
             u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv,
             u"🍕"sv,
             u"abcdefghijklmnopqrstuvwxyz"sv,
@@ -354,9 +355,9 @@ TEST_CASE(formatted)
         EXPECT(!string.is_ascii());
         EXPECT(!string.has_long_ascii_storage());
         EXPECT(!string.has_short_ascii_storage());
-        EXPECT_EQ(string.length_in_code_units(), 174uz);
-        EXPECT_EQ(string.length_in_code_points(), 172uz);
-        EXPECT_EQ(string, u"abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ--😀--abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ--🍕--abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv);
+        EXPECT_EQ(string.length_in_code_units(), 178uz);
+        EXPECT_EQ(string.length_in_code_points(), 175uz);
+        EXPECT_EQ(string, u"abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ--😀--abcdefghijklmnopqrstuvwxyz--𐀀--ABCDEFGHIJKLMNOPQRSTUVWXYZ--🍕--abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv);
     }
 }
 
@@ -415,6 +416,23 @@ TEST_CASE(repeated)
     EXPECT_DEATH("Creating a string from an invalid code point", (void)Utf16String::repeated(0xffffffff, 1));
 }
 
+TEST_CASE(from_string_builder)
+{
+    StringBuilder builder(StringBuilder::Mode::UTF16);
+    builder.append_code_point('a');
+    builder.append_code_point('b');
+    builder.append_code_point(0x1f600);
+    builder.append_code_point(0x10000);
+    builder.append_code_point(0x1f355);
+    builder.append_code_point('c');
+    builder.append_code_point('d');
+
+    auto string = builder.to_utf16_string();
+    EXPECT_EQ(string.length_in_code_units(), 10uz);
+    EXPECT_EQ(string.length_in_code_points(), 7uz);
+    EXPECT_EQ(string, "ab😀𐀀🍕cd"sv);
+}
+
 TEST_CASE(from_ipc_stream)
 {
     {