AK: Ensure StringBuilder encodes U+10000 as 2 UTF-16 code units

This commit is contained in:
Timothy Flynn 2025-08-06 06:57:58 -04:00 committed by Jelle Raaijmakers
commit 298ec6a12a
Notes: github-actions[bot] 2025-08-07 00:07:09 +00:00
2 changed files with 22 additions and 4 deletions

View file

@ -370,7 +370,7 @@ void StringBuilder::append_code_point(u32 code_point)
if (m_mode == Mode::UTF16) { if (m_mode == Mode::UTF16) {
(void)(will_append(2)); (void)(will_append(2));
if (code_point <= UnicodeUtils::FIRST_SUPPLEMENTARY_PLANE_CODE_POINT) { if (code_point < UnicodeUtils::FIRST_SUPPLEMENTARY_PLANE_CODE_POINT) {
auto code_unit = static_cast<char16_t>(code_point); auto code_unit = static_cast<char16_t>(code_point);
m_buffer.append(&code_unit, sizeof(code_unit)); m_buffer.append(&code_unit, sizeof(code_unit));
return; return;

View file

@ -343,6 +343,7 @@ TEST_CASE(formatted)
u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv, u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv,
u"\xd83d\xde00"sv, u"\xd83d\xde00"sv,
u"abcdefghijklmnopqrstuvwxyz"sv, u"abcdefghijklmnopqrstuvwxyz"sv,
u"\xd800\xdc00"sv,
u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv, u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv,
u"🍕"sv, u"🍕"sv,
u"abcdefghijklmnopqrstuvwxyz"sv, u"abcdefghijklmnopqrstuvwxyz"sv,
@ -354,9 +355,9 @@ TEST_CASE(formatted)
EXPECT(!string.is_ascii()); EXPECT(!string.is_ascii());
EXPECT(!string.has_long_ascii_storage()); EXPECT(!string.has_long_ascii_storage());
EXPECT(!string.has_short_ascii_storage()); EXPECT(!string.has_short_ascii_storage());
EXPECT_EQ(string.length_in_code_units(), 174uz); EXPECT_EQ(string.length_in_code_units(), 178uz);
EXPECT_EQ(string.length_in_code_points(), 172uz); EXPECT_EQ(string.length_in_code_points(), 175uz);
EXPECT_EQ(string, u"abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ--😀--abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ--🍕--abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv); EXPECT_EQ(string, u"abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ--😀--abcdefghijklmnopqrstuvwxyz--𐀀--ABCDEFGHIJKLMNOPQRSTUVWXYZ--🍕--abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv);
} }
} }
@ -415,6 +416,23 @@ TEST_CASE(repeated)
EXPECT_DEATH("Creating a string from an invalid code point", (void)Utf16String::repeated(0xffffffff, 1)); EXPECT_DEATH("Creating a string from an invalid code point", (void)Utf16String::repeated(0xffffffff, 1));
} }
// Builds a UTF-16 string one code point at a time through StringBuilder and checks
// the resulting code unit / code point counts and contents.
TEST_CASE(from_string_builder)
{
    // Four code points below U+10000 (one UTF-16 code unit each) surrounding three
    // code points at or above U+10000 (two code units each). 0x10000 itself is the
    // boundary value: the lowest code point that must be written as a surrogate pair.
    u32 const code_points[] = {
        'a',
        'b',
        0x1f600, // 😀
        0x10000, // 𐀀
        0x1f355, // 🍕
        'c',
        'd',
    };

    StringBuilder builder(StringBuilder::Mode::UTF16);
    for (auto code_point : code_points)
        builder.append_code_point(code_point);

    auto string = builder.to_utf16_string();

    // 4 single-unit code points + 3 surrogate pairs = 10 code units, 7 code points.
    EXPECT_EQ(string.length_in_code_units(), 10uz);
    EXPECT_EQ(string.length_in_code_points(), 7uz);
    EXPECT_EQ(string, "ab😀𐀀🍕cd"sv);
}
TEST_CASE(from_ipc_stream) TEST_CASE(from_ipc_stream)
{ {
{ {