mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-28 19:59:17 +00:00
AK: Support UTF-16 string formatting
The underlying storage used during string formatting is StringBuilder. To support UTF-16 strings, this patch allows callers to specify a mode during StringBuilder construction. The default mode is UTF-8, for which StringBuilder remains unchanged. In UTF-16 mode, we treat the StringBuilder's internal ByteBuffer as a series of u16 code units. Appending a single character will append 2 bytes for that character (cast to a char16_t). Appending a StringView will transcode the string to UTF-16. Utf16String also gains the same memory optimization that we added for String, where we hand off the underlying buffer to Utf16String to avoid having to re-allocate. In the future, we may want to further optimize for ASCII strings. For example, we could defer committing to the u16-esque storage until we see a non-ASCII code point.
This commit is contained in:
parent
fe676585f5
commit
2803d66d87
Notes:
github-actions[bot]
2025-07-18 16:47:24 +00:00
Author: https://github.com/trflynn89
Commit: 2803d66d87
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5388
Reviewed-by: https://github.com/shannonbooth ✅
11 changed files with 362 additions and 55 deletions
|
@ -235,6 +235,80 @@ TEST_CASE(from_utf32)
|
|||
}
|
||||
}
|
||||
|
||||
// Exercises the Utf16String builders (formatted / number / join) and checks
// emptiness, ASCII-ness, the reported storage kind, both length measures,
// and the final contents of each result.
TEST_CASE(formatted)
{
    // Formatting a small integer: two ASCII code units, reported as short ASCII storage.
    {
        auto formatted_number = Utf16String::formatted("{}", 42);
        EXPECT(!formatted_number.is_empty());
        EXPECT(formatted_number.is_ascii());
        EXPECT(!formatted_number.has_long_ascii_storage());
        EXPECT(formatted_number.has_short_ascii_storage());
        EXPECT_EQ(formatted_number.length_in_code_units(), 2uz);
        EXPECT_EQ(formatted_number.length_in_code_points(), 2uz);
        EXPECT_EQ(formatted_number, u"42"sv);
    }

    // Utf16String::number is expected to yield the same observable result as the case above.
    {
        auto from_number = Utf16String::number(42);
        EXPECT(!from_number.is_empty());
        EXPECT(from_number.is_ascii());
        EXPECT(!from_number.has_long_ascii_storage());
        EXPECT(from_number.has_short_ascii_storage());
        EXPECT_EQ(from_number.length_in_code_units(), 2uz);
        EXPECT_EQ(from_number.length_in_code_points(), 2uz);
        EXPECT_EQ(from_number, u"42"sv);
    }

    // Mixed arguments: an 8-bit string view, a UTF-16 view, and a double.
    // Each emoji is one code point but two code units, hence 13 vs. 15.
    {
        auto mixed = Utf16String::formatted("whf {} {} {}!", "😀"sv, Utf16View { u"🍕"sv }, 3.14);
        EXPECT(!mixed.is_empty());
        EXPECT(!mixed.is_ascii());
        EXPECT(!mixed.has_long_ascii_storage());
        EXPECT(!mixed.has_short_ascii_storage());
        EXPECT_EQ(mixed.length_in_code_units(), 15uz);
        EXPECT_EQ(mixed.length_in_code_points(), 13uz);
        EXPECT_EQ(mixed, u"whf 😀 🍕 3.14!"sv);
    }

    // Joining ASCII-only pieces: 6 * 26 letters + 5 * 2 separator units = 166,
    // reported as long ASCII storage.
    {
        Array parts {
            u"abcdefghijklmnopqrstuvwxyz"sv,
            u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv,
            u"abcdefghijklmnopqrstuvwxyz"sv,
            u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv,
            u"abcdefghijklmnopqrstuvwxyz"sv,
            u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv,
        };

        auto joined = Utf16String::join(u"--"sv, parts);
        EXPECT(!joined.is_empty());
        EXPECT(joined.is_ascii());
        EXPECT(joined.has_long_ascii_storage());
        EXPECT(!joined.has_short_ascii_storage());
        EXPECT_EQ(joined.length_in_code_units(), 166uz);
        EXPECT_EQ(joined.length_in_code_points(), 166uz);
        EXPECT_EQ(joined, u"abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ--abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ--abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv);
    }

    // Joining with two non-ASCII pieces. The first emoji is spelled as an explicit
    // surrogate pair (0xD83D 0xDE00); the expected string below contains it as 😀.
    // 6 * 26 letters + 2 * 2 emoji units + 7 * 2 separator units = 174 code units,
    // and two fewer code points because each surrogate pair is a single code point.
    {
        Array parts {
            u"abcdefghijklmnopqrstuvwxyz"sv,
            u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv,
            u"\xd83d\xde00"sv,
            u"abcdefghijklmnopqrstuvwxyz"sv,
            u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv,
            u"🍕"sv,
            u"abcdefghijklmnopqrstuvwxyz"sv,
            u"ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv,
        };

        auto joined = Utf16String::join(u"--"sv, parts);
        EXPECT(!joined.is_empty());
        EXPECT(!joined.is_ascii());
        EXPECT(!joined.has_long_ascii_storage());
        EXPECT(!joined.has_short_ascii_storage());
        EXPECT_EQ(joined.length_in_code_units(), 174uz);
        EXPECT_EQ(joined.length_in_code_points(), 172uz);
        EXPECT_EQ(joined, u"abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ--😀--abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ--🍕--abcdefghijklmnopqrstuvwxyz--ABCDEFGHIJKLMNOPQRSTUVWXYZ"sv);
    }
}
|
||||
|
||||
TEST_CASE(copy_operations)
|
||||
{
|
||||
auto test = [](Utf16String const& string1) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue