AK: Support UTF-16 string formatting

The underlying storage used during string formatting is StringBuilder.
To support UTF-16 strings, this patch allows callers to specify a mode
during StringBuilder construction. The default mode is UTF-8, for which
StringBuilder remains unchanged.

In UTF-16 mode, we treat the StringBuilder's internal ByteBuffer as a
series of u16 code units. Appending a single character will append 2
bytes for that character (cast to a char16_t). Appending a StringView
will transcode the string to UTF-16.

Utf16String also gains the same memory optimization that we added for
String, where we hand off the underlying buffer to Utf16String to avoid
having to reallocate.

In the future, we may want to further optimize for ASCII strings. For
example, we could defer committing to the u16-esque storage until we
see a non-ASCII code point.
This commit is contained in:
Timothy Flynn 2025-06-17 16:08:30 -04:00 committed by Tim Flynn
commit 2803d66d87
Notes: github-actions[bot] 2025-07-18 16:47:24 +00:00
11 changed files with 362 additions and 55 deletions

View file

@ -81,11 +81,50 @@ public:
requires(IsOneOf<RemoveCVReference<T>, Utf16String>)
static Utf16String from_utf16_without_validation(T&&) = delete;
// Produces a Utf16String from a CheckedFormatString and its arguments.
// The formatted output is accumulated in a StringBuilder constructed in UTF-16 mode
// and then converted with StringBuilder::to_utf16_string().
template<typename... Parameters>
ALWAYS_INLINE static Utf16String formatted(CheckedFormatString<Parameters...>&& format, Parameters const&... parameters)
{
    StringBuilder utf16_builder(StringBuilder::Mode::UTF16);
    VariadicFormatParams<AllowDebugOnlyFormatters::No, Parameters...> format_arguments { parameters... };

    // Formatting failures are not propagated to the caller; the result is wrapped in MUST().
    MUST(vformat(utf16_builder, format.view(), format_arguments));

    return utf16_builder.to_utf16_string();
}
// Converts an arithmetic value to a Utf16String by formatting it with the "{}" specifier.
template<Arithmetic T>
ALWAYS_INLINE static Utf16String number(T value)
{
    return Utf16String::formatted("{}", value);
}
// Joins the elements of `collection` with `separator` into a Utf16String.
// `format` is forwarded to StringBuilder::join() and defaults to "{}".
// The work is delegated to a StringBuilder constructed in UTF-16 mode.
template<class SeparatorType, class CollectionType>
ALWAYS_INLINE static Utf16String join(SeparatorType const& separator, CollectionType const& collection, StringView format = "{}"sv)
{
    StringBuilder joined(StringBuilder::Mode::UTF16);
    joined.join(separator, collection, format);

    return joined.to_utf16_string();
}
// Creates a Utf16String from a StringBuilder's contents. Callable only with a Badge<StringBuilder>.
// The builder's UTF-16 view is checked with validate() before the unvalidated private
// overload is invoked.
ALWAYS_INLINE static Utf16String from_string_builder(Badge<StringBuilder>, StringBuilder& builder)
{
    // Validation failure trips VERIFY rather than being reported to the caller.
    VERIFY(builder.utf16_string_view().validate());

    return Utf16String::from_string_builder_without_validation(builder);
}
// Badge-guarded entry point that creates a Utf16String from a StringBuilder without
// validating its contents; forwards directly to the private single-argument overload.
ALWAYS_INLINE static Utf16String from_string_builder_without_validation(Badge<StringBuilder>, StringBuilder& builder)
{
    return Utf16String::from_string_builder_without_validation(builder);
}
private:
// Private constructor: wraps already-built string data by moving `value` into the
// Utf16StringBase base class. Explicit, so no implicit conversion from NonnullRefPtr.
ALWAYS_INLINE explicit Utf16String(NonnullRefPtr<Detail::Utf16StringData const> value)
: Utf16StringBase(move(value))
{
}
// Shared implementation behind the two Badge<StringBuilder> entry points above; it is
// defined out of line (definition not visible here).
// NOTE(review): presumably takes over the builder's underlying buffer to avoid a
// reallocation — confirm against the definition.
static Utf16String from_string_builder_without_validation(StringBuilder&);
};
template<>