mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-23 17:33:12 +00:00
AK: Support UTF-16 string formatting
The underlying storage used during string formatting is StringBuilder. To support UTF-16 strings, this patch allows callers to specify a mode during StringBuilder construction. The default mode is UTF-8, for which StringBuilder remains unchanged. In UTF-16 mode, we treat the StringBuilder's internal ByteBuffer as a series of u16 code units. Appending a single character will append 2 bytes for that character (cast to a char16_t). Appending a StringView will transcode the string to UTF-16. Utf16String also gains the same memory optimization that we added for String, where we hand off the underlying buffer to Utf16String to avoid having to reallocate. In the future, we may want to further optimize for ASCII strings. For example, we could defer committing to the u16-esque storage until we see a non-ASCII code point.
This commit is contained in:
parent
fe676585f5
commit
2803d66d87
Notes:
github-actions[bot]
2025-07-18 16:47:24 +00:00
Author: https://github.com/trflynn89
Commit: 2803d66d87
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5388
Reviewed-by: https://github.com/shannonbooth ✅
11 changed files with 362 additions and 55 deletions
|
@ -87,12 +87,11 @@ WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t coun
|
|||
auto inserted_data_result = MUST(AK::utf8_to_utf16(data));
|
||||
auto after_data = utf16_view.substring_view(offset + count);
|
||||
|
||||
Utf16Data full_data;
|
||||
full_data.ensure_capacity(before_data.length_in_code_units() + inserted_data_result.data.size() + after_data.length_in_code_units());
|
||||
full_data.append(before_data.utf16_span().data(), before_data.length_in_code_units());
|
||||
full_data.extend(inserted_data_result.data);
|
||||
full_data.append(after_data.utf16_span().data(), after_data.length_in_code_units());
|
||||
Utf16View full_view { full_data };
|
||||
StringBuilder full_data(StringBuilder::Mode::UTF16, before_data.length_in_code_units() + inserted_data_result.data.size() + after_data.length_in_code_units());
|
||||
full_data.append(before_data);
|
||||
full_data.append(inserted_data_result.data);
|
||||
full_data.append(after_data);
|
||||
auto full_view = full_data.utf16_string_view();
|
||||
|
||||
bool characters_are_the_same = utf16_view == full_view;
|
||||
auto old_data = m_data;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue