mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-23 09:22:30 +00:00
AK: Add a UTF-16 string with optimized short- and ASCII-string storage
This is a strictly UTF-16 string with some optimizations for ASCII.
* If created from a short UTF-8 or UTF-16 string that is also ASCII,
then the string is stored in an inlined byte buffer.
* If created with a long UTF-8 or UTF-16 string that is also ASCII,
then the string is stored in an outlined char buffer.
* If created with a short or long UTF-8 or UTF-16 string that is not
ASCII, then the string is stored in an outlined char16 buffer.
We do not store short non-ASCII text in the inlined buffer to avoid
confusion with operations such as `length_in_code_units` and
`code_unit_at`. For example, "😀" would be stored as 4 UTF-8 bytes
in short string form. But we still want `length_in_code_units` to
be 2, and `code_unit_at(0)` to be 0xD83D.
This commit is contained in:
parent
8fbb80fffc
commit
fe676585f5
Notes:
github-actions[bot]
2025-07-18 16:47:31 +00:00
Author: https://github.com/trflynn89
Commit: fe676585f5
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5388
Reviewed-by: https://github.com/shannonbooth ✅
17 changed files with 1527 additions and 44 deletions
|
@ -44,7 +44,13 @@ NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view)
|
|||
{
|
||||
Utf16Data string;
|
||||
string.ensure_capacity(view.length_in_code_units());
|
||||
string.unchecked_append(view.span().data(), view.length_in_code_units());
|
||||
|
||||
if (view.has_ascii_storage()) {
|
||||
for (size_t i = 0; i < view.length_in_code_units(); ++i)
|
||||
string.unchecked_append(static_cast<char16_t>(view.code_unit_at(i)));
|
||||
} else {
|
||||
string.unchecked_append(view.utf16_span().data(), view.length_in_code_units());
|
||||
}
|
||||
|
||||
auto impl = create(move(string));
|
||||
if (auto length_in_code_points = view.length_in_code_points_if_known(); length_in_code_points.has_value())
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue