AK: Avoid double allocations when converting UTF-16 LE/BE to UTF-8

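We can form the UTF-8 string in place, writing the converted bytes directly into the new String's buffer instead of converting into a temporary Vector first and then building the String from it.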
This commit is contained in:
Timothy Flynn 2025-07-03 10:12:07 -04:00 committed by Tim Flynn
commit 69074a3841
Notes: github-actions[bot] 2025-07-03 15:46:35 +00:00

View file

@ -61,19 +61,30 @@ ErrorOr<String> String::from_utf8(StringView view)
return result;
}
// Creates a String from a UTF-16 view by delegating to the view's own to_utf8() conversion.
// Whatever that call produces (value or error) is returned to the caller as-is.
ErrorOr<String> String::from_utf16(Utf16View const& utf16)
{
return utf16.to_utf8();
}
// Converts UTF-16LE encoded bytes into a String.
// Returns an error when validate_utf16_le() rejects the input, and an empty String for empty input.
//
// NOTE(review): this span comes from a diff rendering without +/- markers, so two versions of the
// conversion body appear back to back. Presumably the first is the one removed by the commit and the
// second is its replacement - confirm against the actual diff.
ErrorOr<String> String::from_utf16_le(ReadonlyBytes bytes)
{
if (!validate_utf16_le(bytes))
return Error::from_string_literal("String::from_utf16_le: Input was not valid UTF-16LE");
if (bytes.is_empty())
return String {};
// Version 1: convert into a temporary Vector<u8>, then build the String from those bytes.
char16_t const* utf16_data = reinterpret_cast<char16_t const*>(bytes.data());
// Two bytes per UTF-16 code unit.
size_t utf16_length = bytes.size() / 2;
size_t max_utf8_length = simdutf::utf8_length_from_utf16(utf16_data, utf16_length);
Vector<u8> buffer;
buffer.resize(max_utf8_length);
auto utf8_length = simdutf::convert_utf16le_to_utf8(utf16_data, utf16_length, reinterpret_cast<char*>(buffer.data()));
return String::from_utf8_without_validation(ReadonlyBytes { buffer.data(), utf8_length });
// Version 2: compute the UTF-8 length up front, then let simdutf write into the buffer that
// replace_with_new_string() hands to the callback.
auto const* utf16_data = reinterpret_cast<char16_t const*>(bytes.data());
auto utf16_length = bytes.size() / 2;
auto utf8_length = simdutf::utf8_length_from_utf16le(utf16_data, utf16_length);
String result;
TRY(result.replace_with_new_string(utf8_length, [&](Bytes buffer) -> ErrorOr<void> {
// This inner `result` is the converter's return value and shadows the outer String `result`.
// It is only read by the assertion below (presumably why it is marked [[maybe_unused]]).
[[maybe_unused]] auto result = simdutf::convert_utf16le_to_utf8(utf16_data, utf16_length, reinterpret_cast<char*>(buffer.data()));
// The conversion is expected to fill the buffer exactly.
ASSERT(result == buffer.size());
return {};
}));
return result;
}
ErrorOr<String> String::from_utf16_be(ReadonlyBytes bytes)
@ -82,18 +93,19 @@ ErrorOr<String> String::from_utf16_be(ReadonlyBytes bytes)
return Error::from_string_literal("String::from_utf16_be: Input was not valid UTF-16BE");
if (bytes.is_empty())
return String {};
char16_t const* utf16_data = reinterpret_cast<char16_t const*>(bytes.data());
size_t utf16_length = bytes.size() / 2;
size_t max_utf8_length = simdutf::utf8_length_from_utf16(utf16_data, utf16_length);
Vector<u8> buffer;
buffer.resize(max_utf8_length);
auto utf8_length = simdutf::convert_utf16be_to_utf8(utf16_data, utf16_length, reinterpret_cast<char*>(buffer.data()));
return String::from_utf8_without_validation(ReadonlyBytes { buffer.data(), utf8_length });
}
ErrorOr<String> String::from_utf16(Utf16View const& utf16)
{
return utf16.to_utf8();
auto const* utf16_data = reinterpret_cast<char16_t const*>(bytes.data());
auto utf16_length = bytes.size() / 2;
auto utf8_length = simdutf::utf8_length_from_utf16be(utf16_data, utf16_length);
String result;
TRY(result.replace_with_new_string(utf8_length, [&](Bytes buffer) -> ErrorOr<void> {
[[maybe_unused]] auto result = simdutf::convert_utf16be_to_utf8(utf16_data, utf16_length, reinterpret_cast<char*>(buffer.data()));
ASSERT(result == buffer.size());
return {};
}));
return result;
}
ErrorOr<String> String::from_stream(Stream& stream, size_t byte_count)