AK: Support non-native endianness in Utf16View

Utf16View currently assumes host endianness. Add support for explicitly
specifying either big or little endianness (which we mostly just forward to
the matching simdutf functions). This will allow using simdutf facilities
with LibTextCodec.
This commit is contained in:
Timothy Flynn 2024-07-18 11:57:01 -04:00 committed by Andreas Kling
commit 71c29504af
Notes: sideshowbarker 2024-07-19 06:22:46 +09:00
5 changed files with 228 additions and 79 deletions

View file

@ -8,6 +8,7 @@
#include <AK/Array.h>
#include <AK/Checked.h>
#include <AK/Endian.h>
#include <AK/FlyString.h>
#include <AK/Format.h>
#include <AK/MemMem.h>
@ -51,15 +52,30 @@ ErrorOr<String> String::from_utf16(Utf16View const& utf16)
String result;
auto utf8_length = simdutf::utf8_length_from_utf16(
reinterpret_cast<char16_t const*>(utf16.data()),
utf16.length_in_code_units());
auto utf8_length = [&]() {
switch (utf16.endianness()) {
case Endianness::Host:
return simdutf::utf8_length_from_utf16(utf16.char_data(), utf16.length_in_code_units());
case Endianness::Big:
return simdutf::utf8_length_from_utf16be(utf16.char_data(), utf16.length_in_code_units());
case Endianness::Little:
return simdutf::utf8_length_from_utf16le(utf16.char_data(), utf16.length_in_code_units());
}
VERIFY_NOT_REACHED();
}();
TRY(result.replace_with_new_string(utf8_length, [&](Bytes buffer) -> ErrorOr<void> {
[[maybe_unused]] auto result = simdutf::convert_utf16_to_utf8(
reinterpret_cast<char16_t const*>(utf16.data()),
utf16.length_in_code_units(),
reinterpret_cast<char*>(buffer.data()));
[[maybe_unused]] auto result = [&]() {
switch (utf16.endianness()) {
case Endianness::Host:
return simdutf::convert_utf16_to_utf8(utf16.char_data(), utf16.length_in_code_units(), reinterpret_cast<char*>(buffer.data()));
case Endianness::Big:
return simdutf::convert_utf16be_to_utf8(utf16.char_data(), utf16.length_in_code_units(), reinterpret_cast<char*>(buffer.data()));
case Endianness::Little:
return simdutf::convert_utf16le_to_utf8(utf16.char_data(), utf16.length_in_code_units(), reinterpret_cast<char*>(buffer.data()));
}
VERIFY_NOT_REACHED();
}();
ASSERT(result == buffer.size());
return {};