AK: Replace converting to and from UTF-16 with simdutf

The one behavior difference is that we will now actually fail on invalid
code units with Utf16View::to_utf8(AllowInvalidCodeUnits::No). It was
arguably a bug that this wasn't already the case.
This commit is contained in:
Timothy Flynn 2024-07-16 16:05:46 -04:00 committed by Andreas Kling
commit 0c14a9417a
Notes: sideshowbarker 2024-07-18 23:45:58 +09:00
4 changed files with 81 additions and 23 deletions

View file

@ -4,6 +4,8 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#define AK_DONT_REPLACE_STD
#include <AK/Array.h>
#include <AK/Checked.h>
#include <AK/FlyString.h>
@ -11,9 +13,12 @@
#include <AK/MemMem.h>
#include <AK/Stream.h>
#include <AK/String.h>
#include <AK/Utf16View.h>
#include <AK/Vector.h>
#include <stdlib.h>
#include <simdutf.h>
namespace AK {
String String::from_utf8_without_validation(ReadonlyBytes bytes)
@ -39,6 +44,30 @@ ErrorOr<String> String::from_utf8(StringView view)
return result;
}
// Creates a String from UTF-16 input by transcoding it to UTF-8 with simdutf.
// Returns an error when utf16.validate() rejects the input.
ErrorOr<String> String::from_utf16(Utf16View const& utf16)
{
    if (!utf16.validate())
        return Error::from_string_literal("String::from_utf16: Input was not valid UTF-16");

    // simdutf works on char16_t; compute the reinterpreted pointer and the length once
    // and reuse them for both the sizing pass and the conversion pass.
    auto const* code_units = reinterpret_cast<char16_t const*>(utf16.data());
    auto const code_unit_count = utf16.length_in_code_units();

    // Ask simdutf how many UTF-8 bytes the input needs, so the destination is requested at that size.
    auto const utf8_byte_count = simdutf::utf8_length_from_utf16(code_units, code_unit_count);

    String converted;
    TRY(converted.replace_with_new_string(utf8_byte_count, [&](Bytes buffer) -> ErrorOr<void> {
        // The written-byte count is only consumed by the ASSERT below, so it is marked [[maybe_unused]].
        [[maybe_unused]] auto bytes_written = simdutf::convert_utf16_to_utf8(
            code_units,
            code_unit_count,
            reinterpret_cast<char*>(buffer.data()));
        ASSERT(bytes_written == buffer.size());
        return {};
    }));

    return converted;
}
ErrorOr<String> String::from_stream(Stream& stream, size_t byte_count)
{
String result;