LibTextCodec: Use AK facilities to validate and convert UTF-16 to UTF-8

This allows LibTextCodec to make use of simdutf, and also reduces the
number of places with manual UTF-16 implementations.
This commit is contained in:
Timothy Flynn 2024-07-18 12:07:37 -04:00 committed by Andreas Kling
commit 368dad54ef
Notes: sideshowbarker 2024-07-19 06:22:43 +09:00
2 changed files with 30 additions and 112 deletions

View file

@@ -15,6 +15,8 @@ TEST_CASE(test_utf8_decode)
// Bytes for U+1F600 GRINNING FACE
auto test_string = "\xf0\x9f\x98\x80"sv;
EXPECT(decoder.validate(test_string));
Vector<u32> processed_code_points;
MUST(decoder.process(test_string, [&](u32 code_point) {
return processed_code_points.try_append(code_point);
@@ -31,6 +33,8 @@ TEST_CASE(test_utf16be_decode)
// This is the output of `python3 -c "print('säk😀'.encode('utf-16be'))"`.
auto test_string = "\x00s\x00\xe4\x00k\xd8=\xde\x00"sv;
EXPECT(decoder.validate(test_string));
Vector<u32> processed_code_points;
MUST(decoder.process(test_string, [&](u32 code_point) {
return processed_code_points.try_append(code_point);
@@ -40,6 +44,9 @@ TEST_CASE(test_utf16be_decode)
EXPECT(processed_code_points[1] == 0xE4);
EXPECT(processed_code_points[2] == 0x6B);
EXPECT(processed_code_points[3] == 0x1F600);
auto utf8 = MUST(decoder.to_utf8(test_string));
EXPECT_EQ(utf8, "säk😀"sv);
}
TEST_CASE(test_utf16le_decode)
@@ -48,6 +55,8 @@ TEST_CASE(test_utf16le_decode)
// This is the output of `python3 -c "print('säk😀'.encode('utf-16le'))"`.
auto test_string = "s\x00\xe4\x00k\x00=\xd8\x00\xde"sv;
EXPECT(decoder.validate(test_string));
Vector<u32> processed_code_points;
MUST(decoder.process(test_string, [&](u32 code_point) {
return processed_code_points.try_append(code_point);
@@ -57,4 +66,7 @@ TEST_CASE(test_utf16le_decode)
EXPECT(processed_code_points[1] == 0xE4);
EXPECT(processed_code_points[2] == 0x6B);
EXPECT(processed_code_points[3] == 0x1F600);
auto utf8 = MUST(decoder.to_utf8(test_string));
EXPECT_EQ(utf8, "säk😀"sv);
}