mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-28 19:59:17 +00:00
LibTextCodec: Use AK facilities to validate and convert UTF-16 to UTF-8
This allows LibTextCodec to make use of simdutf, and also reduces the number of places with manual UTF-16 implementations.
This commit is contained in:
parent
71c29504af
commit
368dad54ef
Notes:
sideshowbarker
2024-07-19 06:22:43 +09:00
Author: https://github.com/trflynn89
Commit: 368dad54ef
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/698
2 changed files with 30 additions and 112 deletions
|
@ -15,6 +15,8 @@ TEST_CASE(test_utf8_decode)
|
|||
// Bytes for U+1F600 GRINNING FACE
|
||||
auto test_string = "\xf0\x9f\x98\x80"sv;
|
||||
|
||||
EXPECT(decoder.validate(test_string));
|
||||
|
||||
Vector<u32> processed_code_points;
|
||||
MUST(decoder.process(test_string, [&](u32 code_point) {
|
||||
return processed_code_points.try_append(code_point);
|
||||
|
@ -31,6 +33,8 @@ TEST_CASE(test_utf16be_decode)
|
|||
// This is the output of `python3 -c "print('säk😀'.encode('utf-16be'))"`.
|
||||
auto test_string = "\x00s\x00\xe4\x00k\xd8=\xde\x00"sv;
|
||||
|
||||
EXPECT(decoder.validate(test_string));
|
||||
|
||||
Vector<u32> processed_code_points;
|
||||
MUST(decoder.process(test_string, [&](u32 code_point) {
|
||||
return processed_code_points.try_append(code_point);
|
||||
|
@ -40,6 +44,9 @@ TEST_CASE(test_utf16be_decode)
|
|||
EXPECT(processed_code_points[1] == 0xE4);
|
||||
EXPECT(processed_code_points[2] == 0x6B);
|
||||
EXPECT(processed_code_points[3] == 0x1F600);
|
||||
|
||||
auto utf8 = MUST(decoder.to_utf8(test_string));
|
||||
EXPECT_EQ(utf8, "säk😀"sv);
|
||||
}
|
||||
|
||||
TEST_CASE(test_utf16le_decode)
|
||||
|
@ -48,6 +55,8 @@ TEST_CASE(test_utf16le_decode)
|
|||
// This is the output of `python3 -c "print('säk😀'.encode('utf-16le'))"`.
|
||||
auto test_string = "s\x00\xe4\x00k\x00=\xd8\x00\xde"sv;
|
||||
|
||||
EXPECT(decoder.validate(test_string));
|
||||
|
||||
Vector<u32> processed_code_points;
|
||||
MUST(decoder.process(test_string, [&](u32 code_point) {
|
||||
return processed_code_points.try_append(code_point);
|
||||
|
@ -57,4 +66,7 @@ TEST_CASE(test_utf16le_decode)
|
|||
EXPECT(processed_code_points[1] == 0xE4);
|
||||
EXPECT(processed_code_points[2] == 0x6B);
|
||||
EXPECT(processed_code_points[3] == 0x1F600);
|
||||
|
||||
auto utf8 = MUST(decoder.to_utf8(test_string));
|
||||
EXPECT_EQ(utf8, "säk😀"sv);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue