LibTextCodec+LibURL: Implement utf-8 and euc-jp encoders

Implements the corresponding encoders and selects the appropriate one when
encoding URL search params. If an encoder for the given encoding cannot
be found, fall back to UTF-8.
This commit is contained in:
BenJilks 2024-08-05 16:03:53 +01:00 committed by Tim Ledbetter
parent 82a63e350c
commit 72d0e3284b
Notes: github-actions[bot] 2024-08-08 16:51:38 +00:00
11 changed files with 260 additions and 22 deletions

View file

@ -0,0 +1,45 @@
/*
* Copyright (c) 2024, Ben Jilks <benjyjilks@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibTest/TestCase.h>
#include <LibTextCodec/Encoder.h>
// Feeds a single code point outside the Basic Multilingual Plane through
// UTF8Encoder and checks that exactly the expected four bytes come out.
TEST_CASE(test_utf8_encode)
{
    // Receives every byte the encoder passes to the callback, in order.
    Vector<u8> processed_bytes;

    // Unicode character U+1F600 GRINNING FACE
    auto const input = "\U0001F600"sv;
    Utf8View const input_view(input);

    TextCodec::UTF8Encoder utf8_encoder;
    MUST(utf8_encoder.process(input_view, [&processed_bytes](u8 byte) -> ErrorOr<void> {
        return processed_bytes.try_append(byte);
    }));

    // Expected output: F0 9F 98 80.
    EXPECT(processed_bytes.size() == 4);
    EXPECT(processed_bytes[0] == 0xF0);
    EXPECT(processed_bytes[1] == 0x9F);
    EXPECT(processed_bytes[2] == 0x98);
    EXPECT(processed_bytes[3] == 0x80);
}
// Feeds three code points through EUCJPEncoder and checks the produced bytes:
// the yen sign is expected as one byte, each kana as a two-byte sequence.
TEST_CASE(test_euc_jp_encoder)
{
    // Receives every byte the encoder passes to the callback, in order.
    Vector<u8> processed_bytes;

    // Input code points, in order:
    //   U+A5   Yen Sign
    //   U+3088 Hiragana Letter Yo
    //   U+30C4 Katakana Letter Tu
    auto const input = "\U000000A5\U00003088\U000030C4"sv;
    Utf8View const input_view(input);

    TextCodec::EUCJPEncoder euc_jp_encoder;
    MUST(euc_jp_encoder.process(input_view, [&processed_bytes](u8 byte) -> ErrorOr<void> {
        return processed_bytes.try_append(byte);
    }));

    // Expected output: 5C | A4 E8 | A5 C4.
    EXPECT(processed_bytes.size() == 5);
    EXPECT(processed_bytes[0] == 0x5C);
    EXPECT(processed_bytes[1] == 0xA4);
    EXPECT(processed_bytes[2] == 0xE8);
    EXPECT(processed_bytes[3] == 0xA5);
    EXPECT(processed_bytes[4] == 0xC4);
}