LibTextCodec: Add an EUC-KR encoder

- GenerateEncodingIndexes: also pass GenerateInverseAccessor::Yes for the
  "euc-kr" index, as is already done for "jis0208". (Presumably this is what
  emits code_point_euc_kr_index(), which the new encoder calls - confirm
  against the generator.)
- Encoder.h / Encoder.cpp: add EUCKREncoder, whose process() follows the
  numbered steps of https://encoding.spec.whatwg.org/#euc-kr-encoder, and
  return a shared instance from encoder_for_exact_name() for "euc-kr"
  (compared ASCII-case-insensitively).
- TestTextEncoders: check that U+B29F U+7C97 encodes to 88 6B F0 D8.

Code points that have no pointer in the EUC-KR index are skipped without
reporting an error; see the TODO in EUCKREncoder::process().

NOTE(review): this patch was recovered from a flattened copy in which line
breaks, leading whitespace and template argument lists had been lost. Hunk
line counts match the @@ headers, but indentation was re-created following
the project's 4-space style and has not been checked against the tree. If a
hunk is rejected, retry with `git apply --ignore-whitespace`.

diff --git a/Meta/Lagom/Tools/CodeGenerators/LibTextCodec/GenerateEncodingIndexes.cpp b/Meta/Lagom/Tools/CodeGenerators/LibTextCodec/GenerateEncodingIndexes.cpp
index 3f268c15c39..b568f4a60df 100644
--- a/Meta/Lagom/Tools/CodeGenerators/LibTextCodec/GenerateEncodingIndexes.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/LibTextCodec/GenerateEncodingIndexes.cpp
@@ -270,7 +270,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
             { "big5"sv, prepare_table(data.get("big5"sv)->as_array(), GenerateAccessor::Yes) },
             { "jis0208"sv, prepare_table(data.get("jis0208"sv)->as_array(), GenerateAccessor::Yes, GenerateInverseAccessor::Yes) },
             { "jis0212"sv, prepare_table(data.get("jis0212"sv)->as_array(), GenerateAccessor::Yes) },
-            { "euc_kr"sv, prepare_table(data.get("euc-kr"sv)->as_array(), GenerateAccessor::Yes) },
+            { "euc_kr"sv, prepare_table(data.get("euc-kr"sv)->as_array(), GenerateAccessor::Yes, GenerateInverseAccessor::Yes) },
             { "ibm866"sv, prepare_table(data.get("ibm866"sv)->as_array()) },
             { "iso_8859_2"sv, prepare_table(data.get("iso-8859-2"sv)->as_array()) },
             { "iso_8859_3"sv, prepare_table(data.get("iso-8859-3"sv)->as_array()) },
diff --git a/Tests/LibTextCodec/TestTextEncoders.cpp b/Tests/LibTextCodec/TestTextEncoders.cpp
index 17c298123f6..64c0c9a96a4 100644
--- a/Tests/LibTextCodec/TestTextEncoders.cpp
+++ b/Tests/LibTextCodec/TestTextEncoders.cpp
@@ -43,3 +43,21 @@ TEST_CASE(test_euc_jp_encoder)
     EXPECT(processed_bytes[3] == 0xA5);
     EXPECT(processed_bytes[4] == 0xC4);
 }
+
+TEST_CASE(test_euc_kr_encoder)
+{
+    TextCodec::EUCKREncoder encoder;
+    // U+B29F Hangul Syllable Neulh
+    // U+7C97 CJK Unified Ideograph-7C97
+    auto test_string = "\U0000B29F\U00007C97"sv;
+
+    Vector<u8> processed_bytes;
+    MUST(encoder.process(Utf8View(test_string), [&](u8 byte) {
+        return processed_bytes.try_append(byte);
+    }));
+    EXPECT(processed_bytes.size() == 4);
+    EXPECT(processed_bytes[0] == 0x88);
+    EXPECT(processed_bytes[1] == 0x6B);
+    EXPECT(processed_bytes[2] == 0xF0);
+    EXPECT(processed_bytes[3] == 0xD8);
+}
diff --git a/Userland/Libraries/LibTextCodec/Encoder.cpp b/Userland/Libraries/LibTextCodec/Encoder.cpp
index b41cce99de3..8206bedc617 100644
--- a/Userland/Libraries/LibTextCodec/Encoder.cpp
+++ b/Userland/Libraries/LibTextCodec/Encoder.cpp
@@ -15,6 +15,7 @@ namespace TextCodec {
 namespace {
 UTF8Encoder s_utf8_encoder;
 EUCJPEncoder s_euc_jp_encoder;
+EUCKREncoder s_euc_kr_encoder;
 }
 
 Optional<Encoder&> encoder_for_exact_name(StringView encoding)
@@ -23,6 +24,8 @@ Optional<Encoder&> encoder_for_exact_name(StringView encoding)
         return s_utf8_encoder;
     if (encoding.equals_ignoring_ascii_case("euc-jp"sv))
         return s_euc_jp_encoder;
+    if (encoding.equals_ignoring_ascii_case("euc-kr"sv))
+        return s_euc_kr_encoder;
     dbgln("TextCodec: No encoder implemented for encoding '{}'", encoding);
     return {};
 }
@@ -100,4 +103,39 @@ ErrorOr<void> EUCJPEncoder::process(Utf8View input, Function<ErrorOr<void>(u8)>
     return {};
 }
 
+// https://encoding.spec.whatwg.org/#euc-kr-encoder
+ErrorOr<void> EUCKREncoder::process(Utf8View input, Function<ErrorOr<void>(u8)> on_byte)
+{
+    for (u32 item : input) {
+        // 1. If code point is end-of-queue, return finished.
+
+        // 2. If code point is an ASCII code point, return a byte whose value is code point.
+        if (is_ascii(item)) {
+            TRY(on_byte(static_cast<u8>(item)));
+            continue;
+        }
+
+        // 3. Let pointer be the index pointer for code point in index EUC-KR.
+        auto pointer = code_point_euc_kr_index(item);
+
+        // 4. If pointer is null, return error with code point.
+        if (!pointer.has_value()) {
+            // TODO: Report error.
+            continue;
+        }
+
+        // 5. Let lead be pointer / 190 + 0x81.
+        auto lead = *pointer / 190 + 0x81;
+
+        // 6. Let trail be pointer % 190 + 0x41.
+        auto trail = *pointer % 190 + 0x41;
+
+        // 7. Return two bytes whose values are lead and trail.
+        TRY(on_byte(static_cast<u8>(lead)));
+        TRY(on_byte(static_cast<u8>(trail)));
+    }
+
+    return {};
+}
+
 }
diff --git a/Userland/Libraries/LibTextCodec/Encoder.h b/Userland/Libraries/LibTextCodec/Encoder.h
index b3b0c0423dc..508c654a910 100644
--- a/Userland/Libraries/LibTextCodec/Encoder.h
+++ b/Userland/Libraries/LibTextCodec/Encoder.h
@@ -29,6 +29,11 @@ public:
     virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte) override;
 };
 
+class EUCKREncoder final : public Encoder {
+public:
+    virtual ErrorOr<void> process(Utf8View, Function<ErrorOr<void>(u8)> on_byte) override;
+};
+
 Optional<Encoder&> encoder_for_exact_name(StringView encoding);
 Optional<Encoder&> encoder_for(StringView label);
 