From 5eda6293263ccfeb1e4b545f8cdfa8712d94db3d Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Fri, 6 Sep 2024 14:42:06 -0400 Subject: [PATCH] LibUnicode: Remove unused emoji support methods --- Tests/LibUnicode/CMakeLists.txt | 1 - Tests/LibUnicode/TestEmoji.cpp | 72 ------------ Userland/Libraries/LibGfx/TextLayout.cpp | 1 - Userland/Libraries/LibUnicode/CMakeLists.txt | 1 - Userland/Libraries/LibUnicode/Emoji.cpp | 114 ------------------- Userland/Libraries/LibUnicode/Emoji.h | 26 ----- 6 files changed, 215 deletions(-) delete mode 100644 Tests/LibUnicode/TestEmoji.cpp delete mode 100644 Userland/Libraries/LibUnicode/Emoji.cpp delete mode 100644 Userland/Libraries/LibUnicode/Emoji.h diff --git a/Tests/LibUnicode/CMakeLists.txt b/Tests/LibUnicode/CMakeLists.txt index 52e9638f154..ec38a11dc5e 100644 --- a/Tests/LibUnicode/CMakeLists.txt +++ b/Tests/LibUnicode/CMakeLists.txt @@ -1,6 +1,5 @@ set(TEST_SOURCES TestDisplayNames.cpp - TestEmoji.cpp TestIDNA.cpp TestLocale.cpp TestSegmenter.cpp diff --git a/Tests/LibUnicode/TestEmoji.cpp b/Tests/LibUnicode/TestEmoji.cpp deleted file mode 100644 index 8c1048e8387..00000000000 --- a/Tests/LibUnicode/TestEmoji.cpp +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2023, Tim Flynn - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include -#include -#include -#include -#include -#include - -// These emojis are the first subgroup in each Unicode-defined group of emojis, plus some interesting -// hand-picked test cases (such as keycap emoji, which begin with ASCII symbols, and country flags). -static constexpr auto s_smileys_emotion = Array { "๐Ÿ˜€"sv, "๐Ÿ˜ƒ"sv, "๐Ÿ˜„"sv, "๐Ÿ˜"sv, "๐Ÿ˜†"sv, "๐Ÿ˜…"sv, "๐Ÿคฃ"sv, "๐Ÿ˜‚"sv, "๐Ÿ™‚"sv, "๐Ÿ™ƒ"sv, "๐Ÿซ "sv, "๐Ÿ˜‰"sv, "๐Ÿ˜Š"sv, "๐Ÿ˜‡"sv }; -static constexpr auto s_people_body = Array { "๐Ÿ‘‹"sv, "๐Ÿคš"sv, "๐Ÿ–๏ธ"sv, "๐Ÿ–"sv, "โœ‹"sv, "๐Ÿซฑ"sv, "๐Ÿซฒ"sv, "๐Ÿซณ"sv, "๐Ÿซด"sv, "๐Ÿซท"sv, "๐Ÿซธ"sv }; -static constexpr auto s_animals_nature = Array { "๐Ÿถ"sv, "๐Ÿ•"sv, "๐Ÿ•โ€๐Ÿฆบ"sv, "๐Ÿฉ"sv, "๐ŸฆŠ"sv, "๐Ÿฆ"sv, "๐Ÿฑ"sv, "๐Ÿˆ"sv, "๐Ÿˆโ€โฌ›"sv, "๐Ÿฆ"sv, "๐Ÿฏ"sv, "๐Ÿด"sv, "๐ŸซŽ"sv, "๐Ÿซ"sv, "๐ŸŽ"sv, "๐Ÿฆ„"sv, "๐Ÿฆ“"sv, "๐ŸฆŒ"sv, "๐Ÿฆฌ"sv, "๐Ÿฎ"sv, "๐Ÿท"sv, "๐Ÿ–"sv, "๐Ÿ—"sv, "๐Ÿฝ"sv, "๐Ÿ‘"sv, "๐Ÿฆ™"sv, "๐Ÿฆ’"sv, "๐Ÿ˜"sv, "๐Ÿญ"sv, "๐Ÿ"sv, "๐Ÿ€"sv, "๐Ÿฐ"sv, "๐Ÿ‡"sv, "๐Ÿฟ๏ธ"sv, "๐Ÿฟ"sv, "๐Ÿฆ”"sv, "๐Ÿฆ‡"sv, "๐Ÿป"sv, "๐Ÿปโ€โ„๏ธ"sv, "๐Ÿปโ€โ„"sv, "๐Ÿจ"sv, "๐Ÿผ"sv, "๐Ÿฆฅ"sv, "๐Ÿฆ˜"sv, "๐Ÿฆก"sv, "๐Ÿพ"sv }; -static constexpr auto s_food_drink = Array { "๐Ÿ‡"sv, "๐Ÿˆ"sv, "๐Ÿ‰"sv, "๐ŸŠ"sv, "๐Ÿ‹"sv, "๐ŸŒ"sv, "๐Ÿ"sv, "๐Ÿฅญ"sv, "๐ŸŽ"sv, "๐Ÿ"sv, "๐Ÿ"sv, "๐Ÿ‘"sv, "๐Ÿ’"sv, "๐Ÿ“"sv, "๐Ÿซ"sv, "๐Ÿฅ"sv, "๐Ÿ…"sv, "๐Ÿซ’"sv, "๐Ÿฅฅ"sv }; -static constexpr auto s_travel_places = Array { "๐ŸŒ"sv, "๐ŸŒŽ"sv, "๐ŸŒ"sv, "๐ŸŒ"sv, "๐Ÿ—บ๏ธ"sv, "๐Ÿ—บ"sv, "๐Ÿ—พ"sv, "๐Ÿงญ"sv }; -static constexpr auto s_activities = Array { "๐ŸŽƒ"sv, "๐ŸŽ„"sv, "๐ŸŽ†"sv, "๐ŸŽ‡"sv, "๐Ÿงจ"sv, "โœจ"sv, "๐ŸŽˆ"sv, "๐ŸŽ‰"sv, "๐ŸŽŠ"sv, "๐ŸŽ‹"sv, "๐ŸŽ"sv, "๐ŸŽ"sv, "๐ŸŽ‘"sv, "๐ŸŽ€"sv, "๐ŸŽ"sv, "๐ŸŽ—๏ธ"sv, "๐ŸŽ—"sv, "๐ŸŽŸ๏ธ"sv, "๐ŸŽŸ"sv, "๐ŸŽซ"sv }; -static constexpr auto s_objects = Array { "๐Ÿ‘“"sv, "๐Ÿ•ถ๏ธ"sv, "๐Ÿ•ถ"sv, "๐Ÿฆบ"sv, "๐Ÿ‘”"sv, "๐Ÿ‘–"sv, "๐Ÿงฆ"sv, "๐Ÿ‘—"sv, "๐Ÿฅป"sv, "๐Ÿฉฑ"sv, "๐Ÿฉฒ"sv, "๐Ÿฉณ"sv, "๐Ÿ‘™"sv, "๐Ÿชญ"sv, "๐Ÿ‘›"sv, "๐Ÿ‘œ"sv, "๐Ÿ›๏ธ"sv, "๐Ÿ›"sv, "๐Ÿฉด"sv, "๐Ÿ‘ก"sv, "๐Ÿ‘ข"sv, "๐Ÿชฎ"sv, "๐Ÿ‘‘"sv, "๐ŸŽฉ"sv, "๐ŸŽ“"sv, "๐Ÿช–"sv, "โ›‘๏ธ"sv, "โ›‘"sv, "๐Ÿ’„"sv, "๐Ÿ’"sv, "๐Ÿ’Ž"sv }; -static constexpr auto s_symbols = Array { "๐Ÿšฎ"sv, "๐Ÿšฐ"sv, "โ™ฟ"sv, "๐Ÿšน"sv, "๐Ÿšบ"sv, "๐Ÿšพ"sv, "๐Ÿ›‚"sv, "๐Ÿ›ƒ"sv, "๐Ÿ›„"sv, "๐Ÿ›…"sv, "#๏ธโƒฃ"sv, "#โƒฃ"sv, "*๏ธโƒฃ"sv, "*โƒฃ"sv, "0๏ธโƒฃ"sv, "0โƒฃ"sv, "1๏ธโƒฃ"sv, "1โƒฃ"sv, "2๏ธโƒฃ"sv, "2โƒฃ"sv, "3๏ธโƒฃ"sv, "3โƒฃ"sv, "4๏ธโƒฃ"sv, "4โƒฃ"sv, "5๏ธโƒฃ"sv, "5โƒฃ"sv, "6๏ธโƒฃ"sv, "6โƒฃ"sv, "7๏ธโƒฃ"sv, "7โƒฃ"sv, "8๏ธโƒฃ"sv, "8โƒฃ"sv, "9๏ธโƒฃ"sv, "9โƒฃ"sv, "๐Ÿ”Ÿ"sv }; -static constexpr auto s_flags = Array { "๐Ÿ"sv, "๐Ÿšฉ"sv, "๐ŸŽŒ"sv, "๐Ÿด"sv, "๐Ÿณ๏ธ"sv, "๐Ÿณ"sv, "๐Ÿณ๏ธโ€๐ŸŒˆ"sv, "๐Ÿณโ€๐ŸŒˆ"sv, "๐Ÿณ๏ธโ€โšง๏ธ"sv, "๐Ÿณโ€โšง๏ธ"sv, "๐Ÿณ๏ธโ€โšง"sv, "๐Ÿณโ€โšง"sv, "๐Ÿดโ€โ˜ ๏ธ"sv, "๐Ÿดโ€โ˜ "sv, "๐Ÿ‡ฆ๐Ÿ‡จ"sv, "๐Ÿ‡ฆ๐Ÿ‡ฉ"sv, "๐Ÿ‡ฆ๐Ÿ‡ช"sv, "๐Ÿ‡ฆ๐Ÿ‡ซ"sv, "๐Ÿ‡ฆ๐Ÿ‡ฌ"sv, "๐Ÿ‡ฆ๐Ÿ‡ฎ"sv, "๐Ÿ‡ฆ๐Ÿ‡ฑ"sv, "๐Ÿ‡ฆ๐Ÿ‡ฒ"sv, "๐Ÿ‡ฆ๐Ÿ‡ด"sv, "๐Ÿ‡ฆ๐Ÿ‡ถ"sv, "๐Ÿ‡ฆ๐Ÿ‡ท"sv, "๐Ÿ‡ฆ๐Ÿ‡ธ"sv, "๐Ÿ‡ฆ๐Ÿ‡น"sv, "๐Ÿ‡ฆ๐Ÿ‡บ"sv, "๐Ÿ‡ฆ๐Ÿ‡ผ"sv, "๐Ÿ‡ฆ๐Ÿ‡ฝ"sv, "๐Ÿ‡ฆ๐Ÿ‡ฟ"sv, "๐Ÿ‡ง๐Ÿ‡ฆ"sv, "๐Ÿ‡ง๐Ÿ‡ง"sv, "๐Ÿ‡ง๐Ÿ‡ฉ"sv, "๐Ÿ‡ง๐Ÿ‡ช"sv, "๐Ÿ‡ง๐Ÿ‡ซ"sv, "๐Ÿ‡ง๐Ÿ‡ฌ"sv, "๐Ÿ‡ง๐Ÿ‡ญ"sv, "๐Ÿ‡ง๐Ÿ‡ฎ"sv, "๐Ÿ‡ง๐Ÿ‡ฏ"sv, "๐Ÿ‡ง๐Ÿ‡ฑ"sv, "๐Ÿ‡ง๐Ÿ‡ฒ"sv, "๐Ÿ‡ง๐Ÿ‡ณ"sv, "๐Ÿ‡ง๐Ÿ‡ด"sv, "๐Ÿ‡ง๐Ÿ‡ถ"sv, "๐Ÿ‡ง๐Ÿ‡ท"sv, "๐Ÿ‡ง๐Ÿ‡ธ"sv }; - -TEST_CASE(emoji) -{ - auto test_emojis = [](auto const& emojis) { - for (auto emoji : emojis) { - Utf8View view { emoji }; - EXPECT(Unicode::could_be_start_of_emoji_sequence(view.begin())); - } - }; - - test_emojis(s_smileys_emotion); - test_emojis(s_people_body); - test_emojis(s_animals_nature); - test_emojis(s_food_drink); - test_emojis(s_travel_places); - test_emojis(s_activities); - test_emojis(s_objects); - test_emojis(s_symbols); - test_emojis(s_flags); -} - -TEST_CASE(emoji_presentation_only) -{ - auto test_emoji = [](auto emoji, auto expected_result) { - Utf8View view { emoji }; - auto is_start_of_emoji_sequence = Unicode::could_be_start_of_emoji_sequence(view.begin(), Unicode::SequenceType::EmojiPresentation); - EXPECT_EQ(is_start_of_emoji_sequence, expected_result); - }; - - test_emoji("ยฉ๏ธ"sv, true); - test_emoji("ยฉ"sv, false); - - test_emoji("ยฎ๏ธ"sv, true); - test_emoji("ยฎ"sv, false); - - test_emoji("\U0001F3F3\u200D\U0001F41E"sv, true); // SerenityOS flag - test_emoji("\U0001F3F3\uFE0F\u200D\U0001F41E"sv, true); // SerenityOS flag -} - -TEST_CASE(ascii_is_not_emoji) -{ - for (u32 code_point = 0u; is_ascii(code_point); ++code_point) { - auto string = String::from_code_point(code_point); - Utf8View view { string }; - - EXPECT(!Unicode::could_be_start_of_emoji_sequence(view.begin())); - } -} diff --git a/Userland/Libraries/LibGfx/TextLayout.cpp b/Userland/Libraries/LibGfx/TextLayout.cpp index ceab729e663..f1f95bd4e76 100644 --- a/Userland/Libraries/LibGfx/TextLayout.cpp +++ b/Userland/Libraries/LibGfx/TextLayout.cpp @@ -8,7 +8,6 @@ #include "TextLayout.h" #include #include -#include #include namespace Gfx { diff --git a/Userland/Libraries/LibUnicode/CMakeLists.txt b/Userland/Libraries/LibUnicode/CMakeLists.txt index 41cc0327b86..e396922575d 100644 --- a/Userland/Libraries/LibUnicode/CMakeLists.txt +++ b/Userland/Libraries/LibUnicode/CMakeLists.txt @@ -5,7 +5,6 @@ set(SOURCES DateTimeFormat.cpp DisplayNames.cpp DurationFormat.cpp - Emoji.cpp ICU.cpp IDNA.cpp ListFormat.cpp diff --git a/Userland/Libraries/LibUnicode/Emoji.cpp b/Userland/Libraries/LibUnicode/Emoji.cpp deleted file mode 100644 index 04cb51db8f7..00000000000 --- a/Userland/Libraries/LibUnicode/Emoji.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2022-2024, Tim Flynn - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#include -#include -#include -#include -#include - -namespace Unicode { - -Optional __attribute__((weak)) emoji_image_for_code_points(ReadonlySpan) { return {}; } - -// https://unicode.org/reports/tr51/#def_emoji_core_sequence -static bool could_be_start_of_emoji_core_sequence(u32 code_point, Optional const& next_code_point, SequenceType type) -{ - // emoji_core_sequence := emoji_character | emoji_presentation_sequence | emoji_keycap_sequence | emoji_modifier_sequence | emoji_flag_sequence - - static constexpr auto emoji_presentation_selector = 0xFE0Fu; - static constexpr auto combining_enclosing_keycap = 0x20E3u; - static constexpr auto zero_width_joiner = 0x200Du; - - // https://unicode.org/reports/tr51/#def_emoji_keycap_sequence - // emoji_keycap_sequence := [0-9#*] \x{FE0F 20E3} - if (is_ascii_digit(code_point) || code_point == '#' || code_point == '*') - return next_code_point == emoji_presentation_selector || next_code_point == combining_enclosing_keycap; - - // A little non-standard, but all other ASCII code points are not the beginning of any emoji sequence. - if (is_ascii(code_point)) - return false; - - // https://unicode.org/reports/tr51/#def_emoji_character - switch (type) { - case SequenceType::Any: - if (code_point_has_emoji_property(code_point)) - return true; - break; - case SequenceType::EmojiPresentation: - if (code_point_has_emoji_presentation_property(code_point)) - return true; - if (next_code_point == zero_width_joiner && code_point_has_emoji_property(code_point)) - return true; - break; - } - - // https://unicode.org/reports/tr51/#def_emoji_presentation_sequence - // emoji_presentation_sequence := emoji_character emoji_presentation_selector - if (next_code_point == emoji_presentation_selector) - return true; - - // https://unicode.org/reports/tr51/#def_emoji_modifier_sequence - // emoji_modifier_sequence := emoji_modifier_base emoji_modifier - if (code_point_has_emoji_modifier_base_property(code_point)) - return true; - - // https://unicode.org/reports/tr51/#def_emoji_flag_sequence - // emoji_flag_sequence := regional_indicator regional_indicator - if (code_point_has_regional_indicator_property(code_point)) - return true; - - return false; -} - -static bool could_be_start_of_serenity_emoji(u32 code_point) -{ - // We use Supplementary Private Use Area-B for custom Serenity emoji, starting at U+10CD00. - static constexpr auto first_custom_serenity_emoji_code_point = 0x10CD00u; - - return code_point >= first_custom_serenity_emoji_code_point; -} - -// https://unicode.org/reports/tr51/#def_emoji_sequence -template -static bool could_be_start_of_emoji_sequence_impl(CodePointIterator const& it, SequenceType type) -{ - // emoji_sequence := emoji_core_sequence | emoji_zwj_sequence | emoji_tag_sequence - - if (it.done()) - return false; - - // The purpose of this method is to quickly filter out code points that cannot be the start of - // an emoji. The emoji_core_sequence definition alone captures the start of all possible - // emoji_zwj_sequence and emoji_tag_sequence emojis, because: - // - // * emoji_zwj_sequence must begin with emoji_zwj_element, which is: - // emoji_zwj_element := emoji_core_sequence | emoji_tag_sequence - // - // * emoji_tag_sequence must begin with tag_base, which is: - // tag_base := emoji_character | emoji_modifier_sequence | emoji_presentation_sequence - // Note that this is a subset of emoji_core_sequence. - auto code_point = *it; - auto next_code_point = it.peek(1); - - if (could_be_start_of_emoji_core_sequence(code_point, next_code_point, type)) - return true; - if (could_be_start_of_serenity_emoji(code_point)) - return true; - return false; -} - -bool could_be_start_of_emoji_sequence(Utf8CodePointIterator const& it, SequenceType type) -{ - return could_be_start_of_emoji_sequence_impl(it, type); -} - -bool could_be_start_of_emoji_sequence(Utf32CodePointIterator const& it, SequenceType type) -{ - return could_be_start_of_emoji_sequence_impl(it, type); -} - -} diff --git a/Userland/Libraries/LibUnicode/Emoji.h b/Userland/Libraries/LibUnicode/Emoji.h deleted file mode 100644 index e0ddb845b6d..00000000000 --- a/Userland/Libraries/LibUnicode/Emoji.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Copyright (c) 2022-2024, Tim Flynn - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#pragma once - -#include -#include -#include -#include - -namespace Unicode { - -Optional emoji_image_for_code_points(ReadonlySpan code_points); - -enum class SequenceType { - Any, - EmojiPresentation, -}; - -bool could_be_start_of_emoji_sequence(Utf8CodePointIterator const&, SequenceType = SequenceType::Any); -bool could_be_start_of_emoji_sequence(Utf32CodePointIterator const&, SequenceType = SequenceType::Any); - -}