mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-23 04:55:15 +00:00
LibUnicode+Tests: Remove now unused to_unicode_*_full methods
Relocate all of the tests for these methods from LibUnicode over to the AK String test suite.
This commit is contained in:
parent
d1ed04a6cb
commit
d777b279e3
Notes:
sideshowbarker
2024-07-17 05:19:06 +09:00
Author: https://github.com/shannonbooth
Commit: https://github.com/SerenityOS/serenity/commit/d777b279e3
Pull-request: https://github.com/SerenityOS/serenity/pull/22057
Reviewed-by: https://github.com/trflynn89
4 changed files with 426 additions and 510 deletions
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2022, Andreas Kling <kling@serenityos.org>
|
||||
* Copyright (c) 2021, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -15,6 +16,7 @@
|
|||
#include <AK/Try.h>
|
||||
#include <AK/Utf8View.h>
|
||||
#include <AK/Vector.h>
|
||||
#include <ctype.h>
|
||||
|
||||
TEST_CASE(construct_empty)
|
||||
{
|
||||
|
@ -263,65 +265,439 @@ TEST_CASE(reverse)
|
|||
test_reverse("ab😀cd"sv, "dc😀ba"sv);
|
||||
}
|
||||
|
||||
TEST_CASE(to_lowercase)
|
||||
TEST_CASE(to_lowercase_unconditional_special_casing)
|
||||
{
|
||||
{
|
||||
auto string = "Aa"_string;
|
||||
auto result = MUST(string.to_lowercase());
|
||||
EXPECT_EQ(result, "aa"sv);
|
||||
}
|
||||
{
|
||||
auto string = "Ωω"_string;
|
||||
auto result = MUST(string.to_lowercase());
|
||||
EXPECT_EQ(result, "ωω"sv);
|
||||
}
|
||||
{
|
||||
auto string = "İi̇"_string;
|
||||
auto result = MUST(string.to_lowercase());
|
||||
EXPECT_EQ(result, "i̇i̇"sv);
|
||||
}
|
||||
// LATIN SMALL LETTER SHARP S
|
||||
auto result = MUST("\u00DF"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "\u00DF");
|
||||
|
||||
// LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
result = MUST("\u0130"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "\u0069\u0307");
|
||||
|
||||
// LATIN SMALL LIGATURE FF
|
||||
result = MUST("\uFB00"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "\uFB00");
|
||||
|
||||
// LATIN SMALL LIGATURE FI
|
||||
result = MUST("\uFB01"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "\uFB01");
|
||||
|
||||
// LATIN SMALL LIGATURE FL
|
||||
result = MUST("\uFB02"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "\uFB02");
|
||||
|
||||
// LATIN SMALL LIGATURE FFI
|
||||
result = MUST("\uFB03"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "\uFB03");
|
||||
|
||||
// LATIN SMALL LIGATURE FFL
|
||||
result = MUST("\uFB04"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "\uFB04");
|
||||
|
||||
// LATIN SMALL LIGATURE LONG S T
|
||||
result = MUST("\uFB05"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "\uFB05");
|
||||
|
||||
// LATIN SMALL LIGATURE ST
|
||||
result = MUST("\uFB06"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "\uFB06");
|
||||
|
||||
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST("\u1FB7"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "\u1FB7");
|
||||
|
||||
// GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST("\u1FC7"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "\u1FC7");
|
||||
|
||||
// GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST("\u1FF7"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "\u1FF7");
|
||||
}
|
||||
|
||||
TEST_CASE(to_uppercase)
|
||||
TEST_CASE(to_lowercase_special_casing_sigma)
|
||||
{
|
||||
{
|
||||
auto string = "Aa"_string;
|
||||
auto result = MUST(string.to_uppercase());
|
||||
EXPECT_EQ(result, "AA"sv);
|
||||
}
|
||||
{
|
||||
auto string = "Ωω"_string;
|
||||
auto result = MUST(string.to_uppercase());
|
||||
EXPECT_EQ(result, "ΩΩ"sv);
|
||||
}
|
||||
{
|
||||
auto string = "ʼn"_string;
|
||||
auto result = MUST(string.to_uppercase());
|
||||
EXPECT_EQ(result, "ʼN"sv);
|
||||
}
|
||||
auto result = MUST("ABCI"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "abci");
|
||||
|
||||
// Sigma preceded by A
|
||||
result = MUST("A\u03A3"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "a\u03C2");
|
||||
|
||||
// Sigma preceded by FEMININE ORDINAL INDICATOR
|
||||
result = MUST("\u00AA\u03A3"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "\u00AA\u03C2");
|
||||
|
||||
// Sigma preceded by ROMAN NUMERAL ONE
|
||||
result = MUST("\u2160\u03A3"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "\u2170\u03C2");
|
||||
|
||||
// Sigma preceded by COMBINING GREEK YPOGEGRAMMENI
|
||||
result = MUST("\u0345\u03A3"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "\u0345\u03C3");
|
||||
|
||||
// Sigma preceded by A and FULL STOP
|
||||
result = MUST("A.\u03A3"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "a.\u03C2");
|
||||
|
||||
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
|
||||
result = MUST("A\u180E\u03A3"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "a\u180E\u03C2");
|
||||
|
||||
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by B
|
||||
result = MUST("A\u180E\u03A3B"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "a\u180E\u03C3b");
|
||||
|
||||
// Sigma followed by A
|
||||
result = MUST("\u03A3A"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "\u03C3a");
|
||||
|
||||
// Sigma preceded by A, followed by MONGOLIAN VOWEL SEPARATOR
|
||||
result = MUST("A\u03A3\u180E"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "a\u03C2\u180E");
|
||||
|
||||
// Sigma preceded by A, followed by MONGOLIAN VOWEL SEPARATOR and B
|
||||
result = MUST("A\u03A3\u180EB"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "a\u03C3\u180Eb");
|
||||
|
||||
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by MONGOLIAN VOWEL SEPARATOR
|
||||
result = MUST("A\u180E\u03A3\u180E"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "a\u180E\u03C2\u180E");
|
||||
|
||||
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by MONGOLIAN VOWEL SEPARATOR and B
|
||||
result = MUST("A\u180E\u03A3\u180EB"_string.to_lowercase());
|
||||
EXPECT_EQ(result, "a\u180E\u03C3\u180Eb");
|
||||
}
|
||||
|
||||
TEST_CASE(to_lowercase_special_casing_i)
|
||||
{
|
||||
// LATIN CAPITAL LETTER I
|
||||
auto result = MUST("I"_string.to_lowercase("en"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
result = MUST("I"_string.to_lowercase("az"sv));
|
||||
EXPECT_EQ(result, "\u0131"sv);
|
||||
|
||||
result = MUST("I"_string.to_lowercase("tr"sv));
|
||||
EXPECT_EQ(result, "\u0131"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
result = MUST("\u0130"_string.to_lowercase("en"sv));
|
||||
EXPECT_EQ(result, "\u0069\u0307"sv);
|
||||
|
||||
result = MUST("\u0130"_string.to_lowercase("az"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
result = MUST("\u0130"_string.to_lowercase("tr"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER I followed by COMBINING DOT ABOVE
|
||||
result = MUST("I\u0307"_string.to_lowercase("en"sv));
|
||||
EXPECT_EQ(result, "i\u0307"sv);
|
||||
|
||||
result = MUST("I\u0307"_string.to_lowercase("az"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
result = MUST("I\u0307"_string.to_lowercase("tr"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER I followed by combining class 0 and COMBINING DOT ABOVE
|
||||
result = MUST("IA\u0307"_string.to_lowercase("en"sv));
|
||||
EXPECT_EQ(result, "ia\u0307"sv);
|
||||
|
||||
result = MUST("IA\u0307"_string.to_lowercase("az"sv));
|
||||
EXPECT_EQ(result, "\u0131a\u0307"sv);
|
||||
|
||||
result = MUST("IA\u0307"_string.to_lowercase("tr"sv));
|
||||
EXPECT_EQ(result, "\u0131a\u0307"sv);
|
||||
}
|
||||
|
||||
TEST_CASE(to_lowercase_special_casing_more_above)
|
||||
{
|
||||
// LATIN CAPITAL LETTER I
|
||||
auto result = MUST("I"_string.to_lowercase("en"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
result = MUST("I"_string.to_lowercase("lt"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER J
|
||||
result = MUST("J"_string.to_lowercase("en"sv));
|
||||
EXPECT_EQ(result, "j"sv);
|
||||
|
||||
result = MUST("J"_string.to_lowercase("lt"sv));
|
||||
EXPECT_EQ(result, "j"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER I WITH OGONEK
|
||||
result = MUST("\u012e"_string.to_lowercase("en"sv));
|
||||
EXPECT_EQ(result, "\u012f"sv);
|
||||
|
||||
result = MUST("\u012e"_string.to_lowercase("lt"sv));
|
||||
EXPECT_EQ(result, "\u012f"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER I followed by COMBINING GRAVE ACCENT
|
||||
result = MUST("I\u0300"_string.to_lowercase("en"sv));
|
||||
EXPECT_EQ(result, "i\u0300"sv);
|
||||
|
||||
result = MUST("I\u0300"_string.to_lowercase("lt"sv));
|
||||
EXPECT_EQ(result, "i\u0307\u0300"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER J followed by COMBINING GRAVE ACCENT
|
||||
result = MUST("J\u0300"_string.to_lowercase("en"sv));
|
||||
EXPECT_EQ(result, "j\u0300"sv);
|
||||
|
||||
result = MUST("J\u0300"_string.to_lowercase("lt"sv));
|
||||
EXPECT_EQ(result, "j\u0307\u0300"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER I WITH OGONEK followed by COMBINING GRAVE ACCENT
|
||||
result = MUST("\u012e\u0300"_string.to_lowercase("en"sv));
|
||||
EXPECT_EQ(result, "\u012f\u0300"sv);
|
||||
|
||||
result = MUST("\u012e\u0300"_string.to_lowercase("lt"sv));
|
||||
EXPECT_EQ(result, "\u012f\u0307\u0300"sv);
|
||||
}
|
||||
|
||||
TEST_CASE(to_lowercase_special_casing_not_before_dot)
|
||||
{
|
||||
// LATIN CAPITAL LETTER I
|
||||
auto result = MUST("I"_string.to_lowercase("en"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
result = MUST("I"_string.to_lowercase("az"sv));
|
||||
EXPECT_EQ(result, "\u0131"sv);
|
||||
|
||||
result = MUST("I"_string.to_lowercase("tr"sv));
|
||||
EXPECT_EQ(result, "\u0131"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER I followed by COMBINING DOT ABOVE
|
||||
result = MUST("I\u0307"_string.to_lowercase("en"sv));
|
||||
EXPECT_EQ(result, "i\u0307"sv);
|
||||
|
||||
result = MUST("I\u0307"_string.to_lowercase("az"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
result = MUST("I\u0307"_string.to_lowercase("tr"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
}
|
||||
|
||||
TEST_CASE(to_uppercase_unconditional_special_casing)
|
||||
{
|
||||
// LATIN SMALL LETTER SHARP S
|
||||
auto result = MUST("\u00DF"_string.to_uppercase());
|
||||
EXPECT_EQ(result, "\u0053\u0053");
|
||||
|
||||
// LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
result = MUST("\u0130"_string.to_uppercase());
|
||||
EXPECT_EQ(result, "\u0130");
|
||||
|
||||
// LATIN SMALL LIGATURE FF
|
||||
result = MUST("\uFB00"_string.to_uppercase());
|
||||
EXPECT_EQ(result, "\u0046\u0046");
|
||||
|
||||
// LATIN SMALL LIGATURE FI
|
||||
result = MUST("\uFB01"_string.to_uppercase());
|
||||
EXPECT_EQ(result, "\u0046\u0049");
|
||||
|
||||
// LATIN SMALL LIGATURE FL
|
||||
result = MUST("\uFB02"_string.to_uppercase());
|
||||
EXPECT_EQ(result, "\u0046\u004C");
|
||||
|
||||
// LATIN SMALL LIGATURE FFI
|
||||
result = MUST("\uFB03"_string.to_uppercase());
|
||||
EXPECT_EQ(result, "\u0046\u0046\u0049");
|
||||
|
||||
// LATIN SMALL LIGATURE FFL
|
||||
result = MUST("\uFB04"_string.to_uppercase());
|
||||
EXPECT_EQ(result, "\u0046\u0046\u004C");
|
||||
|
||||
// LATIN SMALL LIGATURE LONG S T
|
||||
result = MUST("\uFB05"_string.to_uppercase());
|
||||
EXPECT_EQ(result, "\u0053\u0054");
|
||||
|
||||
// LATIN SMALL LIGATURE ST
|
||||
result = MUST("\uFB06"_string.to_uppercase());
|
||||
EXPECT_EQ(result, "\u0053\u0054");
|
||||
|
||||
// GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
result = MUST("\u0390"_string.to_uppercase());
|
||||
EXPECT_EQ(result, "\u0399\u0308\u0301");
|
||||
|
||||
// GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
result = MUST("\u03B0"_string.to_uppercase());
|
||||
EXPECT_EQ(result, "\u03A5\u0308\u0301");
|
||||
|
||||
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST("\u1FB7"_string.to_uppercase());
|
||||
EXPECT_EQ(result, "\u0391\u0342\u0399");
|
||||
|
||||
// GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST("\u1FC7"_string.to_uppercase());
|
||||
EXPECT_EQ(result, "\u0397\u0342\u0399");
|
||||
|
||||
// GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST("\u1FF7"_string.to_uppercase());
|
||||
EXPECT_EQ(result, "\u03A9\u0342\u0399");
|
||||
}
|
||||
|
||||
TEST_CASE(to_uppercase_special_casing_soft_dotted)
|
||||
{
|
||||
// LATIN SMALL LETTER I
|
||||
auto result = MUST("i"_string.to_uppercase("en"sv));
|
||||
EXPECT_EQ(result, "I"sv);
|
||||
|
||||
result = MUST("i"_string.to_uppercase("lt"sv));
|
||||
EXPECT_EQ(result, "I"sv);
|
||||
|
||||
// LATIN SMALL LETTER J
|
||||
result = MUST("j"_string.to_uppercase("en"sv));
|
||||
EXPECT_EQ(result, "J"sv);
|
||||
|
||||
result = MUST("j"_string.to_uppercase("lt"sv));
|
||||
EXPECT_EQ(result, "J"sv);
|
||||
|
||||
// LATIN SMALL LETTER I followed by COMBINING DOT ABOVE
|
||||
result = MUST("i\u0307"_string.to_uppercase("en"sv));
|
||||
EXPECT_EQ(result, "I\u0307"sv);
|
||||
|
||||
result = MUST("i\u0307"_string.to_uppercase("lt"sv));
|
||||
EXPECT_EQ(result, "I"sv);
|
||||
|
||||
// LATIN SMALL LETTER J followed by COMBINING DOT ABOVE
|
||||
result = MUST("j\u0307"_string.to_uppercase("en"sv));
|
||||
EXPECT_EQ(result, "J\u0307"sv);
|
||||
|
||||
result = MUST("j\u0307"_string.to_uppercase("lt"sv));
|
||||
EXPECT_EQ(result, "J"sv);
|
||||
}
|
||||
|
||||
TEST_CASE(to_titlecase)
|
||||
{
|
||||
{
|
||||
auto string = "foo bar baz"_string;
|
||||
auto result = MUST(string.to_titlecase());
|
||||
EXPECT_EQ(result, "Foo Bar Baz"sv);
|
||||
EXPECT_EQ(MUST(""_string.to_titlecase()), ""sv);
|
||||
EXPECT_EQ(MUST(" "_string.to_titlecase()), " "sv);
|
||||
EXPECT_EQ(MUST(" - "_string.to_titlecase()), " - "sv);
|
||||
|
||||
EXPECT_EQ(MUST("a"_string.to_titlecase()), "A"sv);
|
||||
EXPECT_EQ(MUST("A"_string.to_titlecase()), "A"sv);
|
||||
EXPECT_EQ(MUST(" a"_string.to_titlecase()), " A"sv);
|
||||
EXPECT_EQ(MUST("a "_string.to_titlecase()), "A "sv);
|
||||
|
||||
EXPECT_EQ(MUST("ab"_string.to_titlecase()), "Ab"sv);
|
||||
EXPECT_EQ(MUST("Ab"_string.to_titlecase()), "Ab"sv);
|
||||
EXPECT_EQ(MUST("aB"_string.to_titlecase()), "Ab"sv);
|
||||
EXPECT_EQ(MUST("AB"_string.to_titlecase()), "Ab"sv);
|
||||
EXPECT_EQ(MUST(" ab"_string.to_titlecase()), " Ab"sv);
|
||||
EXPECT_EQ(MUST("ab "_string.to_titlecase()), "Ab "sv);
|
||||
|
||||
EXPECT_EQ(MUST("foo bar baz"_string.to_titlecase()), "Foo Bar Baz"sv);
|
||||
EXPECT_EQ(MUST("foo \n \r bar \t baz"_string.to_titlecase()), "Foo \n \r Bar \t Baz"sv);
|
||||
EXPECT_EQ(MUST("f\"oo\" b'ar'"_string.to_titlecase()), "F\"Oo\" B'ar'"sv);
|
||||
EXPECT_EQ(MUST("123dollars"_string.to_titlecase()), "123Dollars"sv);
|
||||
}
|
||||
|
||||
TEST_CASE(to_casefold)
|
||||
{
|
||||
for (u8 code_point = 0; code_point < 0x80; ++code_point) {
|
||||
auto ascii = tolower(code_point);
|
||||
auto unicode = MUST(MUST(String::from_utf8({ reinterpret_cast<char const*>(&code_point), 1 })).to_casefold());
|
||||
|
||||
EXPECT_EQ(unicode.bytes_as_string_view().length(), 1u);
|
||||
EXPECT_EQ(unicode.bytes_as_string_view()[0], ascii);
|
||||
}
|
||||
{
|
||||
auto string = "foo \n \r bar \t baz"_string;
|
||||
auto result = MUST(string.to_titlecase());
|
||||
EXPECT_EQ(result, "Foo \n \r Bar \t Baz"sv);
|
||||
}
|
||||
{
|
||||
auto string = "f\"oo\" b'ar'"_string;
|
||||
auto result = MUST(string.to_titlecase());
|
||||
EXPECT_EQ(result, "F\"Oo\" B'ar'"sv);
|
||||
}
|
||||
{
|
||||
auto string = "123dollars"_string;
|
||||
auto result = MUST(string.to_titlecase());
|
||||
EXPECT_EQ(result, "123Dollars"sv);
|
||||
|
||||
// LATIN SMALL LETTER SHARP S
|
||||
auto result = MUST("\u00DF"_string.to_casefold());
|
||||
EXPECT_EQ(result, "\u0073\u0073"sv);
|
||||
|
||||
// GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
|
||||
result = MUST("\u1FB3"_string.to_casefold());
|
||||
EXPECT_EQ(result, "\u03B1\u03B9"sv);
|
||||
|
||||
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI
|
||||
result = MUST("\u1FB6"_string.to_casefold());
|
||||
EXPECT_EQ(result, "\u03B1\u0342"sv);
|
||||
|
||||
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST("\u1FB7"_string.to_casefold());
|
||||
EXPECT_EQ(result, "\u03B1\u0342\u03B9"sv);
|
||||
}
|
||||
|
||||
TEST_CASE(to_titlecase_unconditional_special_casing)
|
||||
{
|
||||
// LATIN SMALL LETTER SHARP S
|
||||
auto result = MUST("\u00DF"_string.to_titlecase());
|
||||
EXPECT_EQ(result, "\u0053\u0073"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
result = MUST("\u0130"_string.to_titlecase());
|
||||
EXPECT_EQ(result, "\u0130"sv);
|
||||
|
||||
// LATIN SMALL LIGATURE FF
|
||||
result = MUST("\uFB00"_string.to_titlecase());
|
||||
EXPECT_EQ(result, "\u0046\u0066"sv);
|
||||
|
||||
// LATIN SMALL LIGATURE FI
|
||||
result = MUST("\uFB01"_string.to_titlecase());
|
||||
EXPECT_EQ(result, "\u0046\u0069"sv);
|
||||
|
||||
// LATIN SMALL LIGATURE FL
|
||||
result = MUST("\uFB02"_string.to_titlecase());
|
||||
EXPECT_EQ(result, "\u0046\u006C"sv);
|
||||
|
||||
// LATIN SMALL LIGATURE FFI
|
||||
result = MUST("\uFB03"_string.to_titlecase());
|
||||
EXPECT_EQ(result, "\u0046\u0066\u0069"sv);
|
||||
|
||||
// LATIN SMALL LIGATURE FFL
|
||||
result = MUST("\uFB04"_string.to_titlecase());
|
||||
EXPECT_EQ(result, "\u0046\u0066\u006C"sv);
|
||||
|
||||
// LATIN SMALL LIGATURE LONG S T
|
||||
result = MUST("\uFB05"_string.to_titlecase());
|
||||
EXPECT_EQ(result, "\u0053\u0074"sv);
|
||||
|
||||
// LATIN SMALL LIGATURE ST
|
||||
result = MUST("\uFB06"_string.to_titlecase());
|
||||
EXPECT_EQ(result, "\u0053\u0074"sv);
|
||||
|
||||
// GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
result = MUST("\u0390"_string.to_titlecase());
|
||||
EXPECT_EQ(result, "\u0399\u0308\u0301"sv);
|
||||
|
||||
// GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
result = MUST("\u03B0"_string.to_titlecase());
|
||||
EXPECT_EQ(result, "\u03A5\u0308\u0301"sv);
|
||||
|
||||
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST("\u1FB7"_string.to_titlecase());
|
||||
EXPECT_EQ(result, "\u0391\u0342\u0345"sv);
|
||||
|
||||
// GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST("\u1FC7"_string.to_titlecase());
|
||||
EXPECT_EQ(result, "\u0397\u0342\u0345"sv);
|
||||
|
||||
// GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST("\u1FF7"_string.to_titlecase());
|
||||
EXPECT_EQ(result, "\u03A9\u0342\u0345"sv);
|
||||
}
|
||||
|
||||
TEST_CASE(to_titlecase_special_casing_i)
|
||||
{
|
||||
// LATIN SMALL LETTER I
|
||||
auto result = MUST("i"_string.to_titlecase("en"sv));
|
||||
EXPECT_EQ(result, "I"sv);
|
||||
|
||||
result = MUST("i"_string.to_titlecase("az"sv));
|
||||
EXPECT_EQ(result, "\u0130"sv);
|
||||
|
||||
result = MUST("i"_string.to_titlecase("tr"sv));
|
||||
EXPECT_EQ(result, "\u0130"sv);
|
||||
}
|
||||
|
||||
BENCHMARK_CASE(casefold)
|
||||
{
|
||||
for (size_t i = 0; i < 50'000; ++i) {
|
||||
__test_to_casefold();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -74,54 +74,6 @@ TEST_CASE(to_unicode_titlecase)
|
|||
EXPECT_EQ(Unicode::to_unicode_titlecase(0x01c9u), 0x01c8u); // "lj" to "Lj"
|
||||
EXPECT_EQ(Unicode::to_unicode_titlecase(0x01ccu), 0x01cbu); // "nj" to "Nj"
|
||||
EXPECT_EQ(Unicode::to_unicode_titlecase(0x01f3u), 0x01f2u); // "dz" to "Dz"
|
||||
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(""sv)), ""sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" "sv)), " "sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" - "sv)), " - "sv);
|
||||
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("a"sv)), "A"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("A"sv)), "A"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" a"sv)), " A"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("a "sv)), "A "sv);
|
||||
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("ab"sv)), "Ab"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("Ab"sv)), "Ab"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("aB"sv)), "Ab"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("AB"sv)), "Ab"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" ab"sv)), " Ab"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("ab "sv)), "Ab "sv);
|
||||
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("foo bar baz"sv)), "Foo Bar Baz"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("foo \n \r bar \t baz"sv)), "Foo \n \r Bar \t Baz"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("f\"oo\" b'ar'"sv)), "F\"Oo\" B'ar'"sv);
|
||||
EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("123dollars"sv)), "123Dollars"sv);
|
||||
}
|
||||
|
||||
TEST_CASE(to_unicode_casefold)
|
||||
{
|
||||
for (u8 code_point = 0; code_point < 0x80; ++code_point) {
|
||||
auto ascii = tolower(code_point);
|
||||
auto unicode = MUST(Unicode::to_unicode_casefold_full({ reinterpret_cast<char const*>(&code_point), 1 }));
|
||||
|
||||
EXPECT_EQ(unicode.bytes_as_string_view().length(), 1u);
|
||||
EXPECT_EQ(unicode.bytes_as_string_view()[0], ascii);
|
||||
}
|
||||
|
||||
// LATIN SMALL LETTER SHARP S
|
||||
auto result = MUST(Unicode::to_unicode_casefold_full("\u00DF"sv));
|
||||
EXPECT_EQ(result, "\u0073\u0073"sv);
|
||||
|
||||
// GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
|
||||
result = MUST(Unicode::to_unicode_casefold_full("\u1FB3"sv));
|
||||
EXPECT_EQ(result, "\u03B1\u03B9"sv);
|
||||
|
||||
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI
|
||||
result = MUST(Unicode::to_unicode_casefold_full("\u1FB6"sv));
|
||||
EXPECT_EQ(result, "\u03B1\u0342"sv);
|
||||
|
||||
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST(Unicode::to_unicode_casefold_full("\u1FB7"sv));
|
||||
EXPECT_EQ(result, "\u03B1\u0342\u03B9"sv);
|
||||
}
|
||||
|
||||
BENCHMARK_CASE(casing)
|
||||
|
@ -130,388 +82,9 @@ BENCHMARK_CASE(casing)
|
|||
__test_to_unicode_lowercase();
|
||||
__test_to_unicode_uppercase();
|
||||
__test_to_unicode_titlecase();
|
||||
__test_to_unicode_casefold();
|
||||
}
|
||||
}
|
||||
|
||||
TEST_CASE(to_unicode_lowercase_unconditional_special_casing)
|
||||
{
|
||||
// LATIN SMALL LETTER SHARP S
|
||||
auto result = MUST(Unicode::to_unicode_lowercase_full("\u00DF"sv));
|
||||
EXPECT_EQ(result, "\u00DF");
|
||||
|
||||
// LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\u0130"sv));
|
||||
EXPECT_EQ(result, "\u0069\u0307");
|
||||
|
||||
// LATIN SMALL LIGATURE FF
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\uFB00"sv));
|
||||
EXPECT_EQ(result, "\uFB00");
|
||||
|
||||
// LATIN SMALL LIGATURE FI
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\uFB01"sv));
|
||||
EXPECT_EQ(result, "\uFB01");
|
||||
|
||||
// LATIN SMALL LIGATURE FL
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\uFB02"sv));
|
||||
EXPECT_EQ(result, "\uFB02");
|
||||
|
||||
// LATIN SMALL LIGATURE FFI
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\uFB03"sv));
|
||||
EXPECT_EQ(result, "\uFB03");
|
||||
|
||||
// LATIN SMALL LIGATURE FFL
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\uFB04"sv));
|
||||
EXPECT_EQ(result, "\uFB04");
|
||||
|
||||
// LATIN SMALL LIGATURE LONG S T
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\uFB05"sv));
|
||||
EXPECT_EQ(result, "\uFB05");
|
||||
|
||||
// LATIN SMALL LIGATURE ST
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\uFB06"sv));
|
||||
EXPECT_EQ(result, "\uFB06");
|
||||
|
||||
// GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\u1FB7"sv));
|
||||
EXPECT_EQ(result, "\u1FB7");
|
||||
|
||||
// GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\u1FC7"sv));
|
||||
EXPECT_EQ(result, "\u1FC7");
|
||||
|
||||
// GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\u1FF7"sv));
|
||||
EXPECT_EQ(result, "\u1FF7");
|
||||
}
|
||||
|
||||
TEST_CASE(to_unicode_lowercase_special_casing_sigma)
|
||||
{
|
||||
auto result = MUST(Unicode::to_unicode_lowercase_full("ABCI"sv));
|
||||
EXPECT_EQ(result, "abci");
|
||||
|
||||
// Sigma preceded by A
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("A\u03A3"sv));
|
||||
EXPECT_EQ(result, "a\u03C2");
|
||||
|
||||
// Sigma preceded by FEMININE ORDINAL INDICATOR
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\u00AA\u03A3"sv));
|
||||
EXPECT_EQ(result, "\u00AA\u03C2");
|
||||
|
||||
// Sigma preceded by ROMAN NUMERAL ONE
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv));
|
||||
EXPECT_EQ(result, "\u2170\u03C2");
|
||||
|
||||
// Sigma preceded by COMBINING GREEK YPOGEGRAMMENI
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\u0345\u03A3"sv));
|
||||
EXPECT_EQ(result, "\u0345\u03C3");
|
||||
|
||||
// Sigma preceded by A and FULL STOP
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("A.\u03A3"sv));
|
||||
EXPECT_EQ(result, "a.\u03C2");
|
||||
|
||||
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv));
|
||||
EXPECT_EQ(result, "a\u180E\u03C2");
|
||||
|
||||
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by B
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("A\u180E\u03A3B"sv));
|
||||
EXPECT_EQ(result, "a\u180E\u03C3b");
|
||||
|
||||
// Sigma followed by A
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\u03A3A"sv));
|
||||
EXPECT_EQ(result, "\u03C3a");
|
||||
|
||||
// Sigma preceded by A, followed by MONGOLIAN VOWEL SEPARATOR
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("A\u03A3\u180E"sv));
|
||||
EXPECT_EQ(result, "a\u03C2\u180E");
|
||||
|
||||
// Sigma preceded by A, followed by MONGOLIAN VOWEL SEPARATOR and B
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("A\u03A3\u180EB"sv));
|
||||
EXPECT_EQ(result, "a\u03C3\u180Eb");
|
||||
|
||||
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by MONGOLIAN VOWEL SEPARATOR
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("A\u180E\u03A3\u180E"sv));
|
||||
EXPECT_EQ(result, "a\u180E\u03C2\u180E");
|
||||
|
||||
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by MONGOLIAN VOWEL SEPARATOR and B
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("A\u180E\u03A3\u180EB"sv));
|
||||
EXPECT_EQ(result, "a\u180E\u03C3\u180Eb");
|
||||
}
|
||||
|
||||
TEST_CASE(to_unicode_lowercase_special_casing_i)
|
||||
{
|
||||
// LATIN CAPITAL LETTER I
|
||||
auto result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "en"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "az"sv));
|
||||
EXPECT_EQ(result, "\u0131"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "tr"sv));
|
||||
EXPECT_EQ(result, "\u0131"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\u0130"sv, "en"sv));
|
||||
EXPECT_EQ(result, "\u0069\u0307"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\u0130"sv, "az"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\u0130"sv, "tr"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER I followed by COMBINING DOT ABOVE
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "en"sv));
|
||||
EXPECT_EQ(result, "i\u0307"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "az"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "tr"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER I followed by combining class 0 and COMBINING DOT ABOVE
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("IA\u0307"sv, "en"sv));
|
||||
EXPECT_EQ(result, "ia\u0307"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("IA\u0307"sv, "az"sv));
|
||||
EXPECT_EQ(result, "\u0131a\u0307"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("IA\u0307"sv, "tr"sv));
|
||||
EXPECT_EQ(result, "\u0131a\u0307"sv);
|
||||
}
|
||||
|
||||
TEST_CASE(to_unicode_lowercase_special_casing_more_above)
|
||||
{
|
||||
// LATIN CAPITAL LETTER I
|
||||
auto result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "en"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "lt"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER J
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("J"sv, "en"sv));
|
||||
EXPECT_EQ(result, "j"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("J"sv, "lt"sv));
|
||||
EXPECT_EQ(result, "j"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER I WITH OGONEK
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\u012e"sv, "en"sv));
|
||||
EXPECT_EQ(result, "\u012f"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\u012e"sv, "lt"sv));
|
||||
EXPECT_EQ(result, "\u012f"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER I followed by COMBINING GRAVE ACCENT
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("I\u0300"sv, "en"sv));
|
||||
EXPECT_EQ(result, "i\u0300"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("I\u0300"sv, "lt"sv));
|
||||
EXPECT_EQ(result, "i\u0307\u0300"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER J followed by COMBINING GRAVE ACCENT
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("J\u0300"sv, "en"sv));
|
||||
EXPECT_EQ(result, "j\u0300"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("J\u0300"sv, "lt"sv));
|
||||
EXPECT_EQ(result, "j\u0307\u0300"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER I WITH OGONEK followed by COMBINING GRAVE ACCENT
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\u012e\u0300"sv, "en"sv));
|
||||
EXPECT_EQ(result, "\u012f\u0300"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("\u012e\u0300"sv, "lt"sv));
|
||||
EXPECT_EQ(result, "\u012f\u0307\u0300"sv);
|
||||
}
|
||||
|
||||
TEST_CASE(to_unicode_lowercase_special_casing_not_before_dot)
|
||||
{
|
||||
// LATIN CAPITAL LETTER I
|
||||
auto result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "en"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "az"sv));
|
||||
EXPECT_EQ(result, "\u0131"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "tr"sv));
|
||||
EXPECT_EQ(result, "\u0131"sv);
|
||||
|
||||
// LATIN CAPITAL LETTER I followed by COMBINING DOT ABOVE
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "en"sv));
|
||||
EXPECT_EQ(result, "i\u0307"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "az"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
|
||||
result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "tr"sv));
|
||||
EXPECT_EQ(result, "i"sv);
|
||||
}
|
||||
|
||||
// Uppercases single code points without passing a locale and checks the expected
// (possibly multi-code-point) result for each. Expected values are StringView literals,
// matching the other case-conversion tests in this file.
TEST_CASE(to_unicode_uppercase_unconditional_special_casing)
{
    // LATIN SMALL LETTER SHARP S
    auto result = MUST(Unicode::to_unicode_uppercase_full("\u00DF"sv));
    EXPECT_EQ(result, "\u0053\u0053"sv);

    // LATIN CAPITAL LETTER I WITH DOT ABOVE
    result = MUST(Unicode::to_unicode_uppercase_full("\u0130"sv));
    EXPECT_EQ(result, "\u0130"sv);

    // LATIN SMALL LIGATURE FF
    result = MUST(Unicode::to_unicode_uppercase_full("\uFB00"sv));
    EXPECT_EQ(result, "\u0046\u0046"sv);

    // LATIN SMALL LIGATURE FI
    result = MUST(Unicode::to_unicode_uppercase_full("\uFB01"sv));
    EXPECT_EQ(result, "\u0046\u0049"sv);

    // LATIN SMALL LIGATURE FL
    result = MUST(Unicode::to_unicode_uppercase_full("\uFB02"sv));
    EXPECT_EQ(result, "\u0046\u004C"sv);

    // LATIN SMALL LIGATURE FFI
    result = MUST(Unicode::to_unicode_uppercase_full("\uFB03"sv));
    EXPECT_EQ(result, "\u0046\u0046\u0049"sv);

    // LATIN SMALL LIGATURE FFL
    result = MUST(Unicode::to_unicode_uppercase_full("\uFB04"sv));
    EXPECT_EQ(result, "\u0046\u0046\u004C"sv);

    // LATIN SMALL LIGATURE LONG S T
    result = MUST(Unicode::to_unicode_uppercase_full("\uFB05"sv));
    EXPECT_EQ(result, "\u0053\u0054"sv);

    // LATIN SMALL LIGATURE ST
    result = MUST(Unicode::to_unicode_uppercase_full("\uFB06"sv));
    EXPECT_EQ(result, "\u0053\u0054"sv);

    // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
    result = MUST(Unicode::to_unicode_uppercase_full("\u0390"sv));
    EXPECT_EQ(result, "\u0399\u0308\u0301"sv);

    // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
    result = MUST(Unicode::to_unicode_uppercase_full("\u03B0"sv));
    EXPECT_EQ(result, "\u03A5\u0308\u0301"sv);

    // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
    result = MUST(Unicode::to_unicode_uppercase_full("\u1FB7"sv));
    EXPECT_EQ(result, "\u0391\u0342\u0399"sv);

    // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
    result = MUST(Unicode::to_unicode_uppercase_full("\u1FC7"sv));
    EXPECT_EQ(result, "\u0397\u0342\u0399"sv);

    // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
    result = MUST(Unicode::to_unicode_uppercase_full("\u1FF7"sv));
    EXPECT_EQ(result, "\u03A9\u0342\u0399"sv);
}
|
||||
|
||||
// Uppercases "i" and "j", alone and followed by COMBINING DOT ABOVE (U+0307), under the
// "en" and "lt" locales. With "lt" the expected output has no U+0307; with "en" it is kept.
TEST_CASE(to_unicode_uppercase_special_casing_soft_dotted)
{
    // LATIN SMALL LETTER I
    {
        auto const uppercased = MUST(Unicode::to_unicode_uppercase_full("i"sv, "en"sv));
        EXPECT_EQ(uppercased, "I"sv);
    }
    {
        auto const uppercased = MUST(Unicode::to_unicode_uppercase_full("i"sv, "lt"sv));
        EXPECT_EQ(uppercased, "I"sv);
    }

    // LATIN SMALL LETTER J
    {
        auto const uppercased = MUST(Unicode::to_unicode_uppercase_full("j"sv, "en"sv));
        EXPECT_EQ(uppercased, "J"sv);
    }
    {
        auto const uppercased = MUST(Unicode::to_unicode_uppercase_full("j"sv, "lt"sv));
        EXPECT_EQ(uppercased, "J"sv);
    }

    // LATIN SMALL LETTER I followed by COMBINING DOT ABOVE
    {
        auto const uppercased = MUST(Unicode::to_unicode_uppercase_full("i\u0307"sv, "en"sv));
        EXPECT_EQ(uppercased, "I\u0307"sv);
    }
    {
        auto const uppercased = MUST(Unicode::to_unicode_uppercase_full("i\u0307"sv, "lt"sv));
        EXPECT_EQ(uppercased, "I"sv);
    }

    // LATIN SMALL LETTER J followed by COMBINING DOT ABOVE
    {
        auto const uppercased = MUST(Unicode::to_unicode_uppercase_full("j\u0307"sv, "en"sv));
        EXPECT_EQ(uppercased, "J\u0307"sv);
    }
    {
        auto const uppercased = MUST(Unicode::to_unicode_uppercase_full("j\u0307"sv, "lt"sv));
        EXPECT_EQ(uppercased, "J"sv);
    }
}
|
||||
|
||||
// Titlecases single code points without passing a locale and checks the expected
// (possibly multi-code-point) result for each.
TEST_CASE(to_unicode_titlecase_unconditional_special_casing)
{
    // LATIN SMALL LETTER SHARP S
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("\u00DF"sv));
        EXPECT_EQ(titlecased, "\u0053\u0073"sv);
    }

    // LATIN CAPITAL LETTER I WITH DOT ABOVE
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("\u0130"sv));
        EXPECT_EQ(titlecased, "\u0130"sv);
    }

    // LATIN SMALL LIGATURE FF
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("\uFB00"sv));
        EXPECT_EQ(titlecased, "\u0046\u0066"sv);
    }

    // LATIN SMALL LIGATURE FI
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("\uFB01"sv));
        EXPECT_EQ(titlecased, "\u0046\u0069"sv);
    }

    // LATIN SMALL LIGATURE FL
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("\uFB02"sv));
        EXPECT_EQ(titlecased, "\u0046\u006C"sv);
    }

    // LATIN SMALL LIGATURE FFI
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("\uFB03"sv));
        EXPECT_EQ(titlecased, "\u0046\u0066\u0069"sv);
    }

    // LATIN SMALL LIGATURE FFL
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("\uFB04"sv));
        EXPECT_EQ(titlecased, "\u0046\u0066\u006C"sv);
    }

    // LATIN SMALL LIGATURE LONG S T
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("\uFB05"sv));
        EXPECT_EQ(titlecased, "\u0053\u0074"sv);
    }

    // LATIN SMALL LIGATURE ST
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("\uFB06"sv));
        EXPECT_EQ(titlecased, "\u0053\u0074"sv);
    }

    // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("\u0390"sv));
        EXPECT_EQ(titlecased, "\u0399\u0308\u0301"sv);
    }

    // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("\u03B0"sv));
        EXPECT_EQ(titlecased, "\u03A5\u0308\u0301"sv);
    }

    // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("\u1FB7"sv));
        EXPECT_EQ(titlecased, "\u0391\u0342\u0345"sv);
    }

    // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("\u1FC7"sv));
        EXPECT_EQ(titlecased, "\u0397\u0342\u0345"sv);
    }

    // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("\u1FF7"sv));
        EXPECT_EQ(titlecased, "\u03A9\u0342\u0345"sv);
    }
}
|
||||
|
||||
// Titlecases LATIN SMALL LETTER I under the "en", "az" and "tr" locales: "en" expects "I",
// while "az" and "tr" expect U+0130.
TEST_CASE(to_unicode_titlecase_special_casing_i)
{
    // LATIN SMALL LETTER I
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("i"sv, "en"sv));
        EXPECT_EQ(titlecased, "I"sv);
    }
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("i"sv, "az"sv));
        EXPECT_EQ(titlecased, "\u0130"sv);
    }
    {
        auto const titlecased = MUST(Unicode::to_unicode_titlecase_full("i"sv, "tr"sv));
        EXPECT_EQ(titlecased, "\u0130"sv);
    }
}
|
||||
|
||||
TEST_CASE(general_category)
|
||||
{
|
||||
auto general_category = [](StringView name) {
|
||||
|
|
|
@@ -41,34 +41,6 @@ u32 __attribute__((weak)) to_unicode_titlecase(u32 code_point)
|
|||
return to_ascii_uppercase(code_point);
|
||||
}
|
||||
|
||||
// Feeds `string` (viewed as UTF-8) and the optional `locale` to Detail::build_lowercase_string
// and returns the accumulated output as a DeprecatedString. Any error from the build step is
// propagated to the caller.
ErrorOr<DeprecatedString> to_unicode_lowercase_full(StringView string, Optional<StringView> const& locale)
{
    Utf8View const code_points { string };

    StringBuilder lowercased;
    TRY(Detail::build_lowercase_string(code_points, lowercased, locale));

    return lowercased.to_deprecated_string();
}
|
||||
|
||||
// Feeds `string` (viewed as UTF-8) and the optional `locale` to Detail::build_uppercase_string
// and returns the accumulated output as a DeprecatedString. Any error from the build step is
// propagated to the caller.
ErrorOr<DeprecatedString> to_unicode_uppercase_full(StringView string, Optional<StringView> const& locale)
{
    Utf8View const code_points { string };

    StringBuilder uppercased;
    TRY(Detail::build_uppercase_string(code_points, uppercased, locale));

    return uppercased.to_deprecated_string();
}
|
||||
|
||||
// Feeds `string` (viewed as UTF-8), the optional `locale` and the requested
// `trailing_code_point_transformation` to Detail::build_titlecase_string and returns the
// accumulated output as a String. Any error from the build step or from the final
// conversion is propagated to the caller.
ErrorOr<String> to_unicode_titlecase_full(StringView string, Optional<StringView> const& locale, TrailingCodePointTransformation trailing_code_point_transformation)
{
    Utf8View const code_points { string };

    StringBuilder titlecased;
    TRY(Detail::build_titlecase_string(code_points, titlecased, locale, trailing_code_point_transformation));

    return titlecased.to_string();
}
|
||||
|
||||
// Feeds `string` (viewed as UTF-8) to Detail::build_casefold_string and returns the
// accumulated output as a String. Any error from the build step or from the final
// conversion is propagated to the caller.
ErrorOr<String> to_unicode_casefold_full(StringView string)
{
    Utf8View const code_points { string };

    StringBuilder casefolded;
    TRY(Detail::build_casefold_string(code_points, casefolded));

    return casefolded.to_string();
}
|
||||
|
||||
template<typename ViewType>
|
||||
class CasefoldStringComparator {
|
||||
public:
|
||||
|
|
|
@@ -48,11 +48,6 @@ u32 to_unicode_lowercase(u32 code_point);
|
|||
u32 to_unicode_uppercase(u32 code_point);
|
||||
u32 to_unicode_titlecase(u32 code_point);
|
||||
|
||||
// String-level case conversions taking a StringView. The lowercase and uppercase variants
// return a DeprecatedString; `locale` defaults to an empty Optional.
ErrorOr<DeprecatedString> to_unicode_lowercase_full(StringView, Optional<StringView> const& locale = {});
|
||||
ErrorOr<DeprecatedString> to_unicode_uppercase_full(StringView, Optional<StringView> const& locale = {});
|
||||
// Returns a String; `trailing_code_point_transformation` defaults to
// TrailingCodePointTransformation::Lowercase.
ErrorOr<String> to_unicode_titlecase_full(StringView, Optional<StringView> const& locale = {}, TrailingCodePointTransformation trailing_code_point_transformation = TrailingCodePointTransformation::Lowercase);
|
||||
// Returns a String; takes no locale argument.
ErrorOr<String> to_unicode_casefold_full(StringView);
|
||||
|
||||
template<typename ViewType>
|
||||
bool equals_ignoring_case(ViewType, ViewType);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue