From 6fcc1c742647f05fc65264290622f67b9a6ec745 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Sun, 8 Jan 2023 16:33:30 -0500 Subject: [PATCH] AK+LibUnicode: Provide Unicode-aware String case transformations Since AK can't refer to LibUnicode directly, the strategy here is that if you need case transformations, you can link LibUnicode and receive them. If you try to use either of these methods without linking it, then you'll of course get a linker error (note we don't do any fallbacks to e.g. ASCII case transformations). If you don't need these methods, you don't have to link LibUnicode. --- AK/String.h | 6 ++++ Meta/Lagom/CMakeLists.txt | 1 + Tests/AK/CMakeLists.txt | 2 ++ Tests/AK/TestString.cpp | 38 ++++++++++++++++++++ Userland/Libraries/LibUnicode/CMakeLists.txt | 1 + Userland/Libraries/LibUnicode/String.cpp | 29 +++++++++++++++ 6 files changed, 77 insertions(+) create mode 100644 Userland/Libraries/LibUnicode/String.cpp diff --git a/AK/String.h b/AK/String.h index 01c4c3cc1b1..7a8eecce15c 100644 --- a/AK/String.h +++ b/AK/String.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,11 @@ public: // Creates a new String from a sequence of UTF-8 encoded code points. static ErrorOr from_utf8(StringView); + // Creates a new String by transforming this String to lower- or uppercase. Using these methods + // requires linking LibUnicode into your application. + ErrorOr to_lowercase(Optional const& locale = {}) const; + ErrorOr to_uppercase(Optional const& locale = {}) const; + // Creates a substring with a deep copy of the specified data window. 
ErrorOr substring_from_byte_offset(size_t start, size_t byte_count) const; diff --git a/Meta/Lagom/CMakeLists.txt b/Meta/Lagom/CMakeLists.txt index 152981f46f3..e9c2f4c01c1 100644 --- a/Meta/Lagom/CMakeLists.txt +++ b/Meta/Lagom/CMakeLists.txt @@ -558,6 +558,7 @@ if (BUILD_LAGOM) foreach(source ${AK_TEST_SOURCES}) lagom_test(${source} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../../Tests/AK) endforeach() + target_link_libraries(TestString LibUnicode) # LibAudio file(GLOB LIBAUDIO_TEST_SOURCES CONFIGURE_DEPENDS "../../Tests/LibAudio/*.cpp") diff --git a/Tests/AK/CMakeLists.txt b/Tests/AK/CMakeLists.txt index e7e0c0855f8..957304cb778 100644 --- a/Tests/AK/CMakeLists.txt +++ b/Tests/AK/CMakeLists.txt @@ -86,3 +86,5 @@ set(AK_TEST_SOURCES foreach(source IN LISTS AK_TEST_SOURCES) serenity_test("${source}" AK) endforeach() + +target_link_libraries(TestString PRIVATE LibUnicode) diff --git a/Tests/AK/TestString.cpp b/Tests/AK/TestString.cpp index 3cc88317a9a..3faaa3b6548 100644 --- a/Tests/AK/TestString.cpp +++ b/Tests/AK/TestString.cpp @@ -107,3 +107,41 @@ TEST_CASE(replace) EXPECT_EQ(result, "anon@courage:~"sv); } } + +TEST_CASE(to_lowercase) +{ + { + auto string = MUST(String::from_utf8("Aa"sv)); + auto result = MUST(string.to_lowercase()); + EXPECT_EQ(result, "aa"sv); + } + { + auto string = MUST(String::from_utf8("Ωω"sv)); + auto result = MUST(string.to_lowercase()); + EXPECT_EQ(result, "ωω"sv); + } + { + auto string = MUST(String::from_utf8("İi̇"sv)); + auto result = MUST(string.to_lowercase()); + EXPECT_EQ(result, "i̇i̇"sv); + } +} + +TEST_CASE(to_uppercase) +{ + { + auto string = MUST(String::from_utf8("Aa"sv)); + auto result = MUST(string.to_uppercase()); + EXPECT_EQ(result, "AA"sv); + } + { + auto string = MUST(String::from_utf8("Ωω"sv)); + auto result = MUST(string.to_uppercase()); + EXPECT_EQ(result, "ΩΩ"sv); + } + { + auto string = MUST(String::from_utf8("ʼn"sv)); + auto result = MUST(string.to_uppercase()); + EXPECT_EQ(result, "ʼN"sv); + } +} diff --git 
a/Userland/Libraries/LibUnicode/CMakeLists.txt b/Userland/Libraries/LibUnicode/CMakeLists.txt index 109307cad96..27f92018159 100644 --- a/Userland/Libraries/LibUnicode/CMakeLists.txt +++ b/Userland/Libraries/LibUnicode/CMakeLists.txt @@ -5,6 +5,7 @@ set(SOURCES CurrencyCode.cpp Emoji.cpp Normalize.cpp + String.cpp UnicodeUtils.cpp ${UNICODE_DATA_SOURCES} ) diff --git a/Userland/Libraries/LibUnicode/String.cpp b/Userland/Libraries/LibUnicode/String.cpp new file mode 100644 index 00000000000..4cbc47d5ad8 --- /dev/null +++ b/Userland/Libraries/LibUnicode/String.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2023, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include +#include + +// This file contains definitions of AK::String methods which require UCD data. + +namespace AK { + +ErrorOr String::to_lowercase(Optional const& locale) const +{ + StringBuilder builder; + TRY(Unicode::Detail::build_lowercase_string(code_points(), builder, locale)); + return builder.to_string(); +} + +ErrorOr String::to_uppercase(Optional const& locale) const +{ + StringBuilder builder; + TRY(Unicode::Detail::build_uppercase_string(code_points(), builder, locale)); + return builder.to_string(); +} + +}