From 21cff645a220221cd5fed623a4baf35acfa3fe4c Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Mon, 2 Jun 2025 14:15:05 -0400 Subject: [PATCH] LibUnicode: Use ICU to convert Unicode keywords to their BCP 47 value We were manually doing this for the calendar keyword, and would need to do so for the collation keyword as well. I wasn't aware of this API originally, so let's start using it. --- Libraries/LibUnicode/ICU.h | 22 ++++++++++++++-------- Libraries/LibUnicode/TimeZone.cpp | 2 +- Libraries/LibUnicode/UnicodeKeywords.cpp | 15 +++------------ 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/Libraries/LibUnicode/ICU.h b/Libraries/LibUnicode/ICU.h index a436a378a68..35a562c7faf 100644 --- a/Libraries/LibUnicode/ICU.h +++ b/Libraries/LibUnicode/ICU.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, Tim Flynn + * Copyright (c) 2024-2025, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -102,7 +103,7 @@ String icu_string_to_string(icu::UnicodeString const& string); String icu_string_to_string(UChar const*, i32 length); template -Vector icu_string_enumeration_to_list(OwnPtr enumeration, Filter&& filter) +Vector icu_string_enumeration_to_list(OwnPtr enumeration, char const* bcp47_keyword, Filter&& filter) { UErrorCode status = U_ZERO_ERROR; Vector result; @@ -112,23 +113,28 @@ Vector icu_string_enumeration_to_list(OwnPtr enu while (true) { i32 length = 0; - auto const* keyword = enumeration->next(&length, status); + auto const* value = enumeration->next(&length, status); - if (icu_failure(status) || keyword == nullptr) + if (icu_failure(status) || value == nullptr) break; - if (!filter(keyword)) + if (!filter(value)) continue; - result.append(MUST(String::from_utf8({ keyword, static_cast(length) }))); + if (bcp47_keyword) { + if (auto const* bcp47_value = uloc_toUnicodeLocaleType(bcp47_keyword, value)) + result.append(MUST(String::from_utf8({ bcp47_value, strlen(bcp47_value) }))); + } else { + result.append(MUST(String::from_utf8({ value, static_cast(length) }))); + } } return result; } -ALWAYS_INLINE Vector icu_string_enumeration_to_list(OwnPtr enumeration) +ALWAYS_INLINE Vector icu_string_enumeration_to_list(OwnPtr enumeration, char const* bcp47_keyword) { - return icu_string_enumeration_to_list(move(enumeration), [](char const*) { return true; }); + return icu_string_enumeration_to_list(move(enumeration), bcp47_keyword, [](char const*) { return true; }); } } diff --git a/Libraries/LibUnicode/TimeZone.cpp b/Libraries/LibUnicode/TimeZone.cpp index acd43715c8d..3a4409a5481 100644 --- a/Libraries/LibUnicode/TimeZone.cpp +++ b/Libraries/LibUnicode/TimeZone.cpp @@ -107,7 +107,7 @@ static Vector icu_available_time_zones(Optional const& regio if (icu_failure(status)) return { "UTC"_string }; - auto time_zones = icu_string_enumeration_to_list(move(time_zone_enumerator), [](char const* zone) { + auto time_zones = icu_string_enumeration_to_list(move(time_zone_enumerator), nullptr, [](char const* zone) { return !is_legacy_non_iana_time_zone({ zone, strlen(zone) }); }); diff --git a/Libraries/LibUnicode/UnicodeKeywords.cpp b/Libraries/LibUnicode/UnicodeKeywords.cpp index 048a6092d2c..519b036fe4e 100644 --- a/Libraries/LibUnicode/UnicodeKeywords.cpp +++ b/Libraries/LibUnicode/UnicodeKeywords.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, Tim Flynn + * Copyright (c) 2024-2025, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -59,16 +59,7 @@ Vector available_calendars(StringView locale) if (icu_failure(status)) return {}; - auto calendars = icu_string_enumeration_to_list(move(keywords)); - - for (auto& calendar : calendars) { - if (calendar == "gregorian"sv) - calendar = "gregory"_string; - else if (calendar == "ethiopic-amete-alem"sv) - calendar = "ethioaa"_string; - } - - return calendars; + return icu_string_enumeration_to_list(move(keywords), "ca"); } Vector const& available_currencies() @@ -162,7 +153,7 @@ Vector const& available_number_systems() if (icu_failure(status)) return {}; - auto number_systems = icu_string_enumeration_to_list(move(keywords), [&](char const* keyword) { + auto number_systems = icu_string_enumeration_to_list(move(keywords), "nu", [&](char const* keyword) { auto system = adopt_own_if_nonnull(icu::NumberingSystem::createInstanceByName(keyword, status)); if (icu_failure(status)) return false;