diff --git a/Meta/CMake/locale_data.cmake b/Meta/CMake/locale_data.cmake index 618f8ebed5c..9ef4b83f35d 100644 --- a/Meta/CMake/locale_data.cmake +++ b/Meta/CMake/locale_data.cmake @@ -75,7 +75,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD) "${CLDR_VERSION_FILE}" "${LOCALE_DATA_HEADER}" "${LOCALE_DATA_IMPLEMENTATION}" - arguments -b "${CLDR_BCP47_PATH}" -r "${CLDR_CORE_PATH}" -l "${CLDR_LOCALES_PATH}" -m "${CLDR_MISC_PATH}" -n "${CLDR_NUMBERS_PATH}" -d "${CLDR_DATES_PATH}" + arguments -b "${CLDR_BCP47_PATH}" -r "${CLDR_CORE_PATH}" -m "${CLDR_MISC_PATH}" -n "${CLDR_NUMBERS_PATH}" -d "${CLDR_DATES_PATH}" ) invoke_generator( "NumberFormatData" diff --git a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp index 426816e8440..65e220a3c3e 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibLocale/GenerateLocaleData.cpp @@ -33,38 +33,6 @@ static ByteString format_identifier(StringView owner, ByteString identifier) return identifier; } -struct DisplayPattern { - unsigned hash() const - { - return pair_int_hash(locale_pattern, locale_separator); - } - - bool operator==(DisplayPattern const& other) const - { - return (locale_pattern == other.locale_pattern) - && (locale_separator == other.locale_separator); - } - - size_t locale_pattern { 0 }; - size_t locale_separator { 0 }; -}; - -template<> -struct AK::Formatter : Formatter { - ErrorOr format(FormatBuilder& builder, DisplayPattern const& patterns) - { - return Formatter::format(builder, - "{{ {}, {} }}"sv, - patterns.locale_pattern, - patterns.locale_separator); - } -}; - -template<> -struct AK::Traits : public DefaultTraits { - static unsigned hash(DisplayPattern const& p) { return p.hash(); } -}; - struct ListPatterns { unsigned hash() const { @@ -143,31 +111,10 @@ struct AK::Traits : public DefaultTraits { static unsigned hash(TextLayout const& t) { return t.hash(); } }; -using LanguageList = Vector; -using TerritoryList = Vector; -using ScriptList = Vector; -using CurrencyList = Vector; -using CalendarList = Vector; -using DateFieldList = Vector; using KeywordList = Vector; using ListPatternList = Vector; struct LocaleData { - ByteString language; - Optional territory; - Optional variant; - size_t display_patterns { 0 }; - size_t languages { 0 }; - size_t territories { 0 }; - size_t scripts { 0 }; - size_t long_currencies { 0 }; - size_t short_currencies { 0 }; - size_t narrow_currencies { 0 }; - size_t numeric_currencies { 0 }; - size_t calendars { 0 }; - size_t long_date_fields { 0 }; - size_t short_date_fields { 0 }; - size_t narrow_date_fields { 0 }; size_t calendar_keywords { 0 }; size_t collation_case_keywords { 0 }; size_t collation_numeric_keywords { 0 }; @@ -183,13 +130,6 @@ struct LanguageMapping { struct CLDR { UniqueStringStorage unique_strings; - UniqueStorage unique_display_patterns; - UniqueStorage unique_language_lists; - UniqueStorage unique_territory_lists; - UniqueStorage unique_script_lists; - UniqueStorage unique_currency_lists; - UniqueStorage unique_calendar_lists; - UniqueStorage unique_date_field_lists; UniqueStorage unique_keyword_lists; UniqueStorage unique_list_patterns; UniqueStorage unique_list_pattern_lists; @@ -198,44 +138,12 @@ struct CLDR { HashMap locales; Vector locale_aliases; - Vector languages; - HashMap language_indices; - - Vector territories; - HashMap territory_indices; - - Vector scripts; - HashMap script_indices; - - Vector variants; - HashMap variant_indices; - - Vector currencies; - HashMap currency_indices; - - Vector date_fields; - HashMap date_fields_indices; - - Vector date_field_aliases { - // ECMA-402 and the CLDR refer to some date fields with different names. Defining these aliases - // means we can remain agnostic about the naming differences elsewhere. - { "dayperiod"sv, "dayPeriod"sv }, - { "week"sv, "weekOfYear"sv }, - { "zone"sv, "timeZoneName"sv }, - }; - HashMap> keywords; HashMap> keyword_aliases; HashMap keyword_names; Vector list_pattern_types; Vector character_orders; - HashMap language_aliases; - HashMap territory_aliases; - HashMap script_aliases; - HashMap variant_aliases; - HashMap subdivision_aliases; - Vector complex_mappings; Vector likely_subtags; size_t max_variant_size { 0 }; }; @@ -273,40 +181,6 @@ static ErrorOr parse_language_mapping(CLDR& cldr, StringView ke return LanguageMapping { move(parsed_key), move(parsed_alias) }; } -static ErrorOr parse_core_aliases(ByteString core_supplemental_path, CLDR& cldr) -{ - LexicalPath core_aliases_path(move(core_supplemental_path)); - core_aliases_path = core_aliases_path.append("aliases.json"sv); - - auto core_aliases = TRY(read_json_file(core_aliases_path.string())); - auto const& supplemental_object = core_aliases.as_object().get_object("supplemental"sv).value(); - auto const& metadata_object = supplemental_object.get_object("metadata"sv).value(); - auto const& alias_object = metadata_object.get_object("alias"sv).value(); - - auto append_aliases = [&](auto& alias_object, auto& alias_map) { - alias_object.for_each_member([&](auto const& key, JsonValue const& value) { - auto alias = value.as_object().get_byte_string("_replacement"sv).value(); - - if (key.contains('-')) { - auto mapping = TRY_OR_DISCARD(parse_language_mapping(cldr, key, alias)); - cldr.max_variant_size = max(mapping.key.variants.size(), cldr.max_variant_size); - cldr.max_variant_size = max(mapping.alias.variants.size(), cldr.max_variant_size); - cldr.complex_mappings.append(move(mapping)); - } else { - alias_map.set(key, cldr.unique_strings.ensure(alias)); - } - }); - }; - - append_aliases(alias_object.get_object("languageAlias"sv).value(), cldr.language_aliases); - append_aliases(alias_object.get_object("territoryAlias"sv).value(), cldr.territory_aliases); - append_aliases(alias_object.get_object("scriptAlias"sv).value(), cldr.script_aliases); - append_aliases(alias_object.get_object("variantAlias"sv).value(), cldr.variant_aliases); - append_aliases(alias_object.get_object("subdivisionAlias"sv).value(), cldr.subdivision_aliases); - - return {}; -} - static ErrorOr parse_likely_subtags(ByteString core_supplemental_path, CLDR& cldr) { LexicalPath likely_subtags_path(move(core_supplemental_path)); @@ -326,149 +200,6 @@ static ErrorOr parse_likely_subtags(ByteString core_supplemental_path, CLD return {}; } -static ErrorOr parse_identity(ByteString locale_path, CLDR& cldr, LocaleData& locale) -{ - LexicalPath locale_display_names_path(move(locale_path)); // Note: Every JSON file defines identity data, so we can use any of them. - locale_display_names_path = locale_display_names_path.append("localeDisplayNames.json"sv); - - auto const& locale_display_names = *TRY(read_json_file_with_cache(locale_display_names_path.string())); - auto const& main_object = locale_display_names.as_object().get_object("main"sv).value(); - auto const& locale_object = main_object.get_object(locale_display_names_path.parent().basename()).value(); - auto const& identity_object = locale_object.get_object("identity"sv).value(); - auto const& language_string = identity_object.get_byte_string("language"sv).value(); - auto const& territory_string = identity_object.get_byte_string("territory"sv); - auto const& script_string = identity_object.get_byte_string("script"sv); - auto const& variant_string = identity_object.get_byte_string("variant"sv); - - locale.language = language_string; - - if (territory_string.has_value()) { - locale.territory = territory_string.value(); - - if (!cldr.territory_indices.contains(*locale.territory)) { - cldr.territory_indices.set(*locale.territory, 0); - cldr.territories.append(*locale.territory); - } - } - - if (script_string.has_value()) { - auto const& script = script_string.value(); - - if (!cldr.script_indices.contains(script)) { - cldr.script_indices.set(script, 0); - cldr.scripts.append(script); - } - } - - if (variant_string.has_value()) { - locale.variant = variant_string.value(); - - if (!cldr.variant_indices.contains(*locale.variant)) { - cldr.variant_indices.set(*locale.variant, 0); - cldr.variants.append(*locale.variant); - } - } - - return {}; -} - -static ErrorOr parse_locale_display_patterns(ByteString locale_path, CLDR& cldr, LocaleData& locale) -{ - LexicalPath locale_display_names_path(move(locale_path)); - locale_display_names_path = locale_display_names_path.append("localeDisplayNames.json"sv); - - auto const& locale_display_names = *TRY(read_json_file_with_cache(locale_display_names_path.string())); - auto const& main_object = locale_display_names.as_object().get_object("main"sv).value(); - auto const& locale_object = main_object.get_object(locale_display_names_path.parent().basename()).value(); - auto const& locale_display_names_object = locale_object.get_object("localeDisplayNames"sv).value(); - auto const& locale_display_patterns_object = locale_display_names_object.get_object("localeDisplayPattern"sv).value(); - auto const& locale_pattern = locale_display_patterns_object.get_byte_string("localePattern"sv).value(); - auto const& locale_separator = locale_display_patterns_object.get_byte_string("localeSeparator"sv).value(); - - DisplayPattern patterns {}; - patterns.locale_pattern = cldr.unique_strings.ensure(locale_pattern); - patterns.locale_separator = cldr.unique_strings.ensure(locale_separator); - - locale.display_patterns = cldr.unique_display_patterns.ensure(move(patterns)); - return {}; -} - -static ErrorOr preprocess_languages(ByteString locale_path, CLDR& cldr) -{ - LexicalPath languages_path(move(locale_path)); - languages_path = languages_path.append("languages.json"sv); - - if (!FileSystem::exists(languages_path.string())) - return {}; - - auto const& locale_languages = *TRY(read_json_file_with_cache(languages_path.string())); - auto const& main_object = locale_languages.as_object().get_object("main"sv).value(); - auto const& locale_object = main_object.get_object(languages_path.parent().basename()).value(); - auto const& locale_display_names_object = locale_object.get_object("localeDisplayNames"sv).value(); - auto const& languages_object = locale_display_names_object.get_object("languages"sv).value(); - - languages_object.for_each_member([&](auto const& key, auto const&) { - if (!key.contains("-alt-"sv) && !cldr.language_indices.contains(key)) { - cldr.language_indices.set(key, 0); - cldr.languages.append(key); - } - }); - - return {}; -} - -static ErrorOr preprocess_currencies(ByteString numbers_path, CLDR& cldr) -{ - LexicalPath currencies_path(move(numbers_path)); - currencies_path = currencies_path.append("currencies.json"sv); - - auto const& locale_currencies = *TRY(read_json_file_with_cache(currencies_path.string())); - auto const& main_object = locale_currencies.as_object().get_object("main"sv).value(); - auto const& locale_object = main_object.get_object(currencies_path.parent().basename()).value(); - auto const& locale_numbers_object = locale_object.get_object("numbers"sv).value(); - auto const& currencies_object = locale_numbers_object.get_object("currencies"sv).value(); - - currencies_object.for_each_member([&](auto const& key, JsonValue const&) { - if (!cldr.currency_indices.contains(key)) { - cldr.currency_indices.set(key, 0); - cldr.currencies.append(key); - } - }); - - return {}; -} - -static bool is_sanctioned_date_field(StringView field) -{ - // This is a copy of the units sanctioned for use within ECMA-402, with names adjusted for the names used by the CLDR. - // https://tc39.es/ecma402/#table-validcodeforDateField - return field.is_one_of("era"sv, "year"sv, "quarter"sv, "month"sv, "week"sv, "weekday"sv, "day"sv, "dayperiod"sv, "hour"sv, "minute"sv, "second"sv, "zone"sv); -} - -static ErrorOr preprocess_date_fields(ByteString dates_path, CLDR& cldr) -{ - LexicalPath date_fields_path(move(dates_path)); - date_fields_path = date_fields_path.append("dateFields.json"sv); - - auto const& locale_date_fields = *TRY(read_json_file_with_cache(date_fields_path.string())); - auto const& main_object = locale_date_fields.as_object().get_object("main"sv).value(); - auto const& locale_object = main_object.get_object(date_fields_path.parent().basename()).value(); - auto const& dates_object = locale_object.get_object("dates"sv).value(); - auto const& fields_object = dates_object.get_object("fields"sv).value(); - - fields_object.for_each_member([&](auto const& key, JsonValue const&) { - if (!is_sanctioned_date_field(key)) - return; - - if (!cldr.date_fields_indices.contains(key)) { - cldr.date_fields_indices.set(key, 0); - cldr.date_fields.append(key); - } - }); - - return {}; -} - static ErrorOr parse_unicode_extension_keywords(ByteString bcp47_path, CLDR& cldr) { constexpr auto desired_keywords = Array { "ca"sv, "co"sv, "hc"sv, "kf"sv, "kn"sv, "nu"sv }; @@ -533,102 +264,6 @@ static Optional find_keyword_alias(StringView key, StringView calend return alias->name; } -static ErrorOr parse_locale_languages(ByteString locale_path, CLDR& cldr, LocaleData& locale) -{ - LexicalPath languages_path(move(locale_path)); - languages_path = languages_path.append("languages.json"sv); - - LanguageList languages; - languages.resize(cldr.languages.size()); - - if (!FileSystem::exists(languages_path.string())) { - for (size_t i = 0; i < languages.size(); ++i) - languages[i] = cldr.unique_strings.ensure(cldr.languages[i]); - - locale.languages = cldr.unique_language_lists.ensure(move(languages)); - return {}; - } - - auto const& locale_languages = *TRY(read_json_file_with_cache(languages_path.string())); - auto const& main_object = locale_languages.as_object().get_object("main"sv).value(); - auto const& locale_object = main_object.get_object(languages_path.parent().basename()).value(); - auto const& locale_display_names_object = locale_object.get_object("localeDisplayNames"sv).value(); - auto const& languages_object = locale_display_names_object.get_object("languages"sv).value(); - - languages_object.for_each_member([&](auto const& key, JsonValue const& value) { - if (key.contains("-alt-"sv)) - return; - - auto index = cldr.language_indices.get(key).value(); - languages[index] = cldr.unique_strings.ensure(value.as_string()); - }); - - locale.languages = cldr.unique_language_lists.ensure(move(languages)); - return {}; -} - -static ErrorOr parse_locale_territories(ByteString locale_path, CLDR& cldr, LocaleData& locale) -{ - LexicalPath territories_path(move(locale_path)); - territories_path = territories_path.append("territories.json"sv); - - TerritoryList territories; - territories.resize(cldr.territories.size()); - - if (!FileSystem::exists(territories_path.string())) { - for (size_t i = 0; i < territories.size(); ++i) - territories[i] = cldr.unique_strings.ensure(cldr.territories[i]); - - locale.territories = cldr.unique_territory_lists.ensure(move(territories)); - return {}; - } - - auto locale_territories = TRY(read_json_file(territories_path.string())); - auto const& main_object = locale_territories.as_object().get_object("main"sv).value(); - auto const& locale_object = main_object.get_object(territories_path.parent().basename()).value(); - auto const& locale_display_names_object = locale_object.get_object("localeDisplayNames"sv).value(); - auto const& territories_object = locale_display_names_object.get_object("territories"sv).value(); - - territories_object.for_each_member([&](auto const& key, JsonValue const& value) { - if (auto index = cldr.territory_indices.get(key); index.has_value()) - territories[*index] = cldr.unique_strings.ensure(value.as_string()); - }); - - locale.territories = cldr.unique_territory_lists.ensure(move(territories)); - return {}; -} - -static ErrorOr parse_locale_scripts(ByteString locale_path, CLDR& cldr, LocaleData& locale) -{ - LexicalPath scripts_path(move(locale_path)); - scripts_path = scripts_path.append("scripts.json"sv); - - ScriptList scripts; - scripts.resize(cldr.scripts.size()); - - if (!FileSystem::exists(scripts_path.string())) { - for (size_t i = 0; i < scripts.size(); ++i) - scripts[i] = cldr.unique_strings.ensure(cldr.scripts[i]); - - locale.scripts = cldr.unique_script_lists.ensure(move(scripts)); - return {}; - } - - auto locale_scripts = TRY(read_json_file(scripts_path.string())); - auto const& main_object = locale_scripts.as_object().get_object("main"sv).value(); - auto const& locale_object = main_object.get_object(scripts_path.parent().basename()).value(); - auto const& locale_display_names_object = locale_object.get_object("localeDisplayNames"sv).value(); - auto const& scripts_object = locale_display_names_object.get_object("scripts"sv).value(); - - scripts_object.for_each_member([&](auto const& key, JsonValue const& value) { - if (auto index = cldr.script_indices.get(key); index.has_value()) - scripts[*index] = cldr.unique_strings.ensure(value.as_string()); - }); - - locale.scripts = cldr.unique_script_lists.ensure(move(scripts)); - return {}; -} - static ErrorOr parse_locale_list_patterns(ByteString misc_path, CLDR& cldr, LocaleData& locale) { LexicalPath list_patterns_path(move(misc_path)); @@ -711,124 +346,6 @@ static ErrorOr parse_locale_layout(ByteString misc_path, CLDR& cldr, Local return {}; } -static ErrorOr parse_locale_currencies(ByteString numbers_path, CLDR& cldr, LocaleData& locale) -{ - LexicalPath currencies_path(move(numbers_path)); - currencies_path = currencies_path.append("currencies.json"sv); - - auto const& locale_currencies = *TRY(read_json_file_with_cache(currencies_path.string())); - auto const& main_object = locale_currencies.as_object().get_object("main"sv).value(); - auto const& locale_object = main_object.get_object(currencies_path.parent().basename()).value(); - auto const& locale_numbers_object = locale_object.get_object("numbers"sv).value(); - auto const& currencies_object = locale_numbers_object.get_object("currencies"sv).value(); - - CurrencyList long_currencies {}; - long_currencies.resize(cldr.currencies.size()); - - CurrencyList short_currencies {}; - short_currencies.resize(cldr.currencies.size()); - - CurrencyList narrow_currencies {}; - narrow_currencies.resize(cldr.currencies.size()); - - CurrencyList numeric_currencies {}; - numeric_currencies.resize(cldr.currencies.size()); - - currencies_object.for_each_member([&](auto const& key, JsonValue const& value) { - auto long_name = value.as_object().get_byte_string("displayName"sv).value_or(key); - auto short_name = value.as_object().get_byte_string("symbol"sv).value_or(key); - auto narrow_name = value.as_object().get_byte_string("symbol-alt-narrow"sv); - auto numeric_name = value.as_object().get_byte_string("displayName-count-other"sv); - - auto index = cldr.currency_indices.get(key).value(); - long_currencies[index] = cldr.unique_strings.ensure(move(long_name)); - short_currencies[index] = cldr.unique_strings.ensure(move(short_name)); - narrow_currencies[index] = narrow_name.has_value() ? cldr.unique_strings.ensure(narrow_name.release_value()) : 0; - numeric_currencies[index] = numeric_name.has_value() ? cldr.unique_strings.ensure(numeric_name.release_value()) : long_currencies[index]; - }); - - locale.long_currencies = cldr.unique_currency_lists.ensure(move(long_currencies)); - locale.short_currencies = cldr.unique_currency_lists.ensure(move(short_currencies)); - locale.narrow_currencies = cldr.unique_currency_lists.ensure(move(narrow_currencies)); - locale.numeric_currencies = cldr.unique_currency_lists.ensure(move(numeric_currencies)); - return {}; -} - -static ErrorOr parse_locale_calendars(ByteString locale_path, CLDR& cldr, LocaleData& locale) -{ - LexicalPath locale_display_names_path(move(locale_path)); - locale_display_names_path = locale_display_names_path.append("localeDisplayNames.json"sv); - - auto const& locale_display_names = *TRY(read_json_file_with_cache(locale_display_names_path.string())); - auto const& main_object = locale_display_names.as_object().get_object("main"sv).value(); - auto const& locale_object = main_object.get_object(locale_display_names_path.parent().basename()).value(); - auto const& locale_display_names_object = locale_object.get_object("localeDisplayNames"sv).value(); - - if (!locale_display_names_object.has_object("types"sv)) - return {}; - - auto const& types_object = locale_display_names_object.get_object("types"sv).value(); - auto const& calendar_object = types_object.get_object("calendar"sv).value(); - - auto const& supported_calendars = cldr.keywords.find("ca"sv)->value; - - CalendarList calendars; - calendars.resize(supported_calendars.size()); - - calendar_object.for_each_member([&](auto const& key, auto const& calendar) { - auto index = supported_calendars.find_first_index(key); - if (!index.has_value()) { - auto alias = find_keyword_alias("ca"sv, key, cldr); - index = supported_calendars.find_first_index(*alias); - } - - calendars[*index] = cldr.unique_strings.ensure(calendar.as_string()); - }); - - locale.calendars = cldr.unique_calendar_lists.ensure(move(calendars)); - return {}; -} - -static ErrorOr parse_locale_date_fields(ByteString dates_path, CLDR& cldr, LocaleData& locale) -{ - LexicalPath date_fields_path(move(dates_path)); - date_fields_path = date_fields_path.append("dateFields.json"sv); - - auto const& locale_date_fields = *TRY(read_json_file_with_cache(date_fields_path.string())); - auto const& main_object = locale_date_fields.as_object().get_object("main"sv).value(); - auto const& locale_object = main_object.get_object(date_fields_path.parent().basename()).value(); - auto const& dates_object = locale_object.get_object("dates"sv).value(); - auto const& fields_object = dates_object.get_object("fields"sv).value(); - - DateFieldList long_date_fields {}; - long_date_fields.resize(cldr.date_fields.size()); - - DateFieldList short_date_fields {}; - short_date_fields.resize(cldr.date_fields.size()); - - DateFieldList narrow_date_fields {}; - narrow_date_fields.resize(cldr.date_fields.size()); - - fields_object.for_each_member([&](auto const& key, JsonValue const& value) { - if (!is_sanctioned_date_field(key)) - return; - - auto const& long_name = value.as_object().get_byte_string("displayName"sv).value(); - auto const& short_name = fields_object.get_object(ByteString::formatted("{}-short", key))->get_byte_string("displayName"sv).value(); - auto const& narrow_name = fields_object.get_object(ByteString::formatted("{}-narrow", key))->get_byte_string("displayName"sv).value(); - - auto index = cldr.date_fields_indices.get(key).value(); - long_date_fields[index] = cldr.unique_strings.ensure(long_name); - short_date_fields[index] = cldr.unique_strings.ensure(short_name); - narrow_date_fields[index] = cldr.unique_strings.ensure(narrow_name); - }); - - locale.long_date_fields = cldr.unique_date_field_lists.ensure(move(long_date_fields)); - locale.short_date_fields = cldr.unique_date_field_lists.ensure(move(short_date_fields)); - locale.narrow_date_fields = cldr.unique_date_field_lists.ensure(move(narrow_date_fields)); - return {}; -} - static ErrorOr parse_number_system_keywords(ByteString locale_numbers_path, CLDR& cldr, LocaleData& locale) { LexicalPath numbers_path(move(locale_numbers_path)); @@ -1008,13 +525,12 @@ static ErrorOr define_aliases_without_scripts(CLDR& cldr) return {}; } -static ErrorOr parse_all_locales(ByteString bcp47_path, ByteString core_path, ByteString locale_names_path, ByteString misc_path, ByteString numbers_path, ByteString dates_path, CLDR& cldr) +static ErrorOr parse_all_locales(ByteString bcp47_path, ByteString core_path, ByteString misc_path, ByteString numbers_path, ByteString dates_path, CLDR& cldr) { LexicalPath core_supplemental_path(core_path); core_supplemental_path = core_supplemental_path.append("supplemental"sv); VERIFY(FileSystem::is_directory(core_supplemental_path.string())); - TRY(parse_core_aliases(core_supplemental_path.string(), cldr)); TRY(parse_likely_subtags(core_supplemental_path.string(), cldr)); auto remove_variants_from_path = [&](ByteString path) -> ErrorOr { @@ -1030,66 +546,12 @@ static ErrorOr parse_all_locales(ByteString bcp47_path, ByteString core_pa return builder.to_byte_string(); }; - TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", locale_names_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr { - auto locale_path = LexicalPath::join(directory.path().string(), entry.name).string(); - auto language = TRY(remove_variants_from_path(locale_path)); - - auto& locale = cldr.locales.ensure(language); - TRY(parse_identity(locale_path, cldr, locale)); - return IterationDecision::Continue; - })); - - TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", locale_names_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr { - auto locale_path = LexicalPath::join(directory.path().string(), entry.name).string(); - TRY(preprocess_languages(locale_path, cldr)); - return IterationDecision::Continue; - })); - - TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", numbers_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr { - auto numbers_path = LexicalPath::join(directory.path().string(), entry.name).string(); - TRY(preprocess_currencies(numbers_path, cldr)); - return IterationDecision::Continue; - })); - - TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", dates_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr { - auto dates_path = LexicalPath::join(directory.path().string(), entry.name).string(); - TRY(preprocess_date_fields(dates_path, cldr)); - return IterationDecision::Continue; - })); - - auto update_indices = [](auto& keys, auto& indices) { - quick_sort(keys); - - for (size_t i = 0; i < keys.size(); ++i) - indices.set(keys[i], i); - }; - - update_indices(cldr.languages, cldr.language_indices); - update_indices(cldr.territories, cldr.territory_indices); - update_indices(cldr.scripts, cldr.script_indices); - update_indices(cldr.variants, cldr.variant_indices); - update_indices(cldr.currencies, cldr.currency_indices); - update_indices(cldr.date_fields, cldr.date_fields_indices); - TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/bcp47", bcp47_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr { auto bcp47_path = LexicalPath::join(directory.path().string(), entry.name).string(); TRY(parse_unicode_extension_keywords(move(bcp47_path), cldr)); return IterationDecision::Continue; })); - TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", locale_names_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr { - auto locale_path = LexicalPath::join(directory.path().string(), entry.name).string(); - auto language = TRY(remove_variants_from_path(locale_path)); - - auto& locale = cldr.locales.ensure(language); - TRY(parse_locale_display_patterns(locale_path, cldr, locale)); - TRY(parse_locale_languages(locale_path, cldr, locale)); - TRY(parse_locale_territories(locale_path, cldr, locale)); - TRY(parse_locale_scripts(locale_path, cldr, locale)); - TRY(parse_locale_calendars(locale_path, cldr, locale)); - return IterationDecision::Continue; - })); - TRY(Core::Directory::for_each_entry(TRY(String::formatted("{}/main", misc_path)), Core::DirIterator::SkipParentAndBaseDir, [&](auto& entry, auto& directory) -> ErrorOr { auto misc_path = LexicalPath::join(directory.path().string(), entry.name).string(); auto language = TRY(remove_variants_from_path(misc_path)); @@ -1105,7 +567,6 @@ static ErrorOr parse_all_locales(ByteString bcp47_path, ByteString core_pa auto language = TRY(remove_variants_from_path(numbers_path)); auto& locale = cldr.locales.ensure(language); - TRY(parse_locale_currencies(numbers_path, cldr, locale)); TRY(parse_number_system_keywords(numbers_path, cldr, locale)); fill_in_collation_keywords(cldr, locale); return IterationDecision::Continue; @@ -1116,7 +577,6 @@ static ErrorOr parse_all_locales(ByteString bcp47_path, ByteString core_pa auto language = TRY(remove_variants_from_path(dates_path)); auto& locale = cldr.locales.ensure(language); - TRY(parse_locale_date_fields(dates_path, cldr, locale)); TRY(parse_calendar_keywords(dates_path, cldr, locale)); return IterationDecision::Continue; })); @@ -1144,12 +604,6 @@ namespace Locale { auto keywords = cldr.keywords.keys(); generate_enum(generator, format_identifier, "Locale"sv, "None"sv, locales, cldr.locale_aliases); - generate_enum(generator, format_identifier, "Language"sv, {}, cldr.languages); - generate_enum(generator, format_identifier, "Territory"sv, {}, cldr.territories); - generate_enum(generator, format_identifier, "ScriptTag"sv, {}, cldr.scripts); - generate_enum(generator, format_identifier, "Currency"sv, {}, cldr.currencies); - generate_enum(generator, format_identifier, "DateField"sv, {}, cldr.date_fields, cldr.date_field_aliases); - generate_enum(generator, format_identifier, "Variant"sv, {}, cldr.variants); generate_enum(generator, format_identifier, "ListPatternType"sv, {}, cldr.list_pattern_types); generate_enum(generator, format_identifier, "CharacterOrder"sv, {}, cldr.character_orders); generate_enum(generator, format_identifier, "Key"sv, {}, keywords); @@ -1180,7 +634,6 @@ static ErrorOr generate_unicode_locale_implementation(Core::InputBufferedF SourceGenerator generator { builder }; generator.set("string_index_type"sv, string_index_type); generator.set("locales_size"sv, ByteString::number(cldr.locales.size())); - generator.set("territories_size", ByteString::number(cldr.territories.size())); generator.set("variants_size", ByteString::number(cldr.max_variant_size)); generator.append(R"~~~( @@ -1202,20 +655,6 @@ namespace Locale { cldr.unique_strings.generate(generator); generator.append(R"~~~( -struct DisplayPatternImpl { - DisplayPattern to_display_pattern() const - { - DisplayPattern display_patterns {}; - display_patterns.locale_pattern = decode_string(locale_pattern); - display_patterns.locale_separator = decode_string(locale_separator); - - return display_patterns; - } - - @string_index_type@ locale_pattern { 0 }; - @string_index_type@ locale_separator { 0 }; -}; - struct Patterns { ListPatternType type; Style style; @@ -1244,7 +683,6 @@ struct TextLayout { }); generate_available_values(generator, "get_available_hour_cycles"sv, cldr.keywords.find("hc"sv)->value); generate_available_values(generator, "get_available_number_systems"sv, cldr.keywords.find("nu"sv)->value); - generate_available_values(generator, "get_available_currencies"sv, cldr.currencies); generator.append(R"~~~( ReadonlySpan get_available_keyword_values(StringView key) @@ -1272,13 +710,6 @@ ReadonlySpan get_available_keyword_values(StringView key) } )~~~"); - cldr.unique_display_patterns.generate(generator, "DisplayPatternImpl"sv, "s_display_patterns"sv, 30); - cldr.unique_language_lists.generate(generator, string_index_type, "s_language_lists"sv); - cldr.unique_territory_lists.generate(generator, string_index_type, "s_territory_lists"sv); - cldr.unique_script_lists.generate(generator, string_index_type, "s_script_lists"sv); - cldr.unique_currency_lists.generate(generator, string_index_type, "s_currency_lists"sv); - cldr.unique_calendar_lists.generate(generator, string_index_type, "s_calendar_lists"sv); - cldr.unique_date_field_lists.generate(generator, string_index_type, "s_date_field_lists"sv); cldr.unique_keyword_lists.generate(generator, string_index_type, "s_keyword_lists"sv); cldr.unique_list_patterns.generate(generator, "Patterns"sv, "s_list_patterns"sv, 10); cldr.unique_list_pattern_lists.generate(generator, cldr.unique_list_patterns.type_that_fits(), "s_list_pattern_lists"sv); @@ -1328,18 +759,6 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~"); auto locales = cldr.locales.keys(); quick_sort(locales); - append_mapping(locales, cldr.locales, cldr.unique_display_patterns.type_that_fits(), "s_locale_display_patterns"sv, [&](auto const& locale) { return locale.display_patterns; }); - append_mapping(locales, cldr.locales, cldr.unique_language_lists.type_that_fits(), "s_languages"sv, [&](auto const& locale) { return locale.languages; }); - append_mapping(locales, cldr.locales, cldr.unique_territory_lists.type_that_fits(), "s_territories"sv, [&](auto const& locale) { return locale.territories; }); - append_mapping(locales, cldr.locales, cldr.unique_script_lists.type_that_fits(), "s_scripts"sv, [&](auto const& locale) { return locale.scripts; }); - append_mapping(locales, cldr.locales, cldr.unique_currency_lists.type_that_fits(), "s_long_currencies"sv, [&](auto const& locale) { return locale.long_currencies; }); - append_mapping(locales, cldr.locales, cldr.unique_currency_lists.type_that_fits(), "s_short_currencies"sv, [&](auto const& locale) { return locale.short_currencies; }); - append_mapping(locales, cldr.locales, cldr.unique_currency_lists.type_that_fits(), "s_narrow_currencies"sv, [&](auto const& locale) { return locale.narrow_currencies; }); - append_mapping(locales, cldr.locales, cldr.unique_currency_lists.type_that_fits(), "s_numeric_currencies"sv, [&](auto const& locale) { return locale.numeric_currencies; }); - append_mapping(locales, cldr.locales, cldr.unique_calendar_lists.type_that_fits(), "s_calendars"sv, [&](auto const& locale) { return locale.calendars; }); - append_mapping(locales, cldr.locales, cldr.unique_date_field_lists.type_that_fits(), "s_long_date_fields"sv, [&](auto const& locale) { return locale.long_date_fields; }); - append_mapping(locales, cldr.locales, cldr.unique_date_field_lists.type_that_fits(), "s_short_date_fields"sv, [&](auto const& locale) { return locale.short_date_fields; }); - append_mapping(locales, cldr.locales, cldr.unique_date_field_lists.type_that_fits(), "s_narrow_date_fields"sv, [&](auto const& locale) { return locale.narrow_date_fields; }); append_mapping(locales, cldr.locales, cldr.unique_keyword_lists.type_that_fits(), "s_calendar_keywords"sv, [&](auto const& locale) { return locale.calendar_keywords; }); append_mapping(locales, cldr.locales, cldr.unique_keyword_lists.type_that_fits(), "s_collation_case_keywords"sv, [&](auto const& locale) { return locale.collation_case_keywords; }); append_mapping(locales, cldr.locales, cldr.unique_keyword_lists.type_that_fits(), "s_collation_numeric_keywords"sv, [&](auto const& locale) { return locale.collation_numeric_keywords; }); @@ -1351,42 +770,11 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~"); struct CanonicalLanguageID { - LanguageID to_unicode_language_id() const - { - LanguageID language_id {}; - language_id.variants.ensure_capacity(variants_size); - - language_id.language = MUST(String::from_utf8(decode_string(language))); - if (script != 0) - language_id.script = MUST(String::from_utf8(decode_string(script))); - if (region != 0) - language_id.region = MUST(String::from_utf8(decode_string(region))); - for (size_t i = 0; i < variants_size; ++i) - language_id.variants.append(MUST(String::from_utf8(decode_string(variants[i])))); - - return language_id; - } - - bool matches_variants(Vector const& other_variants) const { - if (variants_size == 0) - return true; - if (other_variants.size() != variants_size) - return false; - - for (size_t i = 0; i < variants_size; ++i) { - if (decode_string(variants[i]) != other_variants[i]) - return false; - } - - return true; - }; - @string_index_type@ language { 0 }; @string_index_type@ script { 0 }; @string_index_type@ region { 0 }; Array<@string_index_type@, @variants_size@> variants {}; size_t variants_size { 0 }; - }; struct LanguageMapping { @@ -1436,7 +824,6 @@ static constexpr Array s_@name@ { { generator.append("} };\n"); }; - append_complex_mapping("complex_alias"sv, cldr.complex_mappings); append_complex_mapping("likely_subtags"sv, cldr.likely_subtags); generator.append(R"~~~( @@ -1529,39 +916,6 @@ static LanguageMapping const* resolve_likely_subtag(LanguageID const& language_i )~~~"); - auto append_mapping_search = [&](StringView enum_snake, StringView from_string_name, StringView collection_name, StringView unique_list) { - generator.set("enum_snake", enum_snake); - generator.set("from_string_name", from_string_name); - generator.set("collection_name", collection_name); - generator.set("unique_list", unique_list); - - generator.append(R"~~~( -Optional get_locale_@enum_snake@_mapping(StringView locale, StringView @enum_snake@) -{ - auto locale_value = locale_from_string(locale); - if (!locale_value.has_value()) - return {}; - - auto @enum_snake@_value = @from_string_name@_from_string(@enum_snake@); - if (!@enum_snake@_value.has_value()) - return {}; - - auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. - auto @enum_snake@_index = to_underlying(*@enum_snake@_value); - - auto mapping_index = @collection_name@.at(locale_index); - auto const& mappings = @unique_list@.at(mapping_index); - - auto @enum_snake@_string_index = mappings.at(@enum_snake@_index); - auto @enum_snake@_mapping = decode_string(@enum_snake@_string_index); - - if (@enum_snake@_mapping.is_empty()) - return {}; - return @enum_snake@_mapping; -} -)~~~"); - }; - auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values, Vector const& aliases = {}) -> ErrorOr { HashValueMap hashes; TRY(hashes.try_ensure_capacity(values.size())); @@ -1576,47 +930,7 @@ Optional get_locale_@enum_snake@_mapping(StringView locale, StringVi return {}; }; - auto append_alias_search = [&](StringView enum_snake, auto const& aliases) -> ErrorOr { - HashValueMap hashes; - TRY(hashes.try_ensure_capacity(aliases.size())); - - for (auto const& alias : aliases) - hashes.set(alias.key.hash(), alias.value); - - ValueFromStringOptions options {}; - options.return_type = "StringView"sv; - options.return_format = "decode_string({})"sv; - - generate_value_from_string(generator, "resolve_{}_alias"sv, string_index_type, enum_snake, move(hashes), options); - - return {}; - }; - TRY(append_from_string("Locale"sv, "locale"sv, cldr.locales.keys(), cldr.locale_aliases)); - - TRY(append_from_string("Language"sv, "language"sv, cldr.languages)); - append_mapping_search("language"sv, "language"sv, "s_languages"sv, "s_language_lists"sv); - TRY(append_alias_search("language"sv, cldr.language_aliases)); - - TRY(append_from_string("Territory"sv, "territory"sv, cldr.territories)); - append_mapping_search("territory"sv, "territory"sv, "s_territories"sv, "s_territory_lists"sv); - TRY(append_alias_search("territory"sv, cldr.territory_aliases)); - - TRY(append_from_string("ScriptTag"sv, "script_tag"sv, cldr.scripts)); - append_mapping_search("script"sv, "script_tag"sv, "s_scripts"sv, "s_script_lists"sv); - TRY(append_alias_search("script_tag"sv, cldr.script_aliases)); - - TRY(append_from_string("Currency"sv, "currency"sv, cldr.currencies)); - append_mapping_search("long_currency"sv, "currency"sv, "s_long_currencies"sv, "s_currency_lists"sv); - append_mapping_search("short_currency"sv, "currency"sv, "s_short_currencies"sv, "s_currency_lists"sv); - append_mapping_search("narrow_currency"sv, "currency"sv, "s_narrow_currencies"sv, "s_currency_lists"sv); - append_mapping_search("numeric_currency"sv, "currency"sv, "s_numeric_currencies"sv, "s_currency_lists"sv); - - TRY(append_from_string("DateField"sv, "date_field"sv, cldr.date_fields, cldr.date_field_aliases)); - append_mapping_search("long_date_field"sv, "date_field"sv, "s_long_date_fields"sv, "s_date_field_lists"sv); - append_mapping_search("short_date_field"sv, "date_field"sv, "s_short_date_fields"sv, "s_date_field_lists"sv); - append_mapping_search("narrow_date_field"sv, "date_field"sv, "s_narrow_date_fields"sv, "s_date_field_lists"sv); - TRY(append_from_string("Key"sv, "key"sv, cldr.keywords.keys())); for (auto const& keyword : cldr.keywords) { @@ -1630,11 +944,6 @@ Optional get_locale_@enum_snake@_mapping(StringView locale, StringVi TRY(append_from_string(enum_name, enum_snake, keyword.value)); } - append_mapping_search("calendar"sv, "keyword_ca"sv, "s_calendars"sv, "s_calendar_lists"sv); - - TRY(append_alias_search("variant"sv, cldr.variant_aliases)); - TRY(append_alias_search("subdivision"sv, cldr.subdivision_aliases)); - TRY(append_from_string("ListPatternType"sv, "list_pattern_type"sv, cldr.list_pattern_types)); TRY(append_from_string("CharacterOrder"sv, "character_order"sv, cldr.character_orders)); @@ -1735,19 +1044,6 @@ Vector get_keywords_for_locale(StringView locale, StringView key) return keywords; } -Optional get_locale_display_patterns(StringView locale) -{ - auto locale_value = locale_from_string(locale); - if (!locale_value.has_value()) - return {}; - - auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. - auto display_patterns_index = s_locale_display_patterns.at(locale_index); - - auto const& display_patterns = s_display_patterns.at(display_patterns_index); - return display_patterns.to_display_pattern(); -} - Optional get_locale_list_patterns(StringView locale, StringView list_pattern_type, Style list_pattern_style) { auto locale_value = locale_from_string(locale); @@ -1798,37 +1094,6 @@ Optional character_order_for_locale(StringView locale) return {}; } -void resolve_complex_language_aliases(LanguageID& language_id) -{ - for (auto const& map : s_complex_alias) { - auto key_language = decode_string(map.key.language); - auto key_script = decode_string(map.key.script); - auto key_region = decode_string(map.key.region); - - if ((key_language != language_id.language) && (key_language != "und"sv)) - continue; - if (!key_script.is_empty() && (key_script != language_id.script)) - continue; - if (!key_region.is_empty() && (key_region != language_id.region)) - continue; - if (!map.key.matches_variants(language_id.variants)) - continue; - - auto alias = map.alias.to_unicode_language_id(); - - if (alias.language == "und"sv) - alias.language = move(language_id.language); - if (key_script.is_empty() && !alias.script.has_value()) - alias.script = move(language_id.script); - if (key_region.is_empty() && !alias.region.has_value()) - alias.region = move(language_id.region); - if (map.key.variants_size == 0 && alias.variants.is_empty()) - alias.variants = move(language_id.variants); - - language_id = move(alias); - break; - } -} Optional add_likely_subtags(LanguageID const& language_id) { @@ -1856,13 +1121,6 @@ Optional add_likely_subtags(LanguageID const& language_id) return maximized; } -Optional resolve_most_likely_territory(LanguageID const& language_id) -{ - if (auto const* likely_subtag = resolve_likely_subtag(language_id); likely_subtag != nullptr) - return MUST(String::from_utf8(decode_string(likely_subtag->alias.region))); - return OptionalNone {}; -} - } )~~~"); @@ -1876,7 +1134,6 @@ ErrorOr serenity_main(Main::Arguments arguments) StringView generated_implementation_path; StringView bcp47_path; StringView core_path; - StringView locale_names_path; StringView misc_path; StringView numbers_path; StringView dates_path; @@ -1886,7 +1143,6 @@ ErrorOr serenity_main(Main::Arguments arguments) args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path"); args_parser.add_option(bcp47_path, "Path to cldr-bcp47 directory", "bcp47-path", 'b', "bcp47-path"); args_parser.add_option(core_path, "Path to cldr-core directory", "core-path", 'r', "core-path"); - args_parser.add_option(locale_names_path, "Path to cldr-localenames directory", "locale-names-path", 'l', "locale-names-path"); args_parser.add_option(misc_path, "Path to cldr-misc directory", "misc-path", 'm', "misc-path"); args_parser.add_option(numbers_path, "Path to cldr-numbers directory", "numbers-path", 'n', "numbers-path"); args_parser.add_option(dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path"); @@ -1896,7 +1152,7 @@ ErrorOr serenity_main(Main::Arguments arguments) auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write)); CLDR cldr; - TRY(parse_all_locales(bcp47_path, core_path, locale_names_path, misc_path, numbers_path, dates_path, cldr)); + TRY(parse_all_locales(bcp47_path, core_path, misc_path, numbers_path, dates_path, cldr)); TRY(generate_unicode_locale_header(*generated_header_file, cldr)); TRY(generate_unicode_locale_implementation(*generated_implementation_file, cldr)); diff --git a/Tests/LibLocale/CMakeLists.txt b/Tests/LibLocale/CMakeLists.txt index a2b44e84dd6..e31bf262b4e 100644 --- a/Tests/LibLocale/CMakeLists.txt +++ b/Tests/LibLocale/CMakeLists.txt @@ -1,5 +1,6 @@ set(TEST_SOURCES TestDateTimeFormat.cpp + TestDisplayNames.cpp TestLocale.cpp ) diff --git a/Tests/LibLocale/TestDisplayNames.cpp b/Tests/LibLocale/TestDisplayNames.cpp new file mode 100644 index 00000000000..44df60557df --- /dev/null +++ b/Tests/LibLocale/TestDisplayNames.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2021, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include + +#include +#include + +TEST_CASE(locale_mappings_en) +{ + auto language = Locale::language_display_name("en"sv, "en"sv, Locale::LanguageDisplay::Standard); + EXPECT(language.has_value()); + EXPECT_EQ(*language, "English"sv); + + language = Locale::language_display_name("en"sv, "i-defintely-don't-exist"sv, Locale::LanguageDisplay::Standard); + EXPECT(!language.has_value()); + + auto territory = Locale::region_display_name("en"sv, "US"sv); + EXPECT(territory.has_value()); + EXPECT_EQ(*territory, "United States"sv); + + territory = Locale::region_display_name("en"sv, "i-defintely-don't-exist"sv); + EXPECT(!territory.has_value()); + + auto script = Locale::script_display_name("en"sv, "Latn"sv); + EXPECT(script.has_value()); + EXPECT_EQ(*script, "Latin"sv); + + script = Locale::script_display_name("en"sv, "i-defintely-don't-exist"sv); + EXPECT(!script.has_value()); +} + +TEST_CASE(locale_mappings_fr) +{ + auto language = Locale::language_display_name("fr"sv, "en"sv, Locale::LanguageDisplay::Standard); + EXPECT(language.has_value()); + EXPECT_EQ(*language, "anglais"sv); + + language = Locale::language_display_name("fr"sv, "i-defintely-don't-exist"sv, Locale::LanguageDisplay::Standard); + EXPECT(!language.has_value()); + + auto territory = Locale::region_display_name("fr"sv, "US"sv); + EXPECT(territory.has_value()); + EXPECT_EQ(*territory, "États-Unis"sv); + + territory = Locale::region_display_name("fr"sv, "i-defintely-don't-exist"sv); + EXPECT(!territory.has_value()); + + auto script = Locale::script_display_name("fr"sv, "Latn"sv); + EXPECT(script.has_value()); + EXPECT_EQ(*script, "latin"sv); + + script = Locale::script_display_name("fr"sv, "i-defintely-don't-exist"sv); + EXPECT(!script.has_value()); +} + +TEST_CASE(locale_mappings_root) +{ + auto language = Locale::language_display_name("und"sv, "en"sv, Locale::LanguageDisplay::Standard); + EXPECT(language.has_value()); + EXPECT_EQ(*language, "en"sv); + + language = Locale::language_display_name("und"sv, "i-defintely-don't-exist"sv, Locale::LanguageDisplay::Standard); + EXPECT(!language.has_value()); + + auto territory = Locale::region_display_name("und"sv, "US"sv); + EXPECT(territory.has_value()); + EXPECT_EQ(*territory, "US"sv); + + territory = Locale::region_display_name("und"sv, "i-defintely-don't-exist"sv); + EXPECT(!territory.has_value()); + + auto script = Locale::script_display_name("und"sv, "Latn"sv); + EXPECT(script.has_value()); + EXPECT_EQ(*script, "Latn"sv); + + script = Locale::script_display_name("und"sv, "i-defintely-don't-exist"sv); + EXPECT(!script.has_value()); +} diff --git a/Tests/LibLocale/TestLocale.cpp b/Tests/LibLocale/TestLocale.cpp index 76458868648..022c47b2198 100644 --- a/Tests/LibLocale/TestLocale.cpp +++ b/Tests/LibLocale/TestLocale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, Tim Flynn + * Copyright (c) 2021-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -338,11 +338,8 @@ TEST_CASE(parse_unicode_locale_id_with_private_use_extension) TEST_CASE(canonicalize_unicode_locale_id) { auto test = [](StringView locale, StringView expected_canonical_locale) { - auto locale_id = Locale::parse_unicode_locale_id(locale); - VERIFY(locale_id.has_value()); - - auto canonical_locale = Locale::canonicalize_unicode_locale_id(*locale_id); - EXPECT_EQ(*canonical_locale, expected_canonical_locale); + auto canonical_locale = Locale::canonicalize_unicode_locale_id(locale); + EXPECT_EQ(canonical_locale, expected_canonical_locale); }; test("aaa"sv, "aaa"sv); @@ -373,8 +370,8 @@ TEST_CASE(canonicalize_unicode_locale_id) test("EN-U-KB-YES"sv, "en-u-kb"sv); test("en-u-kb-yes-abcd"sv, "en-u-kb-yes-abcd"sv); test("EN-U-KB-YES-ABCD"sv, "en-u-kb-yes-abcd"sv); - test("en-u-ka-yes"sv, "en-u-ka-yes"sv); - test("EN-U-KA-YES"sv, "en-u-ka-yes"sv); + test("en-u-ka-yes"sv, "en-u-ka"sv); + test("EN-U-KA-YES"sv, "en-u-ka"sv); test("en-u-1k-names"sv, "en-u-1k-names"sv); test("EN-U-1K-NAMES"sv, "en-u-1k-names"sv); test("en-u-ks-primary"sv, "en-u-ks-level1"sv); @@ -518,75 +515,3 @@ TEST_CASE(supports_locale_aliases) EXPECT(Locale::is_locale_available("zh-TW"sv)); EXPECT(Locale::is_locale_available("zh-Hant-TW"sv)); } - -TEST_CASE(locale_mappings_en) -{ - auto language = Locale::get_locale_language_mapping("en"sv, "en"sv); - EXPECT(language.has_value()); - EXPECT_EQ(*language, "English"sv); - - language = Locale::get_locale_language_mapping("en"sv, "i-defintely-don't-exist"sv); - EXPECT(!language.has_value()); - - auto territory = Locale::get_locale_territory_mapping("en"sv, "US"sv); - EXPECT(territory.has_value()); - EXPECT_EQ(*territory, "United States"sv); - - territory = Locale::get_locale_territory_mapping("en"sv, "i-defintely-don't-exist"sv); - EXPECT(!territory.has_value()); - - auto script = Locale::get_locale_script_mapping("en"sv, "Latn"sv); - EXPECT(script.has_value()); - EXPECT_EQ(*script, "Latin"sv); - - script = Locale::get_locale_script_mapping("en"sv, "i-defintely-don't-exist"sv); - EXPECT(!script.has_value()); -} - -TEST_CASE(locale_mappings_fr) -{ - auto language = Locale::get_locale_language_mapping("fr"sv, "en"sv); - EXPECT(language.has_value()); - EXPECT_EQ(*language, "anglais"sv); - - language = Locale::get_locale_language_mapping("fr"sv, "i-defintely-don't-exist"sv); - EXPECT(!language.has_value()); - - auto territory = Locale::get_locale_territory_mapping("fr"sv, "US"sv); - EXPECT(territory.has_value()); - EXPECT_EQ(*territory, "États-Unis"sv); - - territory = Locale::get_locale_territory_mapping("fr"sv, "i-defintely-don't-exist"sv); - EXPECT(!territory.has_value()); - - auto script = Locale::get_locale_script_mapping("fr"sv, "Latn"sv); - EXPECT(script.has_value()); - EXPECT_EQ(*script, "latin"sv); - - script = Locale::get_locale_script_mapping("fr"sv, "i-defintely-don't-exist"sv); - EXPECT(!script.has_value()); -} - -TEST_CASE(locale_mappings_root) -{ - auto language = Locale::get_locale_language_mapping("und"sv, "en"sv); - EXPECT(language.has_value()); - EXPECT_EQ(*language, "en"sv); - - language = Locale::get_locale_language_mapping("und"sv, "i-defintely-don't-exist"sv); - EXPECT(!language.has_value()); - - auto territory = Locale::get_locale_territory_mapping("und"sv, "US"sv); - EXPECT(territory.has_value()); - EXPECT_EQ(*territory, "US"sv); - - territory = Locale::get_locale_territory_mapping("und"sv, "i-defintely-don't-exist"sv); - EXPECT(!territory.has_value()); - - auto script = Locale::get_locale_script_mapping("und"sv, "Latn"sv); - EXPECT(script.has_value()); - EXPECT_EQ(*script, "Latn"sv); - - script = Locale::get_locale_script_mapping("und"sv, "i-defintely-don't-exist"sv); - EXPECT(!script.has_value()); -} diff --git a/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp b/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp index 20af3aac1bd..931963d3f7e 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, Tim Flynn + * Copyright (c) 2021-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -21,7 +21,7 @@ namespace JS::Intl { // 6.2.2 IsStructurallyValidLanguageTag ( locale ), https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag -Optional<::Locale::LocaleID> is_structurally_valid_language_tag(StringView locale) +bool is_structurally_valid_language_tag(StringView locale) { auto contains_duplicate_variant = [&](auto& variants) { if (variants.is_empty()) @@ -37,90 +37,78 @@ Optional<::Locale::LocaleID> is_structurally_valid_language_tag(StringView local return false; }; - // IsStructurallyValidLanguageTag returns true if all of the following conditions hold, false otherwise: + // 1. Let lowerLocale be the ASCII-lowercase of locale. + // NOTE: LibLocale's parsing is case-insensitive. - // locale can be generated from the EBNF grammar for unicode_locale_id in Unicode Technical Standard #35 LDML § 3.2 Unicode Locale Identifier; + // 2. If lowerLocale cannot be matched by the unicode_locale_id Unicode locale nonterminal, return false. auto locale_id = ::Locale::parse_unicode_locale_id(locale); if (!locale_id.has_value()) - return {}; + return false; - // locale does not use any of the backwards compatibility syntax described in Unicode Technical Standard #35 LDML § 3.3 BCP 47 Conformance; - // https://unicode.org/reports/tr35/#BCP_47_Conformance + // 3. If lowerLocale uses any of the backwards compatibility syntax described in Unicode Technical Standard #35 Part 1 Core, + // Section 3.3 BCP 47 Conformance, return false. + // https://unicode.org/reports/tr35/#BCP_47_Conformance if (locale.contains('_') || locale_id->language_id.is_root || !locale_id->language_id.language.has_value()) - return {}; + return false; - // the unicode_language_id within locale contains no duplicate unicode_variant_subtag subtags; and - if (contains_duplicate_variant(locale_id->language_id.variants)) - return {}; + // 4. Let languageId be the longest prefix of lowerLocale matched by the unicode_language_id Unicode locale nonterminal. + auto& language_id = locale_id->language_id; - // if locale contains an extensions* component, that component - Vector unique_keys; + // 5. Let variants be GetLocaleVariants(languageId). + // 6. If variants is not undefined, then + if (auto& variants = language_id.variants; !variants.is_empty()) { + // a. If variants contains any duplicate subtags, return false. + if (contains_duplicate_variant(variants)) + return false; + } + + HashTable unique_keys; + + // 7. Let allExtensions be the suffix of lowerLocale following languageId. + // 8. If allExtensions contains a substring matched by the pu_extensions Unicode locale nonterminal, let extensions be + // the prefix of allExtensions preceding the longest such substring. Otherwise, let extensions be allExtensions. + // 9. If extensions is not the empty String, then for (auto& extension : locale_id->extensions) { - // does not contain any other_extensions components with duplicate [alphanum-[tTuUxX]] subtags, - // contains at most one unicode_locale_extensions component, - // contains at most one transformed_extensions component, and char key = extension.visit( [](::Locale::LocaleExtension const&) { return 'u'; }, [](::Locale::TransformedExtension const&) { return 't'; }, [](::Locale::OtherExtension const& ext) { return static_cast(to_ascii_lowercase(ext.key)); }); - if (unique_keys.contains_slow(key)) - return {}; + // a. If extensions contains any duplicate singleton subtags, return false. + if (unique_keys.set(key) != HashSetResult::InsertedNewEntry) + return false; - unique_keys.append(key); - - // if a transformed_extensions component that contains a tlang component is present, then - // the tlang component contains no duplicate unicode_variant_subtag subtags. + // b. Let transformExtension be the longest substring of extensions matched by the transformed_extensions Unicode + // locale nonterminal. If there is no such substring, return true. if (auto* transformed = extension.get_pointer<::Locale::TransformedExtension>()) { - auto& language = transformed->language; - if (language.has_value() && contains_duplicate_variant(language->variants)) - return {}; + // c. Assert: The substring of transformExtension from 0 to 3 is "-t-". + // d. Let tPrefix be the substring of transformExtension from 3. + + // e. Let tlang be the longest prefix of tPrefix matched by the tlang Unicode locale nonterminal. If there is + // no such prefix, return true. + auto& transformed_language = transformed->language; + if (!transformed_language.has_value()) + continue; + + // f. Let tlangRefinements be the longest suffix of tlang following a non-empty prefix matched by the + // unicode_language_subtag Unicode locale nonterminal. + auto& transformed_refinements = transformed_language->variants; + + // g. If tlangRefinements contains any duplicate substrings matched greedily by the unicode_variant_subtag + // Unicode locale nonterminal, return false. + if (contains_duplicate_variant(transformed_refinements)) + return false; } } - return locale_id; + // 10. Return true. + return true; } // 6.2.3 CanonicalizeUnicodeLocaleId ( locale ), https://tc39.es/ecma402/#sec-canonicalizeunicodelocaleid -String canonicalize_unicode_locale_id(::Locale::LocaleID& locale) +String canonicalize_unicode_locale_id(StringView locale) { - // Note: This implementation differs from the spec in how Step 3 is implemented. The spec assumes - // the input to this method is a string, and is written such that operations are performed on parts - // of that string. LibUnicode gives us the parsed locale in a structure, so we can mutate that - // structure directly. From a footnote in the spec: - // - // The third step of this algorithm ensures that a Unicode locale extension sequence in the - // returned language tag contains: - // * only the first instance of any attribute duplicated in the input, and - // * only the first keyword for a given key in the input. - for (auto& extension : locale.extensions) { - if (!extension.has<::Locale::LocaleExtension>()) - continue; - - auto& locale_extension = extension.get<::Locale::LocaleExtension>(); - - auto attributes = move(locale_extension.attributes); - for (auto& attribute : attributes) { - if (!locale_extension.attributes.contains_slow(attribute)) - locale_extension.attributes.append(move(attribute)); - } - - auto keywords = move(locale_extension.keywords); - for (auto& keyword : keywords) { - if (!any_of(locale_extension.keywords, [&](auto const& k) { return k.key == keyword.key; })) - locale_extension.keywords.append(move(keyword)); - } - - break; - } - - // 1. Let localeId be the string locale after performing the algorithm to transform it to canonical syntax per Unicode Technical Standard #35 LDML § 3.2.1 Canonical Unicode Locale Identifiers. - // 2. Let localeId be the string localeId after performing the algorithm to transform it to canonical form. - auto locale_id = ::Locale::canonicalize_unicode_locale_id(locale); - VERIFY(locale_id.has_value()); - - // 4. Return localeId. - return locale_id.release_value(); + return ::Locale::canonicalize_unicode_locale_id(locale); } // 6.3.1 IsWellFormedCurrencyCode ( currency ), https://tc39.es/ecma402/#sec-iswellformedcurrencycode @@ -246,12 +234,11 @@ ThrowCompletionOr> canonicalize_locale_list(VM& vm, Value locales } // v. If ! IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. - auto locale_id = is_structurally_valid_language_tag(tag); - if (!locale_id.has_value()) + if (!is_structurally_valid_language_tag(tag)) return vm.throw_completion(ErrorType::IntlInvalidLanguageTag, tag); // vi. Let canonicalizedTag be ! CanonicalizeUnicodeLocaleId(tag). - auto canonicalized_tag = JS::Intl::canonicalize_unicode_locale_id(*locale_id); + auto canonicalized_tag = canonicalize_unicode_locale_id(tag); // vii. If canonicalizedTag is not an element of seen, append canonicalizedTag as the last element of seen. if (!seen.contains_slow(canonicalized_tag)) @@ -355,7 +342,7 @@ String insert_unicode_extension_and_canonicalize(::Locale::LocaleID locale, ::Lo // structure directly. locale.extensions.append(move(extension)); - return JS::Intl::canonicalize_unicode_locale_id(locale); + return JS::Intl::canonicalize_unicode_locale_id(locale.to_string()); } template diff --git a/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.h b/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.h index a4a3e1a0208..ea1593adf68 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.h +++ b/Userland/Libraries/LibJS/Runtime/Intl/AbstractOperations.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, Tim Flynn + * Copyright (c) 2021-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -81,8 +81,8 @@ struct PatternPartitionWithSource : public PatternPartition { using StringOrBoolean = Variant; -Optional<::Locale::LocaleID> is_structurally_valid_language_tag(StringView locale); -String canonicalize_unicode_locale_id(::Locale::LocaleID& locale); +bool is_structurally_valid_language_tag(StringView locale); +String canonicalize_unicode_locale_id(StringView locale); bool is_well_formed_currency_code(StringView currency); bool is_well_formed_unit_identifier(StringView unit_identifier); ThrowCompletionOr> canonicalize_locale_list(VM&, Value locales); diff --git a/Userland/Libraries/LibJS/Runtime/Intl/DisplayNames.cpp b/Userland/Libraries/LibJS/Runtime/Intl/DisplayNames.cpp index 37ed147a7b6..a3e5c3923bc 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/DisplayNames.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/DisplayNames.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, Tim Flynn + * Copyright (c) 2021-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -78,30 +78,6 @@ StringView DisplayNames::fallback_string() const } } -void DisplayNames::set_language_display(StringView language_display) -{ - if (language_display == "dialect"sv) - m_language_display = LanguageDisplay::Dialect; - else if (language_display == "standard"sv) - m_language_display = LanguageDisplay::Standard; - else - VERIFY_NOT_REACHED(); -} - -StringView DisplayNames::language_display_string() const -{ - VERIFY(m_language_display.has_value()); - - switch (*m_language_display) { - case LanguageDisplay::Dialect: - return "dialect"sv; - case LanguageDisplay::Standard: - return "standard"sv; - default: - VERIFY_NOT_REACHED(); - } -} - // 12.5.1 CanonicalCodeForDisplayNames ( type, code ), https://tc39.es/ecma402/#sec-canonicalcodefordisplaynames ThrowCompletionOr canonical_code_for_display_names(VM& vm, DisplayNames::Type type, StringView code) { @@ -112,12 +88,11 @@ ThrowCompletionOr canonical_code_for_display_names(VM& vm, DisplayNames:: return vm.throw_completion(ErrorType::OptionIsNotValidValue, code, "language"sv); // b. If IsStructurallyValidLanguageTag(code) is false, throw a RangeError exception. - auto locale_id = is_structurally_valid_language_tag(code); - if (!locale_id.has_value()) + if (!is_structurally_valid_language_tag(code)) return vm.throw_completion(ErrorType::IntlInvalidLanguageTag, code); // c. Return ! CanonicalizeUnicodeLocaleId(code). - auto canonicalized_tag = JS::Intl::canonicalize_unicode_locale_id(*locale_id); + auto canonicalized_tag = canonicalize_unicode_locale_id(code); return PrimitiveString::create(vm, move(canonicalized_tag)); } diff --git a/Userland/Libraries/LibJS/Runtime/Intl/DisplayNames.h b/Userland/Libraries/LibJS/Runtime/Intl/DisplayNames.h index b10056ccfcd..905ab331678 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/DisplayNames.h +++ b/Userland/Libraries/LibJS/Runtime/Intl/DisplayNames.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, Tim Flynn + * Copyright (c) 2021-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -10,6 +10,7 @@ #include #include #include +#include #include namespace JS::Intl { @@ -34,11 +35,6 @@ class DisplayNames final : public Object { Code, }; - enum class LanguageDisplay { - Dialect, - Standard, - }; - public: virtual ~DisplayNames() override = default; @@ -58,18 +54,18 @@ public: StringView fallback_string() const; bool has_language_display() const { return m_language_display.has_value(); } - LanguageDisplay language_display() const { return *m_language_display; } - void set_language_display(StringView language_display); - StringView language_display_string() const; + ::Locale::LanguageDisplay language_display() const { return *m_language_display; } + void set_language_display(StringView language_display) { m_language_display = ::Locale::language_display_from_string(language_display); } + StringView language_display_string() const { return ::Locale::language_display_to_string(*m_language_display); } private: DisplayNames(Object& prototype); - String m_locale; // [[Locale]] - ::Locale::Style m_style { ::Locale::Style::Long }; // [[Style]] - Type m_type { Type::Invalid }; // [[Type]] - Fallback m_fallback { Fallback::Invalid }; // [[Fallback]] - Optional m_language_display {}; // [[LanguageDisplay]] + String m_locale; // [[Locale]] + ::Locale::Style m_style { ::Locale::Style::Long }; // [[Style]] + Type m_type { Type::Invalid }; // [[Type]] + Fallback m_fallback { Fallback::Invalid }; // [[Fallback]] + Optional<::Locale::LanguageDisplay> m_language_display; // [[LanguageDisplay]] }; ThrowCompletionOr canonical_code_for_display_names(VM&, DisplayNames::Type, StringView code); diff --git a/Userland/Libraries/LibJS/Runtime/Intl/DisplayNamesPrototype.cpp b/Userland/Libraries/LibJS/Runtime/Intl/DisplayNamesPrototype.cpp index c30f3982f1a..89c7aac6263 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/DisplayNamesPrototype.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/DisplayNamesPrototype.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, Tim Flynn + * Copyright (c) 2021-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -9,7 +9,7 @@ #include #include #include -#include +#include namespace JS::Intl { @@ -53,58 +53,26 @@ JS_DEFINE_NATIVE_FUNCTION(DisplayNamesPrototype::of) // 5. Let fields be displayNames.[[Fields]]. // 6. If fields has a field [[]], return fields.[[]]. - Optional result; - Optional formatted_result; + Optional result; switch (display_names->type()) { case DisplayNames::Type::Language: - if (display_names->language_display() == DisplayNames::LanguageDisplay::Dialect) { - result = ::Locale::get_locale_language_mapping(display_names->locale(), code_string); - if (result.has_value()) - break; - } - - if (auto locale = is_structurally_valid_language_tag(code_string); locale.has_value()) - formatted_result = ::Locale::format_locale_for_display(display_names->locale(), locale.release_value()); + result = ::Locale::language_display_name(display_names->locale(), code_string, display_names->language_display()); break; case DisplayNames::Type::Region: - result = ::Locale::get_locale_territory_mapping(display_names->locale(), code_string); + result = ::Locale::region_display_name(display_names->locale(), code_string); break; case DisplayNames::Type::Script: - result = ::Locale::get_locale_script_mapping(display_names->locale(), code_string); + result = ::Locale::script_display_name(display_names->locale(), code_string); break; case DisplayNames::Type::Currency: - switch (display_names->style()) { - case ::Locale::Style::Long: - result = ::Locale::get_locale_long_currency_mapping(display_names->locale(), code_string); - break; - case ::Locale::Style::Short: - result = ::Locale::get_locale_short_currency_mapping(display_names->locale(), code_string); - break; - case ::Locale::Style::Narrow: - result = ::Locale::get_locale_narrow_currency_mapping(display_names->locale(), code_string); - break; - default: - VERIFY_NOT_REACHED(); - } + result = ::Locale::currency_display_name(display_names->locale(), code_string, display_names->style()); break; case DisplayNames::Type::Calendar: - result = ::Locale::get_locale_calendar_mapping(display_names->locale(), code_string); + result = ::Locale::calendar_display_name(display_names->locale(), code_string); break; case DisplayNames::Type::DateTimeField: - switch (display_names->style()) { - case ::Locale::Style::Long: - result = ::Locale::get_locale_long_date_field_mapping(display_names->locale(), code_string); - break; - case ::Locale::Style::Short: - result = ::Locale::get_locale_short_date_field_mapping(display_names->locale(), code_string); - break; - case ::Locale::Style::Narrow: - result = ::Locale::get_locale_narrow_date_field_mapping(display_names->locale(), code_string); - break; - default: - VERIFY_NOT_REACHED(); - } + result = ::Locale::date_time_field_display_name(display_names->locale(), code_string, display_names->style()); break; default: VERIFY_NOT_REACHED(); @@ -112,8 +80,6 @@ JS_DEFINE_NATIVE_FUNCTION(DisplayNamesPrototype::of) if (result.has_value()) return PrimitiveString::create(vm, result.release_value()); - if (formatted_result.has_value()) - return PrimitiveString::create(vm, formatted_result.release_value()); // 7. If displayNames.[[Fallback]] is "code", return code. if (display_names->fallback() == DisplayNames::Fallback::Code) diff --git a/Userland/Libraries/LibJS/Runtime/Intl/Intl.cpp b/Userland/Libraries/LibJS/Runtime/Intl/Intl.cpp index 3b12bf94e3a..3102ce80f22 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/Intl.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/Intl.cpp @@ -116,7 +116,7 @@ JS_DEFINE_NATIVE_FUNCTION(Intl::supported_values_of) // 1. Let key be ? ToString(key). auto key = TRY(vm.argument(0).to_string(vm)); - ReadonlySpan list; + Optional, ReadonlySpan>> list; // 2. If key is "calendar", then if (key == "calendar"sv) { @@ -131,7 +131,8 @@ JS_DEFINE_NATIVE_FUNCTION(Intl::supported_values_of) // 4. Else if key is "currency", then else if (key == "currency"sv) { // a. Let list be ! AvailableCanonicalCurrencies( ). - list = ::Locale::get_available_currencies(); + static auto const currencies = ::Locale::available_currencies(); + list = currencies.span(); } // 5. Else if key is "numberingSystem", then else if (key == "numberingSystem"sv) { @@ -141,13 +142,13 @@ JS_DEFINE_NATIVE_FUNCTION(Intl::supported_values_of) // 6. Else if key is "timeZone", then else if (key == "timeZone"sv) { // a. Let list be ! AvailableCanonicalTimeZones( ). - static auto time_zones = available_canonical_time_zones(); + static auto const time_zones = available_canonical_time_zones(); list = time_zones.span(); } // 7. Else if key is "unit", then else if (key == "unit"sv) { // a. Let list be ! AvailableCanonicalUnits( ). - static auto units = sanctioned_single_unit_identifiers(); + static auto const units = sanctioned_single_unit_identifiers(); list = units.span(); } // 8. Else, @@ -157,8 +158,10 @@ JS_DEFINE_NATIVE_FUNCTION(Intl::supported_values_of) } // 9. Return CreateArrayFromList( list ). - return Array::create_from(realm, list, [&](auto value) { - return PrimitiveString::create(vm, value); + return list->visit([&](ReadonlySpan list) { + return Array::create_from(realm, list, [&](auto value) { + return PrimitiveString::create(vm, value); + }); }); } diff --git a/Userland/Libraries/LibJS/Runtime/Intl/LocaleConstructor.cpp b/Userland/Libraries/LibJS/Runtime/Intl/LocaleConstructor.cpp index 3f8a042d4f2..186b3270996 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/LocaleConstructor.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/LocaleConstructor.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, Tim Flynn + * Copyright (c) 2021-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -49,8 +49,7 @@ static ThrowCompletionOr apply_options_to_tag(VM& vm, StringView tag, Ob // 2. Assert: Type(options) is Object. // 3. If ! IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. - auto locale_id = is_structurally_valid_language_tag(tag); - if (!locale_id.has_value()) + if (!is_structurally_valid_language_tag(tag)) return vm.throw_completion(ErrorType::IntlInvalidLanguageTag, tag); // 4. Let language be ? GetOption(options, "language", string, empty, undefined). @@ -69,10 +68,10 @@ static ThrowCompletionOr apply_options_to_tag(VM& vm, StringView tag, Ob auto region = TRY(get_string_option(vm, options, vm.names.region, ::Locale::is_unicode_region_subtag)); // 10. Set tag to ! CanonicalizeUnicodeLocaleId(tag). - auto canonicalized_tag = JS::Intl::canonicalize_unicode_locale_id(*locale_id); + auto canonicalized_tag = JS::Intl::canonicalize_unicode_locale_id(tag); // 11. Assert: tag matches the unicode_locale_id production. - locale_id = ::Locale::parse_unicode_locale_id(canonicalized_tag); + auto locale_id = ::Locale::parse_unicode_locale_id(canonicalized_tag); VERIFY(locale_id.has_value()); // 12. Let languageId be the substring of tag corresponding to the unicode_language_id production. @@ -103,8 +102,10 @@ static ThrowCompletionOr apply_options_to_tag(VM& vm, StringView tag, Ob } // 16. Set tag to tag with the substring corresponding to the unicode_language_id production replaced by the string languageId. + canonicalized_tag = locale_id->to_string(); + // 17. Return ! CanonicalizeUnicodeLocaleId(tag). - return JS::Intl::canonicalize_unicode_locale_id(*locale_id); + return JS::Intl::canonicalize_unicode_locale_id(canonicalized_tag); } // 14.1.3 ApplyUnicodeExtensionToTag ( tag, options, relevantExtensionKeys ), https://tc39.es/ecma402/#sec-apply-unicode-extension-to-tag diff --git a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp index b11a53098c1..e03c9a984fb 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, Tim Flynn + * Copyright (c) 2021-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -99,13 +100,13 @@ StringView NumberFormat::resolve_currency_display() m_resolved_currency_display = currency(); break; case NumberFormat::CurrencyDisplay::Symbol: - m_resolved_currency_display = ::Locale::get_locale_short_currency_mapping(data_locale(), currency()); + m_resolved_currency_display = ::Locale::currency_display_name(data_locale(), currency(), ::Locale::Style::Short); break; case NumberFormat::CurrencyDisplay::NarrowSymbol: - m_resolved_currency_display = ::Locale::get_locale_narrow_currency_mapping(data_locale(), currency()); + m_resolved_currency_display = ::Locale::currency_display_name(data_locale(), currency(), ::Locale::Style::Narrow); break; case NumberFormat::CurrencyDisplay::Name: - m_resolved_currency_display = ::Locale::get_locale_numeric_currency_mapping(data_locale(), currency()); + m_resolved_currency_display = ::Locale::currency_numeric_display_name(data_locale(), currency()); break; default: VERIFY_NOT_REACHED(); diff --git a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.h b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.h index 700dcbdd78a..55dc2bc6934 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.h +++ b/Userland/Libraries/LibJS/Runtime/Intl/NumberFormat.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, Tim Flynn + * Copyright (c) 2021-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -268,7 +268,7 @@ private: GCPtr m_bound_format; // [[BoundFormat]] // Non-standard. Stores the resolved currency display string based on [[Locale]], [[Currency]], and [[CurrencyDisplay]]. - Optional m_resolved_currency_display; + Optional m_resolved_currency_display; // Non-standard. Stores the resolved compact number format based on [[Locale]], [[Notation], [[Style]], and [[CompactDisplay]]. Optional<::Locale::NumberFormat> m_compact_format; diff --git a/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js b/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js index bda470aa460..1fde5a9b730 100644 --- a/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js +++ b/Userland/Libraries/LibJS/Tests/builtins/Intl/Intl.getCanonicalLocales.js @@ -88,8 +88,8 @@ describe("normal behavior", () => { expect(Intl.getCanonicalLocales(["EN-US"])).toEqual(["en-US"]); expect(Intl.getCanonicalLocales(["EN-US", "Fr"])).toEqual(["en-US", "fr"]); expect(Intl.getCanonicalLocales("EN-lATN-US")).toEqual(["en-Latn-US"]); - expect(Intl.getCanonicalLocales("EN-US-POSIX")).toEqual(["en-US-posix"]); - expect(Intl.getCanonicalLocales("EN-LATN-US-POSIX")).toEqual(["en-Latn-US-posix"]); + expect(Intl.getCanonicalLocales("EN-US-POSIX")).toEqual(["en-US-u-va-posix"]); + expect(Intl.getCanonicalLocales("EN-LATN-US-POSIX")).toEqual(["en-Latn-US-u-va-posix"]); }); test("duplicate locales", () => { diff --git a/Userland/Libraries/LibLocale/CMakeLists.txt b/Userland/Libraries/LibLocale/CMakeLists.txt index 3f9df1ffcb0..a944cefdc97 100644 --- a/Userland/Libraries/LibLocale/CMakeLists.txt +++ b/Userland/Libraries/LibLocale/CMakeLists.txt @@ -11,6 +11,8 @@ endif() set(SOURCES DateTimeFormat.cpp + DisplayNames.cpp + ICU.cpp Locale.cpp NumberFormat.cpp PluralRules.cpp diff --git a/Userland/Libraries/LibLocale/DisplayNames.cpp b/Userland/Libraries/LibLocale/DisplayNames.cpp new file mode 100644 index 00000000000..a1fa1585f1f --- /dev/null +++ b/Userland/Libraries/LibLocale/DisplayNames.cpp @@ -0,0 +1,242 @@ +/* + * Copyright (c) 2024, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#define AK_DONT_REPLACE_STD + +#include +#include +#include + +#include +#include +#include +#include + +namespace Locale { + +LanguageDisplay language_display_from_string(StringView language_display) +{ + if (language_display == "standard"sv) + return LanguageDisplay::Standard; + if (language_display == "dialect"sv) + return LanguageDisplay::Dialect; + VERIFY_NOT_REACHED(); +} + +StringView language_display_to_string(LanguageDisplay language_display) +{ + switch (language_display) { + case LanguageDisplay::Standard: + return "standard"sv; + case LanguageDisplay::Dialect: + return "dialect"sv; + default: + VERIFY_NOT_REACHED(); + } +} + +Optional language_display_name(StringView locale, StringView language, LanguageDisplay display) +{ + auto locale_data = LocaleData::for_locale(locale); + if (!locale_data.has_value()) + return {}; + + auto language_data = LocaleData::for_locale(language); + if (!language_data.has_value()) + return {}; + + auto& display_names = display == LanguageDisplay::Standard + ? locale_data->standard_display_names() + : locale_data->dialect_display_names(); + + icu::UnicodeString result; + display_names.localeDisplayName(language_data->locale().getName(), result); + + return icu_string_to_string(result); +} + +Optional region_display_name(StringView locale, StringView region) +{ + UErrorCode status = U_ZERO_ERROR; + + auto locale_data = LocaleData::for_locale(locale); + if (!locale_data.has_value()) + return {}; + + auto icu_region = icu::LocaleBuilder().setRegion(icu_string_piece(region)).build(status); + if (icu_failure(status)) + return {}; + + icu::UnicodeString result; + locale_data->standard_display_names().regionDisplayName(icu_region.getCountry(), result); + + return icu_string_to_string(result); +} + +Optional script_display_name(StringView locale, StringView script) +{ + UErrorCode status = U_ZERO_ERROR; + + auto locale_data = LocaleData::for_locale(locale); + if (!locale_data.has_value()) + return {}; + + auto icu_script = icu::LocaleBuilder().setScript(icu_string_piece(script)).build(status); + if (icu_failure(status)) + return {}; + + icu::UnicodeString result; + locale_data->standard_display_names().scriptDisplayName(icu_script.getScript(), result); + + return icu_string_to_string(result); +} + +Optional calendar_display_name(StringView locale, StringView calendar) +{ + auto locale_data = LocaleData::for_locale(locale); + if (!locale_data.has_value()) + return {}; + + if (calendar == "gregory"sv) + calendar = "gregorian"sv; + if (calendar == "islamicc"sv) + calendar = "islamic-civil"sv; + if (calendar == "ethioaa"sv) + calendar = "ethiopic-amete-alem"sv; + + icu::UnicodeString result; + locale_data->standard_display_names().keyValueDisplayName("calendar", ByteString(calendar).characters(), result); + + return icu_string_to_string(result); +} + +static constexpr UDateTimePatternField icu_date_time_field(StringView field) +{ + if (field == "day"sv) + return UDATPG_DAY_FIELD; + if (field == "dayPeriod"sv) + return UDATPG_DAYPERIOD_FIELD; + if (field == "era"sv) + return UDATPG_ERA_FIELD; + if (field == "hour"sv) + return UDATPG_HOUR_FIELD; + if (field == "minute"sv) + return UDATPG_MINUTE_FIELD; + if (field == "month"sv) + return UDATPG_MONTH_FIELD; + if (field == "quarter"sv) + return UDATPG_QUARTER_FIELD; + if (field == "second"sv) + return UDATPG_SECOND_FIELD; + if (field == "timeZoneName"sv) + return UDATPG_ZONE_FIELD; + if (field == "weekOfYear"sv) + return UDATPG_WEEK_OF_YEAR_FIELD; + if (field == "weekday"sv) + return UDATPG_WEEKDAY_FIELD; + if (field == "year"sv) + return UDATPG_YEAR_FIELD; + VERIFY_NOT_REACHED(); +} + +static constexpr UDateTimePGDisplayWidth icu_date_time_style(Style style) +{ + switch (style) { + case Style::Long: + return UDATPG_WIDE; + case Style::Short: + return UDATPG_ABBREVIATED; + case Style::Narrow: + return UDATPG_NARROW; + } + + VERIFY_NOT_REACHED(); +} + +Optional date_time_field_display_name(StringView locale, StringView field, Style style) +{ + auto locale_data = LocaleData::for_locale(locale); + if (!locale_data.has_value()) + return {}; + + auto icu_field = icu_date_time_field(field); + auto icu_style = icu_date_time_style(style); + + icu::UnicodeString result; + result = locale_data->date_time_pattern_generator().getFieldDisplayName(icu_field, icu_style); + + return icu_string_to_string(result); +} + +static constexpr Array icu_currency_code(StringView currency) +{ + VERIFY(currency.length() == 3); + + return to_array({ + static_cast(currency[0]), + static_cast(currency[1]), + static_cast(currency[2]), + u'\0', + }); +} + +static constexpr UCurrNameStyle icu_currency_style(Style style) +{ + switch (style) { + case Style::Long: + return UCURR_LONG_NAME; + case Style::Short: + return UCURR_SYMBOL_NAME; + case Style::Narrow: + return UCURR_NARROW_SYMBOL_NAME; + } + + VERIFY_NOT_REACHED(); +} + +Optional currency_display_name(StringView locale, StringView currency, Style style) +{ + UErrorCode status = U_ZERO_ERROR; + + auto locale_data = LocaleData::for_locale(locale); + if (!locale_data.has_value()) + return {}; + + auto icu_currency = icu_currency_code(currency); + + i32 length = 0; + UChar const* result = ucurr_getName(icu_currency.data(), locale_data->locale().getName(), icu_currency_style(style), nullptr, &length, &status); + + if (icu_failure(status)) + return {}; + if ((status == U_USING_DEFAULT_WARNING) && (result == icu_currency.data())) + return {}; + + return icu_string_to_string(result, length); +} + +Optional currency_numeric_display_name(StringView locale, StringView currency) +{ + UErrorCode status = U_ZERO_ERROR; + + auto locale_data = LocaleData::for_locale(locale); + if (!locale_data.has_value()) + return {}; + + auto icu_currency = icu_currency_code(currency); + + i32 length = 0; + UChar const* result = ucurr_getPluralName(icu_currency.data(), locale_data->locale().getName(), nullptr, "other", &length, &status); + + if (icu_failure(status)) + return {}; + if ((status == U_USING_DEFAULT_WARNING) && (result == icu_currency.data())) + return {}; + + return icu_string_to_string(result, length); +} + +} diff --git a/Userland/Libraries/LibLocale/DisplayNames.h b/Userland/Libraries/LibLocale/DisplayNames.h new file mode 100644 index 00000000000..30b34438622 --- /dev/null +++ b/Userland/Libraries/LibLocale/DisplayNames.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2024, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include + +namespace Locale { + +enum class LanguageDisplay { + Standard, + Dialect, +}; + +LanguageDisplay language_display_from_string(StringView language_display); +StringView language_display_to_string(LanguageDisplay language_display); + +Optional language_display_name(StringView locale, StringView language, LanguageDisplay); +Optional region_display_name(StringView locale, StringView region); +Optional script_display_name(StringView locale, StringView script); +Optional calendar_display_name(StringView locale, StringView calendar); +Optional date_time_field_display_name(StringView locale, StringView field, Style); +Optional currency_display_name(StringView locale, StringView currency, Style); +Optional currency_numeric_display_name(StringView locale, StringView currency); + +} diff --git a/Userland/Libraries/LibLocale/Forward.h b/Userland/Libraries/LibLocale/Forward.h index b503f19feb4..dc1fa2b0854 100644 --- a/Userland/Libraries/LibLocale/Forward.h +++ b/Userland/Libraries/LibLocale/Forward.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, Tim Flynn + * Copyright (c) 2021-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -15,9 +15,6 @@ enum class CalendarPatternStyle : u8; enum class CalendarSymbol : u8; enum class CharacterOrder : u8; enum class CompactNumberFormatType : u8; -enum class Condition : u8; -enum class Currency : u16; -enum class DateField : u8; enum class DayPeriod : u8; enum class Era : u8; enum class FirstDayRegion : u8; @@ -30,17 +27,14 @@ enum class KeywordColCaseFirst : u8; enum class KeywordColNumeric : u8; enum class KeywordHours : u8; enum class KeywordNumbers : u8; -enum class Language : u16; enum class ListPatternType : u8; enum class Locale : u16; enum class MinimumDaysRegion : u8; enum class Month : u8; enum class NumericSymbol : u8; enum class PluralCategory : u8; -enum class ScriptTag : u8; enum class StandardNumberFormatType : u8; enum class Style : u8; -enum class Territory : u8; enum class Weekday : u8; enum class WeekendEndRegion : u8; enum class WeekendStartRegion : u8; diff --git a/Userland/Libraries/LibLocale/ICU.cpp b/Userland/Libraries/LibLocale/ICU.cpp new file mode 100644 index 00000000000..35e2dc475fa --- /dev/null +++ b/Userland/Libraries/LibLocale/ICU.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2024, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#define AK_DONT_REPLACE_STD + +#include +#include +#include +#include + +#include +#include +#include + +namespace Locale { + +static HashMap> s_locale_cache; + +Optional LocaleData::for_locale(StringView locale) +{ + auto locale_data = s_locale_cache.get(locale); + + if (!locale_data.has_value()) { + locale_data = s_locale_cache.ensure(MUST(String::from_utf8(locale)), [&]() -> OwnPtr { + UErrorCode status = U_ZERO_ERROR; + + auto icu_locale = icu::Locale::forLanguageTag(icu_string_piece(locale), status); + if (icu_failure(status)) + return nullptr; + + return adopt_own(*new LocaleData { move(icu_locale) }); + }); + } + + if (locale_data.value()) + return *locale_data.value(); + return {}; +} + +LocaleData::LocaleData(icu::Locale locale) + : m_locale(move(locale)) +{ +} + +String LocaleData::to_string() +{ + if (!m_locale_string.has_value()) { + UErrorCode status = U_ZERO_ERROR; + + auto result = locale().toLanguageTag(status); + VERIFY(icu_success(status)); + + m_locale_string = MUST(result.to_string()); + } + + return *m_locale_string; +} + +icu::LocaleDisplayNames& LocaleData::standard_display_names() +{ + if (!m_standard_display_names) + m_standard_display_names = adopt_own(*icu::LocaleDisplayNames::createInstance(locale())); + return *m_standard_display_names; +} + +icu::LocaleDisplayNames& LocaleData::dialect_display_names() +{ + if (!m_dialect_display_names) + m_dialect_display_names = adopt_own(*icu::LocaleDisplayNames::createInstance(locale(), ULDN_DIALECT_NAMES)); + return *m_dialect_display_names; +} + +icu::DateTimePatternGenerator& LocaleData::date_time_pattern_generator() +{ + if (!m_date_time_pattern_generator) { + UErrorCode status = U_ZERO_ERROR; + + m_date_time_pattern_generator = adopt_own(*icu::DateTimePatternGenerator::createInstance(locale(), status)); + VERIFY(icu_success(status)); + } + + return *m_date_time_pattern_generator; +} + +icu::StringPiece icu_string_piece(StringView string) +{ + return { string.characters_without_null_termination(), static_cast(string.length()) }; +} + +String icu_string_to_string(icu::UnicodeString const& string) +{ + return icu_string_to_string(string.getBuffer(), string.length()); +} + +String icu_string_to_string(UChar const* string, i32 length) +{ + ReadonlySpan view { reinterpret_cast(string), static_cast(length) }; + return MUST(Utf16View { view }.to_utf8()); +} + +} diff --git a/Userland/Libraries/LibLocale/ICU.h b/Userland/Libraries/LibLocale/ICU.h new file mode 100644 index 00000000000..99b998354ad --- /dev/null +++ b/Userland/Libraries/LibLocale/ICU.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2024, Tim Flynn + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#define AK_DONT_REPLACE_STD + +#include +#include +#include +#include + +#include +#include +#include +#include + +U_NAMESPACE_BEGIN +class DateTimePatternGenerator; +class LocaleDisplayNames; +class UnicodeString; +U_NAMESPACE_END + +namespace Locale { + +class LocaleData { +public: + static Optional for_locale(StringView locale); + + ALWAYS_INLINE icu::Locale& locale() { return m_locale; } + + String to_string(); + + icu::LocaleDisplayNames& standard_display_names(); + icu::LocaleDisplayNames& dialect_display_names(); + + icu::DateTimePatternGenerator& date_time_pattern_generator(); + +private: + explicit LocaleData(icu::Locale locale); + + icu::Locale m_locale; + Optional m_locale_string; + + OwnPtr m_standard_display_names; + OwnPtr m_dialect_display_names; + OwnPtr m_date_time_pattern_generator; +}; + +static constexpr bool icu_success(UErrorCode code) +{ + return static_cast(U_SUCCESS(code)); +} + +static constexpr bool icu_failure(UErrorCode code) +{ + return static_cast(U_FAILURE(code)); +} + +icu::StringPiece icu_string_piece(StringView string); +String icu_string_to_string(icu::UnicodeString const& string); +String icu_string_to_string(UChar const*, i32 length); + +} diff --git a/Userland/Libraries/LibLocale/Locale.cpp b/Userland/Libraries/LibLocale/Locale.cpp index 8ec3bd5548e..2011b7df7c9 100644 --- a/Userland/Libraries/LibLocale/Locale.cpp +++ b/Userland/Libraries/LibLocale/Locale.cpp @@ -1,17 +1,25 @@ /* - * Copyright (c) 2021-2023, Tim Flynn + * Copyright (c) 2021-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ +#define AK_DONT_REPLACE_STD + #include #include #include +#include #include #include +#include #include #include +#include +#include +#include + namespace Locale { static bool is_key(StringView key) @@ -473,266 +481,36 @@ Optional parse_unicode_locale_id(StringView locale) return locale_id; } -static void perform_hard_coded_key_value_substitutions(StringView key, String& value) +String canonicalize_unicode_locale_id(StringView locale) { - // FIXME: In the XML export of CLDR, there are some aliases defined in the following files: - // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/calendar.xml - // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/collation.xml - // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/measure.xml - // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/timezone.xml - // https://github.com/unicode-org/cldr-staging/blob/master/production/common/bcp47/transform.xml - // - // There isn't yet a counterpart in the JSON export. See: https://unicode-org.atlassian.net/browse/CLDR-14571 - Optional result; + UErrorCode status = U_ZERO_ERROR; - if (key == "ca"sv) { - if (value == "islamicc"sv) - result = "islamic-civil"sv; - else if (value == "ethiopic-amete-alem"sv) - result = "ethioaa"sv; - } else if (key.is_one_of("kb"sv, "kc"sv, "kh"sv, "kk"sv, "kn"sv) && (value == "yes"sv)) { - result = "true"sv; - } else if (key == "ks"sv) { - if (value == "primary"sv) - result = "level1"sv; - else if (value == "tertiary"sv) - result = "level3"sv; - // Note: There are also aliases for "secondary", "quaternary", "quarternary", and "identical", - // but those are semantically incorrect values (they are too long), so they can be skipped. - } else if ((key == "m0"sv) && (value == "names"sv)) { - result = "prprname"sv; - } else if ((key == "ms"sv) && (value == "imperial"sv)) { - result = "uksystem"sv; - } else if (key == "tz"sv) { - // Formatter disabled because this block is easier to read / check against timezone.xml as one-liners. - // clang-format off - if (value == "aqams"sv) result = "nzakl"sv; - else if (value == "cnckg"sv) result = "cnsha"sv; - else if (value == "cnhrb"sv) result = "cnsha"sv; - else if (value == "cnkhg"sv) result = "cnurc"sv; - else if (value == "cuba"sv) result = "cuhav"sv; - else if (value == "egypt"sv) result = "egcai"sv; - else if (value == "eire"sv) result = "iedub"sv; - else if (value == "est"sv) result = "utcw05"sv; - else if (value == "gmt0"sv) result = "gmt"sv; - else if (value == "hongkong"sv) result = "hkhkg"sv; - else if (value == "hst"sv) result = "utcw10"sv; - else if (value == "iceland"sv) result = "isrey"sv; - else if (value == "iran"sv) result = "irthr"sv; - else if (value == "israel"sv) result = "jeruslm"sv; - else if (value == "jamaica"sv) result = "jmkin"sv; - else if (value == "japan"sv) result = "jptyo"sv; - else if (value == "kwajalein"sv) result = "mhkwa"sv; - else if (value == "libya"sv) result = "lytip"sv; - else if (value == "mst"sv) result = "utcw07"sv; - else if (value == "navajo"sv) result = "usden"sv; - else if (value == "poland"sv) result = "plwaw"sv; - else if (value == "portugal"sv) result = "ptlis"sv; - else if (value == "prc"sv) result = "cnsha"sv; - else if (value == "roc"sv) result = "twtpe"sv; - else if (value == "rok"sv) result = "krsel"sv; - else if (value == "singapore"sv) result = "sgsin"sv; - else if (value == "turkey"sv) result = "trist"sv; - else if (value == "uct"sv) result = "utc"sv; - else if (value == "usnavajo"sv) result = "usden"sv; - else if (value == "zulu"sv) result = "utc"sv; - // clang-format on - } + auto locale_data = LocaleData::for_locale(locale); + VERIFY(locale_data.has_value()); - if (result.has_value()) - value = MUST(String::from_utf8(*result)); + locale_data->locale().canonicalize(status); + VERIFY(icu_success(status)); + + return locale_data->to_string(); } -void canonicalize_unicode_extension_values(StringView key, String& value, bool remove_true) +void canonicalize_unicode_extension_values(StringView key, String& value, bool) { - value = MUST(value.to_lowercase()); - perform_hard_coded_key_value_substitutions(key, value); + UErrorCode status = U_ZERO_ERROR; - // Note: The spec says to remove "true" type and tfield values but that is believed to be a bug in the spec - // because, for tvalues, that would result in invalid syntax: - // https://unicode-org.atlassian.net/browse/CLDR-14318 - // This has also been noted by test262: - // https://github.com/tc39/test262/blob/18bb955771669541c56c28748603f6afdb2e25ff/test/intl402/Intl/getCanonicalLocales/transformed-ext-canonical.js - if (remove_true && (value == "true"sv)) { - value = {}; - return; - } + icu::LocaleBuilder builder; + builder.setUnicodeLocaleKeyword(icu_string_piece(key), icu_string_piece(value)); - if (key.is_one_of("sd"sv, "rg"sv)) { - if (auto alias = resolve_subdivision_alias(value); alias.has_value()) { - auto aliases = alias->split_view(' '); + auto locale = builder.build(status); + VERIFY(icu_success(status)); - // FIXME: Subdivision subtags do not appear in the CLDR likelySubtags.json file. - // Implement the spec's recommendation of using just the first alias for now, - // but we should determine if there's anything else needed here. - value = MUST(String::from_utf8(aliases[0])); - } - } -} + locale.canonicalize(status); + VERIFY(icu_success(status)); -static void transform_unicode_locale_id_to_canonical_syntax(LocaleID& locale_id) -{ - auto canonicalize_language = [&](LanguageID& language_id, bool force_lowercase) { - language_id.language = MUST(language_id.language->to_lowercase()); - if (language_id.script.has_value()) - language_id.script = MUST(language_id.script->to_titlecase()); - if (language_id.region.has_value()) - language_id.region = MUST(language_id.region->to_uppercase()); - for (auto& variant : language_id.variants) - variant = MUST(variant.to_lowercase()); + auto result = locale.getUnicodeKeywordValue(icu_string_piece(key), status); + VERIFY(icu_success(status)); - resolve_complex_language_aliases(language_id); - - if (auto alias = resolve_language_alias(*language_id.language); alias.has_value()) { - auto language_alias = parse_unicode_language_id(*alias); - VERIFY(language_alias.has_value()); - - language_id.language = move(language_alias->language); - if (!language_id.script.has_value() && language_alias->script.has_value()) - language_id.script = move(language_alias->script); - if (!language_id.region.has_value() && language_alias->region.has_value()) - language_id.region = move(language_alias->region); - if (language_id.variants.is_empty() && !language_alias->variants.is_empty()) - language_id.variants = move(language_alias->variants); - } - - if (language_id.script.has_value()) { - if (auto alias = resolve_script_tag_alias(*language_id.script); alias.has_value()) - language_id.script = MUST(String::from_utf8(*alias)); - } - - if (language_id.region.has_value()) { - if (auto alias = resolve_territory_alias(*language_id.region); alias.has_value()) - language_id.region = resolve_most_likely_territory_alias(language_id, *alias); - } - - quick_sort(language_id.variants); - - for (auto& variant : language_id.variants) { - variant = MUST(variant.to_lowercase()); - if (auto alias = resolve_variant_alias(variant); alias.has_value()) - variant = MUST(String::from_utf8(*alias)); - } - - if (force_lowercase) { - if (language_id.script.has_value()) - language_id.script = MUST(language_id.script->to_lowercase()); - if (language_id.region.has_value()) - language_id.region = MUST(language_id.region->to_lowercase()); - } - }; - - canonicalize_language(locale_id.language_id, false); - - quick_sort(locale_id.extensions, [](auto const& left, auto const& right) { - auto key = [](auto const& extension) { - return extension.visit( - [](LocaleExtension const&) { return 'u'; }, - [](TransformedExtension const&) { return 't'; }, - [](OtherExtension const& ext) { return static_cast(to_ascii_lowercase(ext.key)); }); - }; - - return key(left) < key(right); - }); - - for (auto& extension : locale_id.extensions) { - extension.visit( - [&](LocaleExtension& ext) { - for (auto& attribute : ext.attributes) - attribute = MUST(attribute.to_lowercase()); - - for (auto& keyword : ext.keywords) { - keyword.key = MUST(keyword.key.to_lowercase()); - canonicalize_unicode_extension_values(keyword.key, keyword.value, true); - } - - quick_sort(ext.attributes); - quick_sort(ext.keywords, [](auto const& a, auto const& b) { return a.key < b.key; }); - }, - [&](TransformedExtension& ext) { - if (ext.language.has_value()) - canonicalize_language(*ext.language, true); - - for (auto& field : ext.fields) { - field.key = MUST(field.key.to_lowercase()); - canonicalize_unicode_extension_values(field.key, field.value, false); - } - - quick_sort(ext.fields, [](auto const& a, auto const& b) { return a.key < b.key; }); - }, - [&](OtherExtension& ext) { - ext.key = static_cast(to_ascii_lowercase(ext.key)); - ext.value = MUST(ext.value.to_lowercase()); - }); - } - - for (auto& extension : locale_id.private_use_extensions) - extension = MUST(extension.to_lowercase()); -} - -Optional canonicalize_unicode_locale_id(LocaleID& locale_id) -{ - // https://unicode.org/reports/tr35/#Canonical_Unicode_Locale_Identifiers - StringBuilder builder; - - auto append_sep_and_string = [&](Optional const& string) { - if (!string.has_value() || string->is_empty()) - return; - builder.appendff("-{}", *string); - }; - - if (!locale_id.language_id.language.has_value()) - return {}; - - transform_unicode_locale_id_to_canonical_syntax(locale_id); - - builder.append(MUST(locale_id.language_id.language->to_lowercase())); - append_sep_and_string(locale_id.language_id.script); - append_sep_and_string(locale_id.language_id.region); - for (auto const& variant : locale_id.language_id.variants) - append_sep_and_string(variant); - - for (auto const& extension : locale_id.extensions) { - extension.visit( - [&](LocaleExtension const& ext) { - builder.append("-u"sv); - - for (auto const& attribute : ext.attributes) - append_sep_and_string(attribute); - for (auto const& keyword : ext.keywords) { - append_sep_and_string(keyword.key); - append_sep_and_string(keyword.value); - } - }, - [&](TransformedExtension const& ext) { - builder.append("-t"sv); - - if (ext.language.has_value()) { - append_sep_and_string(ext.language->language); - append_sep_and_string(ext.language->script); - append_sep_and_string(ext.language->region); - for (auto const& variant : ext.language->variants) - append_sep_and_string(variant); - } - - for (auto const& field : ext.fields) { - append_sep_and_string(field.key); - append_sep_and_string(field.value); - } - }, - [&](OtherExtension const& ext) { - builder.appendff("-{:c}", to_ascii_lowercase(ext.key)); - append_sep_and_string(ext.value); - }); - } - - if (!locale_id.private_use_extensions.is_empty()) { - builder.append("-x"sv); - for (auto const& extension : locale_id.private_use_extensions) - append_sep_and_string(extension); - } - - return MUST(builder.to_string()); + value = MUST(result.to_string()); } StringView default_locale() @@ -775,15 +553,9 @@ ReadonlySpan __attribute__((weak)) get_available_calendars() { retur ReadonlySpan __attribute__((weak)) get_available_collation_case_orderings() { return {}; } ReadonlySpan __attribute__((weak)) get_available_collation_numeric_orderings() { return {}; } ReadonlySpan __attribute__((weak)) get_available_collation_types() { return {}; } -ReadonlySpan __attribute__((weak)) get_available_currencies() { return {}; } ReadonlySpan __attribute__((weak)) get_available_hour_cycles() { return {}; } ReadonlySpan __attribute__((weak)) get_available_number_systems() { return {}; } Optional __attribute__((weak)) locale_from_string(StringView) { return {}; } -Optional __attribute__((weak)) language_from_string(StringView) { return {}; } -Optional __attribute__((weak)) territory_from_string(StringView) { return {}; } -Optional __attribute__((weak)) script_tag_from_string(StringView) { return {}; } -Optional __attribute__((weak)) currency_from_string(StringView) { return {}; } -Optional __attribute__((weak)) date_field_from_string(StringView) { return {}; } Optional __attribute__((weak)) list_pattern_type_from_string(StringView) { return {}; } Optional __attribute__((weak)) key_from_string(StringView) { return {}; } Optional __attribute__((weak)) keyword_ca_from_string(StringView) { return {}; } @@ -794,57 +566,33 @@ Optional __attribute__((weak)) keyword_kn_from_string(StringV Optional __attribute__((weak)) keyword_nu_from_string(StringView) { return {}; } Vector __attribute__((weak)) get_keywords_for_locale(StringView, StringView) { return {}; } Optional __attribute__((weak)) get_preferred_keyword_value_for_locale(StringView, StringView) { return {}; } -Optional __attribute__((weak)) get_locale_display_patterns(StringView) { return {}; } -Optional __attribute__((weak)) get_locale_language_mapping(StringView, StringView) { return {}; } -Optional __attribute__((weak)) get_locale_territory_mapping(StringView, StringView) { return {}; } -Optional __attribute__((weak)) get_locale_script_mapping(StringView, StringView) { return {}; } -Optional __attribute__((weak)) get_locale_long_currency_mapping(StringView, StringView) { return {}; } -Optional __attribute__((weak)) get_locale_short_currency_mapping(StringView, StringView) { return {}; } -Optional __attribute__((weak)) get_locale_narrow_currency_mapping(StringView, StringView) { return {}; } -Optional __attribute__((weak)) get_locale_numeric_currency_mapping(StringView, StringView) { return {}; } -Optional __attribute__((weak)) get_locale_calendar_mapping(StringView, StringView) { return {}; } -Optional __attribute__((weak)) get_locale_long_date_field_mapping(StringView, StringView) { return {}; } -Optional __attribute__((weak)) get_locale_short_date_field_mapping(StringView, StringView) { return {}; } -Optional __attribute__((weak)) get_locale_narrow_date_field_mapping(StringView, StringView) { return {}; } -// https://www.unicode.org/reports/tr35/tr35-39/tr35-general.html#Display_Name_Elements -Optional format_locale_for_display(StringView locale, LocaleID locale_id) +Vector available_currencies() { - auto language_id = move(locale_id.language_id); - VERIFY(language_id.language.has_value()); + UErrorCode status = U_ZERO_ERROR; - auto patterns = get_locale_display_patterns(locale); - if (!patterns.has_value()) + auto* currencies = ucurr_openISOCurrencies(UCURR_ALL, &status); + ScopeGuard guard { [&]() { uenum_close(currencies); } }; + + if (icu_failure(status)) return {}; - auto primary_tag = get_locale_language_mapping(locale, *language_id.language).value_or(*language_id.language); - Optional script; - Optional region; + Vector result; - if (language_id.script.has_value()) - script = get_locale_script_mapping(locale, *language_id.script).value_or(*language_id.script); - if (language_id.region.has_value()) - region = get_locale_territory_mapping(locale, *language_id.region).value_or(*language_id.region); + while (true) { + i32 length = 0; + char const* next = uenum_next(currencies, &length, &status); - Optional secondary_tag; + if (icu_failure(status)) + return {}; + if (next == nullptr) + break; - if (script.has_value() && region.has_value()) { - secondary_tag = MUST(String::from_utf8(patterns->locale_separator)); - secondary_tag = MUST(secondary_tag->replace("{0}"sv, *script, ReplaceMode::FirstOnly)); - secondary_tag = MUST(secondary_tag->replace("{1}"sv, *region, ReplaceMode::FirstOnly)); - } else if (script.has_value()) { - secondary_tag = MUST(String::from_utf8(*script)); - } else if (region.has_value()) { - secondary_tag = MUST(String::from_utf8(*region)); + // https://unicode-org.atlassian.net/browse/ICU-21687 + if (StringView currency { next, static_cast(length) }; currency != "LSM"sv) + result.append(MUST(String::from_utf8(currency))); } - if (!secondary_tag.has_value()) - return MUST(String::from_utf8(primary_tag)); - - auto result = MUST(String::from_utf8(patterns->locale_pattern)); - result = MUST(result.replace("{0}"sv, primary_tag, ReplaceMode::FirstOnly)); - result = MUST(result.replace("{1}"sv, *secondary_tag, ReplaceMode::FirstOnly)); - return result; } @@ -852,12 +600,6 @@ Optional __attribute__((weak)) get_locale_list_patterns(StringView Optional __attribute__((weak)) character_order_from_string(StringView) { return {}; } StringView __attribute__((weak)) character_order_to_string(CharacterOrder) { return {}; } Optional __attribute__((weak)) character_order_for_locale(StringView) { return {}; } -Optional __attribute__((weak)) resolve_language_alias(StringView) { return {}; } -Optional __attribute__((weak)) resolve_territory_alias(StringView) { return {}; } -Optional __attribute__((weak)) resolve_script_tag_alias(StringView) { return {}; } -Optional __attribute__((weak)) resolve_variant_alias(StringView) { return {}; } -Optional __attribute__((weak)) resolve_subdivision_alias(StringView) { return {}; } -void __attribute__((weak)) resolve_complex_language_aliases(LanguageID&) { } Optional __attribute__((weak)) add_likely_subtags(LanguageID const&) { return {}; } Optional remove_likely_subtags(LanguageID const& language_id) @@ -902,21 +644,6 @@ Optional remove_likely_subtags(LanguageID const& language_id) return return_language_and_variants(maximized.release_value(), move(variants)); } -Optional __attribute__((weak)) resolve_most_likely_territory(LanguageID const&) { return {}; } - -String resolve_most_likely_territory_alias(LanguageID const& language_id, StringView territory_alias) -{ - auto aliases = territory_alias.split_view(' '); - - if (aliases.size() > 1) { - auto territory = resolve_most_likely_territory(language_id); - if (territory.has_value() && aliases.contains_slow(*territory)) - return territory.release_value(); - } - - return MUST(String::from_utf8(aliases[0])); -} - String LanguageID::to_string() const { StringBuilder builder; diff --git a/Userland/Libraries/LibLocale/Locale.h b/Userland/Libraries/LibLocale/Locale.h index ef0b445e98e..bc2ff37d736 100644 --- a/Userland/Libraries/LibLocale/Locale.h +++ b/Userland/Libraries/LibLocale/Locale.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, Tim Flynn + * Copyright (c) 2021-2024, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ @@ -139,8 +139,8 @@ bool is_type_identifier(StringView); Optional parse_unicode_language_id(StringView); Optional parse_unicode_locale_id(StringView); +String canonicalize_unicode_locale_id(StringView); void canonicalize_unicode_extension_values(StringView key, String& value, bool remove_true); -Optional canonicalize_unicode_locale_id(LocaleID&); StringView default_locale(); bool is_locale_available(StringView locale); @@ -150,19 +150,15 @@ ReadonlySpan get_available_calendars(); ReadonlySpan get_available_collation_case_orderings(); ReadonlySpan get_available_collation_numeric_orderings(); ReadonlySpan get_available_collation_types(); -ReadonlySpan get_available_currencies(); ReadonlySpan get_available_hour_cycles(); ReadonlySpan get_available_number_systems(); +Vector available_currencies(); + Style style_from_string(StringView style); StringView style_to_string(Style style); Optional locale_from_string(StringView locale); -Optional language_from_string(StringView language); -Optional territory_from_string(StringView territory); -Optional script_tag_from_string(StringView script_tag); -Optional currency_from_string(StringView currency); -Optional date_field_from_string(StringView calendar); Optional list_pattern_type_from_string(StringView list_pattern_type); Optional key_from_string(StringView key); @@ -175,38 +171,13 @@ Optional keyword_nu_from_string(StringView nu); Vector get_keywords_for_locale(StringView locale, StringView key); Optional get_preferred_keyword_value_for_locale(StringView locale, StringView key); -Optional get_locale_display_patterns(StringView locale); -Optional format_locale_for_display(StringView locale, LocaleID locale_id); - -Optional get_locale_language_mapping(StringView locale, StringView language); -Optional get_locale_territory_mapping(StringView locale, StringView territory); -Optional get_locale_script_mapping(StringView locale, StringView script); -Optional get_locale_long_currency_mapping(StringView locale, StringView currency); -Optional get_locale_short_currency_mapping(StringView locale, StringView currency); -Optional get_locale_narrow_currency_mapping(StringView locale, StringView currency); -Optional get_locale_numeric_currency_mapping(StringView locale, StringView currency); -Optional get_locale_calendar_mapping(StringView locale, StringView calendar); -Optional get_locale_long_date_field_mapping(StringView locale, StringView date_field); -Optional get_locale_short_date_field_mapping(StringView locale, StringView date_field); -Optional get_locale_narrow_date_field_mapping(StringView locale, StringView date_field); - Optional get_locale_list_patterns(StringView locale, StringView type, Style style); Optional character_order_from_string(StringView character_order); StringView character_order_to_string(CharacterOrder character_order); Optional character_order_for_locale(StringView locale); -Optional resolve_language_alias(StringView language); -Optional resolve_territory_alias(StringView territory); -Optional resolve_script_tag_alias(StringView script_tag); -Optional resolve_variant_alias(StringView variant); -Optional resolve_subdivision_alias(StringView subdivision); -void resolve_complex_language_aliases(LanguageID& language_id); - Optional add_likely_subtags(LanguageID const& language_id); Optional remove_likely_subtags(LanguageID const& language_id); -Optional resolve_most_likely_territory(LanguageID const& language_id); -String resolve_most_likely_territory_alias(LanguageID const& language_id, StringView territory_alias); - }