From 12e7c0808a1826e236bb8d0aab1e1c72edf37cb5 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Wed, 6 Jul 2022 08:21:32 -0400 Subject: [PATCH] LibUnicode: Generate per-region week data This includes: * The minimum number of days in a week for that week to count as the first week of a new year. * The day to be shown as the first day of the week in a calendar. * The start/end days of the weekend. Like the existing hour cycle data, week data is presented per-region in the CLDR, rather than per-locale. The method to add likely subtags to a locale to perform region lookups is the same. The lists of regions in the CLDR for hour cycle, minimum days, first day, and weekend days are quite different. So rather than changing the existing HourCycleRegion enum to a generic Region enum, we generate separate enums for each of the week data fields. This allows each lookup into these fields to remain simple array-based index access, without any "jumps" for regions that don't have CLDR data for a field. 
--- .../GenerateUnicodeDateTimeFormat.cpp | 117 ++++++++++++++++++ .../Libraries/LibUnicode/DateTimeFormat.cpp | 65 ++++++++-- .../Libraries/LibUnicode/DateTimeFormat.h | 16 +++ Userland/Libraries/LibUnicode/Forward.h | 4 + 4 files changed, 192 insertions(+), 10 deletions(-) diff --git a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp index ab30d47faaa..5a1476462ec 100644 --- a/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp +++ b/Meta/Lagom/Tools/CodeGenerators/LibUnicode/GenerateUnicodeDateTimeFormat.cpp @@ -7,6 +7,7 @@ #include "GeneratorUtil.h" #include #include +#include #include #include #include @@ -565,6 +566,18 @@ struct UnicodeLocaleData { HashMap hour_cycles; Vector hour_cycle_regions; + HashMap minimum_days; + Vector minimum_days_regions; + + HashMap first_day; + Vector first_day_regions; + + HashMap weekend_start; + Vector weekend_start_regions; + + HashMap weekend_end; + Vector weekend_end_regions; + HashMap> meta_zones; Vector time_zones { "UTC"sv }; @@ -640,6 +653,71 @@ static ErrorOr parse_hour_cycles(String core_path, UnicodeLocaleData& loca return {}; } +static ErrorOr parse_week_data(String core_path, UnicodeLocaleData& locale_data) +{ + // https://unicode.org/reports/tr35/tr35-dates.html#Week_Data + LexicalPath week_data_path(move(core_path)); + week_data_path = week_data_path.append("supplemental"sv); + week_data_path = week_data_path.append("weekData.json"sv); + + auto week_data = TRY(read_json_file(week_data_path.string())); + auto const& supplemental_object = week_data.as_object().get("supplemental"sv); + auto const& week_data_object = supplemental_object.as_object().get("weekData"sv); + + auto parse_weekday = [](StringView day) -> Unicode::Weekday { + if (day == "sun"sv) + return Unicode::Weekday::Sunday; + if (day == "mon"sv) + return Unicode::Weekday::Monday; + if (day == "tue"sv) + return 
Unicode::Weekday::Tuesday; + if (day == "wed"sv) + return Unicode::Weekday::Wednesday; + if (day == "thu"sv) + return Unicode::Weekday::Thursday; + if (day == "fri"sv) + return Unicode::Weekday::Friday; + if (day == "sat"sv) + return Unicode::Weekday::Saturday; + VERIFY_NOT_REACHED(); + }; + + auto parse_regional_weekdays = [&](auto const& region, auto const& weekday, auto& weekdays_map, auto& weekday_regions) { + if (region.ends_with("alt-variant"sv)) + return; + + weekdays_map.set(region, parse_weekday(weekday)); + + if (!weekday_regions.contains_slow(region)) + weekday_regions.append(region); + }; + + auto const& minimum_days_object = week_data_object.as_object().get("minDays"sv); + auto const& first_day_object = week_data_object.as_object().get("firstDay"sv); + auto const& weekend_start_object = week_data_object.as_object().get("weekendStart"sv); + auto const& weekend_end_object = week_data_object.as_object().get("weekendEnd"sv); + + minimum_days_object.as_object().for_each_member([&](auto const& region, auto const& value) { + auto minimum_days = value.as_string().template to_uint(); + locale_data.minimum_days.set(region, *minimum_days); + + if (!locale_data.minimum_days_regions.contains_slow(region)) + locale_data.minimum_days_regions.append(region); + }); + + first_day_object.as_object().for_each_member([&](auto const& region, auto const& value) { + parse_regional_weekdays(region, value.as_string(), locale_data.first_day, locale_data.first_day_regions); + }); + weekend_start_object.as_object().for_each_member([&](auto const& region, auto const& value) { + parse_regional_weekdays(region, value.as_string(), locale_data.weekend_start, locale_data.weekend_start_regions); + }); + weekend_end_object.as_object().for_each_member([&](auto const& region, auto const& value) { + parse_regional_weekdays(region, value.as_string(), locale_data.weekend_end, locale_data.weekend_end_regions); + }); + + return {}; +} + static ErrorOr parse_meta_zones(String core_path, 
UnicodeLocaleData& locale_data) { // https://unicode.org/reports/tr35/tr35-dates.html#Metazones @@ -1568,6 +1646,7 @@ static ErrorOr parse_day_periods(String core_path, UnicodeLocaleData& loca static ErrorOr parse_all_locales(String core_path, String dates_path, UnicodeLocaleData& locale_data) { TRY(parse_hour_cycles(core_path, locale_data)); + TRY(parse_week_data(core_path, locale_data)); TRY(parse_meta_zones(core_path, locale_data)); auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path))); @@ -1631,6 +1710,10 @@ namespace Unicode { generate_enum(generator, format_identifier, "Calendar"sv, {}, locale_data.calendars); generate_enum(generator, format_identifier, "HourCycleRegion"sv, {}, locale_data.hour_cycle_regions); + generate_enum(generator, format_identifier, "MinimumDaysRegion"sv, {}, locale_data.minimum_days_regions); + generate_enum(generator, format_identifier, "FirstDayRegion"sv, {}, locale_data.first_day_regions); + generate_enum(generator, format_identifier, "WeekendStartRegion"sv, {}, locale_data.weekend_start_regions); + generate_enum(generator, format_identifier, "WeekendEndRegion"sv, {}, locale_data.weekend_end_regions); generator.append(R"~~~( } @@ -1912,6 +1995,10 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~"); append_mapping(locales, locale_data.locales, s_time_zone_format_index_type, "s_locale_time_zone_formats"sv, [](auto const& locale) { return locale.time_zone_formats; }); append_mapping(locales, locale_data.locales, s_day_period_index_type, "s_locale_day_periods"sv, [](auto const& locale) { return locale.day_periods; }); append_mapping(locale_data.hour_cycle_regions, locale_data.hour_cycles, s_hour_cycle_list_index_type, "s_hour_cycles"sv, [](auto const& hour_cycles) { return hour_cycles; }); + append_mapping(locale_data.minimum_days_regions, locale_data.minimum_days, "u8"sv, "s_minimum_days"sv, [](auto minimum_days) { return minimum_days; }); + append_mapping(locale_data.first_day_regions, locale_data.first_day, "u8"sv, 
"s_first_day"sv, [](auto first_day) { return to_underlying(first_day); }); + append_mapping(locale_data.weekend_start_regions, locale_data.weekend_start, "u8"sv, "s_weekend_start"sv, [](auto weekend_start) { return to_underlying(weekend_start); }); + append_mapping(locale_data.weekend_end_regions, locale_data.weekend_end, "u8"sv, "s_weekend_end"sv, [](auto weekend_end) { return to_underlying(weekend_end); }); generator.append("\n"); auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values, Vector const& aliases = {}) { @@ -1927,6 +2014,10 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~"); }; append_from_string("HourCycleRegion"sv, "hour_cycle_region"sv, locale_data.hour_cycle_regions); + append_from_string("MinimumDaysRegion"sv, "minimum_days_region"sv, locale_data.minimum_days_regions); + append_from_string("FirstDayRegion"sv, "first_day_region"sv, locale_data.first_day_regions); + append_from_string("WeekendStartRegion"sv, "weekend_start_region"sv, locale_data.weekend_start_regions); + append_from_string("WeekendEndRegion"sv, "weekend_end_region"sv, locale_data.weekend_end_regions); generator.append(R"~~~( static Optional keyword_to_calendar(KeywordCalendar keyword) @@ -1965,7 +2056,33 @@ Vector get_regional_hour_cycles(StringView region) return hour_cycles; } +)~~~"); + auto append_regional_lookup = [&](StringView return_type, StringView lookup_type) { + generator.set("return_type", return_type); + generator.set("lookup_type", lookup_type); + + generator.append(R"~~~( +Optional<@return_type@> get_regional_@lookup_type@(StringView region) +{ + auto region_value = @lookup_type@_region_from_string(region); + if (!region_value.has_value()) + return {}; + + auto region_index = to_underlying(*region_value); + auto @lookup_type@ = s_@lookup_type@.at(region_index); + + return static_cast<@return_type@>(@lookup_type@); +} +)~~~"); + }; + + append_regional_lookup("u8"sv, "minimum_days"sv); + 
append_regional_lookup("Unicode::Weekday"sv, "first_day"sv); + append_regional_lookup("Unicode::Weekday"sv, "weekend_start"sv); + append_regional_lookup("Unicode::Weekday"sv, "weekend_end"sv); + + generator.append(R"~~~( static CalendarData const* find_calendar_data(StringView locale, StringView calendar) { auto locale_value = locale_from_string(locale); diff --git a/Userland/Libraries/LibUnicode/DateTimeFormat.cpp b/Userland/Libraries/LibUnicode/DateTimeFormat.cpp index 5e2bfe5b423..8e96350396f 100644 --- a/Userland/Libraries/LibUnicode/DateTimeFormat.cpp +++ b/Userland/Libraries/LibUnicode/DateTimeFormat.cpp @@ -94,27 +94,40 @@ StringView calendar_pattern_style_to_string(CalendarPatternStyle style) Optional __attribute__((weak)) hour_cycle_region_from_string(StringView) { return {}; } Vector __attribute__((weak)) get_regional_hour_cycles(StringView) { return {}; } -// https://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table -Vector get_locale_hour_cycles(StringView locale) +template +static auto find_regional_values_for_locale(StringView locale, GetRegionalValues&& get_regional_values) { - if (auto hour_cycles = get_regional_hour_cycles(locale); !hour_cycles.is_empty()) - return hour_cycles; + auto has_value = [](auto const& container) { + if constexpr (requires { container.has_value(); }) + return container.has_value(); + else + return !container.is_empty(); + }; - auto return_default_hour_cycles = [&]() { return get_regional_hour_cycles("001"sv); }; + if (auto regional_values = get_regional_values(locale); has_value(regional_values)) + return regional_values; + + auto return_default_values = [&]() { return get_regional_values("001"sv); }; auto language = parse_unicode_language_id(locale); if (!language.has_value()) - return return_default_hour_cycles(); + return return_default_values(); if (!language->region.has_value()) language = add_likely_subtags(*language); if (!language.has_value() || !language->region.has_value()) - return 
return_default_hour_cycles(); + return return_default_values(); - if (auto hour_cycles = get_regional_hour_cycles(*language->region); !hour_cycles.is_empty()) - return hour_cycles; + if (auto regional_values = get_regional_values(*language->region); has_value(regional_values)) + return regional_values; - return return_default_hour_cycles(); + return return_default_values(); +} + +// https://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table +Vector get_locale_hour_cycles(StringView locale) +{ + return find_regional_values_for_locale(locale, get_regional_hour_cycles); } Optional get_default_regional_hour_cycle(StringView locale) @@ -124,6 +137,38 @@ Optional get_default_regional_hour_cycle(StringView locale) return {}; } +Optional __attribute__((weak)) minimum_days_region_from_string(StringView) { return {}; } +Optional __attribute__((weak)) get_regional_minimum_days(StringView) { return {}; } + +Optional get_locale_minimum_days(StringView locale) +{ + return find_regional_values_for_locale(locale, get_regional_minimum_days); +} + +Optional __attribute__((weak)) first_day_region_from_string(StringView) { return {}; } +Optional __attribute__((weak)) get_regional_first_day(StringView) { return {}; } + +Optional get_locale_first_day(StringView locale) +{ + return find_regional_values_for_locale(locale, get_regional_first_day); +} + +Optional __attribute__((weak)) weekend_start_region_from_string(StringView) { return {}; } +Optional __attribute__((weak)) get_regional_weekend_start(StringView) { return {}; } + +Optional get_locale_weekend_start(StringView locale) +{ + return find_regional_values_for_locale(locale, get_regional_weekend_start); +} + +Optional __attribute__((weak)) weekend_end_region_from_string(StringView) { return {}; } +Optional __attribute__((weak)) get_regional_weekend_end(StringView) { return {}; } + +Optional get_locale_weekend_end(StringView locale) +{ + return find_regional_values_for_locale(locale, get_regional_weekend_end); +} + 
String combine_skeletons(StringView first, StringView second) { // https://unicode.org/reports/tr35/tr35-dates.html#availableFormats_appendItems diff --git a/Userland/Libraries/LibUnicode/DateTimeFormat.h b/Userland/Libraries/LibUnicode/DateTimeFormat.h index 7ab2b46bcae..46bc0344467 100644 --- a/Userland/Libraries/LibUnicode/DateTimeFormat.h +++ b/Userland/Libraries/LibUnicode/DateTimeFormat.h @@ -191,6 +191,22 @@ Vector get_regional_hour_cycles(StringView region); Vector get_locale_hour_cycles(StringView locale); Optional get_default_regional_hour_cycle(StringView locale); +Optional minimum_days_region_from_string(StringView minimum_days_region); +Optional get_regional_minimum_days(StringView region); +Optional get_locale_minimum_days(StringView region); + +Optional first_day_region_from_string(StringView first_day_region); +Optional get_regional_first_day(StringView region); +Optional get_locale_first_day(StringView region); + +Optional weekend_start_region_from_string(StringView weekend_start_region); +Optional get_regional_weekend_start(StringView region); +Optional get_locale_weekend_start(StringView region); + +Optional weekend_end_region_from_string(StringView weekend_end_region); +Optional get_regional_weekend_end(StringView region); +Optional get_locale_weekend_end(StringView region); + String combine_skeletons(StringView first, StringView second); Optional get_calendar_date_format(StringView locale, StringView calendar); diff --git a/Userland/Libraries/LibUnicode/Forward.h b/Userland/Libraries/LibUnicode/Forward.h index 38bf202e685..4edea7b0f54 100644 --- a/Userland/Libraries/LibUnicode/Forward.h +++ b/Userland/Libraries/LibUnicode/Forward.h @@ -21,6 +21,7 @@ enum class Currency : u16; enum class DateField : u8; enum class DayPeriod : u8; enum class Era : u8; +enum class FirstDayRegion : u8; enum class GeneralCategory : u8; enum class GraphemeBreakProperty : u8; enum class HourCycle : u8; @@ -34,6 +35,7 @@ enum class Language : u16; enum class 
ListPatternStyle : u8; enum class ListPatternType : u8; enum class Locale : u16; +enum class MinimumDaysRegion : u8; enum class Month : u8; enum class NumericSymbol : u8; enum class Property : u8; @@ -44,6 +46,8 @@ enum class StandardNumberFormatType : u8; enum class Style : u8; enum class Territory : u8; enum class Weekday : u8; +enum class WeekendEndRegion : u8; +enum class WeekendStartRegion : u8; enum class WordBreakProperty : u8; struct CalendarFormat;