mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-20 19:45:12 +00:00
LibUnicode: Parse and generate time zone names in long and short form
This commit is contained in:
parent
2bbf8aa24c
commit
b76e44f66f
Notes:
sideshowbarker
2024-07-17 23:05:10 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/b76e44f66f2 Pull-request: https://github.com/SerenityOS/serenity/pull/11184
4 changed files with 208 additions and 2 deletions
|
@ -144,8 +144,15 @@ struct Calendar {
|
|||
HashMap<String, CalendarSymbols> symbols {};
|
||||
};
|
||||
|
||||
struct TimeZone {
|
||||
StringIndexType time_zone { 0 };
|
||||
StringIndexType long_name { 0 };
|
||||
StringIndexType short_name { 0 };
|
||||
};
|
||||
|
||||
struct Locale {
|
||||
HashMap<String, Calendar> calendars;
|
||||
HashMap<String, TimeZone> time_zones;
|
||||
};
|
||||
|
||||
struct UnicodeLocaleData {
|
||||
|
@ -156,6 +163,9 @@ struct UnicodeLocaleData {
|
|||
HashMap<String, Vector<Unicode::HourCycle>> hour_cycles;
|
||||
Vector<String> hour_cycle_regions;
|
||||
|
||||
HashMap<String, StringIndexType> meta_zones;
|
||||
Vector<String> time_zones;
|
||||
|
||||
Vector<String> calendars;
|
||||
Vector<Alias> calendar_aliases {
|
||||
// FIXME: Aliases should come from BCP47. See: https://unicode-org.atlassian.net/browse/CLDR-15158
|
||||
|
@ -209,6 +219,35 @@ static ErrorOr<void> parse_hour_cycles(String core_path, UnicodeLocaleData& loca
|
|||
return {};
|
||||
};
|
||||
|
||||
static ErrorOr<void> parse_meta_zones(String core_path, UnicodeLocaleData& locale_data)
|
||||
{
|
||||
// https://unicode.org/reports/tr35/tr35-dates.html#Metazones
|
||||
LexicalPath meta_zone_path(move(core_path));
|
||||
meta_zone_path = meta_zone_path.append("supplemental"sv);
|
||||
meta_zone_path = meta_zone_path.append("metaZones.json"sv);
|
||||
|
||||
auto meta_zone_file = TRY(Core::File::open(meta_zone_path.string(), Core::OpenMode::ReadOnly));
|
||||
auto meta_zone = TRY(JsonValue::from_string(meta_zone_file->read_all()));
|
||||
|
||||
auto const& supplemental_object = meta_zone.as_object().get("supplemental"sv);
|
||||
auto const& meta_zone_object = supplemental_object.as_object().get("metaZones"sv);
|
||||
auto const& meta_zone_array = meta_zone_object.as_object().get("metazones"sv);
|
||||
|
||||
meta_zone_array.as_array().for_each([&](JsonValue const& value) {
|
||||
auto const& mapping = value.as_object().get("mapZone"sv);
|
||||
auto const& meta_zone = mapping.as_object().get("_other"sv);
|
||||
auto const& golden_zone = mapping.as_object().get("_type"sv);
|
||||
|
||||
auto golden_zone_index = locale_data.unique_strings.ensure(golden_zone.as_string());
|
||||
locale_data.meta_zones.set(meta_zone.as_string(), golden_zone_index);
|
||||
});
|
||||
|
||||
// UTC does not appear in metaZones.json. Define it for convenience so other parsers don't need to check for its existence.
|
||||
locale_data.meta_zones.set("UTC"sv, locale_data.unique_strings.ensure("UTC"sv));
|
||||
|
||||
return {};
|
||||
};
|
||||
|
||||
static constexpr auto is_char(char ch)
|
||||
{
|
||||
return [ch](auto c) { return c == ch; };
|
||||
|
@ -766,9 +805,74 @@ static ErrorOr<void> parse_calendars(String locale_calendars_path, UnicodeLocale
|
|||
return {};
|
||||
}
|
||||
|
||||
static ErrorOr<void> parse_time_zone_names(String locale_time_zone_names_path, UnicodeLocaleData& locale_data, Locale& locale)
|
||||
{
|
||||
LexicalPath time_zone_names_path(move(locale_time_zone_names_path));
|
||||
time_zone_names_path = time_zone_names_path.append("timeZoneNames.json"sv);
|
||||
|
||||
auto time_zone_names_file = TRY(Core::File::open(time_zone_names_path.string(), Core::OpenMode::ReadOnly));
|
||||
auto time_zone_names = TRY(JsonValue::from_string(time_zone_names_file->read_all()));
|
||||
|
||||
auto const& main_object = time_zone_names.as_object().get("main"sv);
|
||||
auto const& locale_object = main_object.as_object().get(time_zone_names_path.parent().basename());
|
||||
auto const& dates_object = locale_object.as_object().get("dates"sv);
|
||||
auto const& time_zone_names_object = dates_object.as_object().get("timeZoneNames"sv);
|
||||
auto const& meta_zone_object = time_zone_names_object.as_object().get("metazone"sv);
|
||||
|
||||
if (meta_zone_object.is_null())
|
||||
return {};
|
||||
|
||||
auto parse_name = [&](StringView type, JsonObject const& meta_zone_object) -> Optional<StringIndexType> {
|
||||
auto const& names = meta_zone_object.get(type);
|
||||
if (!names.is_object())
|
||||
return {};
|
||||
|
||||
auto const& daylight = names.as_object().get("daylight"sv);
|
||||
if (daylight.is_string())
|
||||
return locale_data.unique_strings.ensure(daylight.as_string());
|
||||
|
||||
auto const& standard = names.as_object().get("standard"sv);
|
||||
if (standard.is_string())
|
||||
return locale_data.unique_strings.ensure(standard.as_string());
|
||||
|
||||
return {};
|
||||
};
|
||||
|
||||
auto parse_time_zone = [&](StringView meta_zone, JsonObject const& meta_zone_object) {
|
||||
auto golden_zone = locale_data.meta_zones.get(meta_zone).value();
|
||||
TimeZone time_zone { .time_zone = golden_zone };
|
||||
|
||||
if (auto long_name = parse_name("long"sv, meta_zone_object); long_name.has_value())
|
||||
time_zone.long_name = long_name.value();
|
||||
if (auto short_name = parse_name("short"sv, meta_zone_object); short_name.has_value())
|
||||
time_zone.short_name = short_name.value();
|
||||
|
||||
auto const& time_zone_name = locale_data.unique_strings.get(golden_zone);
|
||||
|
||||
if (!locale_data.time_zones.contains_slow(time_zone_name))
|
||||
locale_data.time_zones.append(time_zone_name);
|
||||
|
||||
locale.time_zones.set(time_zone_name, move(time_zone));
|
||||
};
|
||||
|
||||
meta_zone_object.as_object().for_each_member([&](auto const& meta_zone, JsonValue const& value) {
|
||||
parse_time_zone(meta_zone, value.as_object());
|
||||
});
|
||||
|
||||
// The long and short names for UTC are not under the "timeZoneNames/metazone" object, but are under "timeZoneNames/zone/Etc".
|
||||
auto const& zone_object = time_zone_names_object.as_object().get("zone"sv);
|
||||
auto const& etc_object = zone_object.as_object().get("Etc"sv);
|
||||
auto const& utc_object = etc_object.as_object().get("UTC"sv);
|
||||
parse_time_zone("UTC"sv, utc_object.as_object());
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
static ErrorOr<void> parse_all_locales(String core_path, String dates_path, UnicodeLocaleData& locale_data)
|
||||
{
|
||||
TRY(parse_hour_cycles(move(core_path), locale_data));
|
||||
TRY(parse_hour_cycles(core_path, locale_data));
|
||||
TRY(parse_meta_zones(move(core_path), locale_data));
|
||||
|
||||
auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path)));
|
||||
|
||||
auto remove_variants_from_path = [&](String path) -> ErrorOr<String> {
|
||||
|
@ -795,6 +899,8 @@ static ErrorOr<void> parse_all_locales(String core_path, String dates_path, Unic
|
|||
auto calendars_path = TRY(next_path_from_dir_iterator(calendars_iterator));
|
||||
TRY(parse_calendars(move(calendars_path), locale_data, locale));
|
||||
}
|
||||
|
||||
TRY(parse_time_zone_names(move(dates_path), locale_data, locale));
|
||||
}
|
||||
|
||||
return {};
|
||||
|
@ -803,6 +909,7 @@ static ErrorOr<void> parse_all_locales(String core_path, String dates_path, Unic
|
|||
static String format_identifier(StringView owner, String identifier)
|
||||
{
|
||||
identifier = identifier.replace("-"sv, "_"sv, true);
|
||||
identifier = identifier.replace("/"sv, "_"sv, true);
|
||||
|
||||
if (all_of(identifier, is_ascii_digit))
|
||||
return String::formatted("{}_{}", owner[0], identifier);
|
||||
|
@ -829,6 +936,7 @@ namespace Unicode {
|
|||
generate_enum(generator, format_identifier, "Calendar"sv, {}, locale_data.calendars, locale_data.calendar_aliases);
|
||||
generate_enum(generator, format_identifier, "HourCycleRegion"sv, {}, locale_data.hour_cycle_regions);
|
||||
generate_enum(generator, format_identifier, "CalendarSymbol"sv, {}, locale_data.symbols);
|
||||
generate_enum(generator, format_identifier, "TimeZone"sv, {}, locale_data.time_zones);
|
||||
|
||||
generator.append(R"~~~(
|
||||
namespace Detail {
|
||||
|
@ -848,6 +956,9 @@ Optional<StringView> get_calendar_month_symbol(StringView locale, StringView cal
|
|||
Optional<StringView> get_calendar_weekday_symbol(StringView locale, StringView calendar, CalendarPatternStyle style, Unicode::Weekday value);
|
||||
Optional<StringView> get_calendar_day_period_symbol(StringView locale, StringView calendar, CalendarPatternStyle style, Unicode::DayPeriod value);
|
||||
|
||||
Optional<TimeZone> time_zone_from_string(StringView time_zone);
|
||||
Optional<StringView> get_time_zone_name(StringView locale, StringView time_zone, CalendarPatternStyle style);
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -959,6 +1070,12 @@ struct CalendarData {
|
|||
Span<@calendar_pattern_index_type@ const> available_formats {};
|
||||
Array<Span<CalendarSymbols const>, @calendar_symbols_size@> symbols {};
|
||||
};
|
||||
|
||||
struct TimeZoneData {
|
||||
@string_index_type@ time_zone { 0 };
|
||||
@string_index_type@ long_name { 0 };
|
||||
@string_index_type@ short_name { 0 };
|
||||
};
|
||||
)~~~");
|
||||
|
||||
auto append_calendar_format = [&](auto const& calendar_format) {
|
||||
|
@ -1079,6 +1196,42 @@ static constexpr Array<CalendarData, @size@> @name@ { {)~~~");
|
|||
)~~~");
|
||||
};
|
||||
|
||||
auto append_time_zones = [&](String name, auto const& time_zones) {
|
||||
generator.set("name", name);
|
||||
generator.set("size", String::number(locale_data.time_zones.size()));
|
||||
|
||||
generator.append(R"~~~(
|
||||
static constexpr Array<TimeZoneData, @size@> @name@ { {)~~~");
|
||||
|
||||
constexpr size_t max_values_per_row = 20;
|
||||
size_t values_in_current_row = 0;
|
||||
|
||||
for (auto const& time_zone_key : locale_data.time_zones) {
|
||||
auto time_zone = time_zones.find(time_zone_key);
|
||||
|
||||
if (values_in_current_row++ > 0)
|
||||
generator.append(" ");
|
||||
|
||||
if (time_zone == time_zones.end()) {
|
||||
generator.append("{},");
|
||||
} else {
|
||||
generator.set("time_zone", String::number(time_zone->value.time_zone));
|
||||
generator.set("long_name", String::number(time_zone->value.long_name));
|
||||
generator.set("short_name", String::number(time_zone->value.short_name));
|
||||
generator.append("{ @time_zone@, @long_name@, @short_name@ },");
|
||||
}
|
||||
|
||||
if (values_in_current_row == max_values_per_row) {
|
||||
values_in_current_row = 0;
|
||||
generator.append("\n ");
|
||||
}
|
||||
}
|
||||
|
||||
generator.append(R"~~~(
|
||||
} };
|
||||
)~~~");
|
||||
};
|
||||
|
||||
auto append_hour_cycles = [&](String name, auto const& hour_cycle_region) {
|
||||
auto const& hour_cycles = locale_data.hour_cycles.find(hour_cycle_region)->value;
|
||||
|
||||
|
@ -1097,6 +1250,7 @@ static constexpr Array<u8, @size@> @name@ { { )~~~");
|
|||
};
|
||||
|
||||
generate_mapping(generator, locale_data.locales, "CalendarData"sv, "s_calendars"sv, "s_calendars_{}", [&](auto const& name, auto const& value) { append_calendars(name, value.calendars); });
|
||||
generate_mapping(generator, locale_data.locales, "TimeZoneData"sv, "s_time_zones"sv, "s_time_zones_{}", [&](auto const& name, auto const& value) { append_time_zones(name, value.time_zones); });
|
||||
generate_mapping(generator, locale_data.hour_cycle_regions, "u8"sv, "s_hour_cycles"sv, "s_hour_cycles_{}", [&](auto const& name, auto const& value) { append_hour_cycles(name, value); });
|
||||
|
||||
auto append_from_string = [&](StringView enum_title, StringView enum_snake, auto const& values, Vector<Alias> const& aliases = {}) {
|
||||
|
@ -1113,6 +1267,7 @@ static constexpr Array<u8, @size@> @name@ { { )~~~");
|
|||
|
||||
append_from_string("Calendar"sv, "calendar"sv, locale_data.calendars, locale_data.calendar_aliases);
|
||||
append_from_string("HourCycleRegion"sv, "hour_cycle_region"sv, locale_data.hour_cycle_regions);
|
||||
append_from_string("TimeZone"sv, "time_zone"sv, locale_data.time_zones);
|
||||
|
||||
generator.append(R"~~~(
|
||||
Vector<Unicode::HourCycle> get_regional_hour_cycles(StringView region)
|
||||
|
@ -1240,6 +1395,47 @@ Optional<StringView> get_calendar_day_period_symbol(StringView locale, StringVie
|
|||
return {};
|
||||
}
|
||||
|
||||
static TimeZoneData const* find_time_zone_data(StringView locale, StringView time_zone)
|
||||
{
|
||||
auto locale_value = locale_from_string(locale);
|
||||
if (!locale_value.has_value())
|
||||
return nullptr;
|
||||
|
||||
auto time_zone_value = time_zone_from_string(time_zone);
|
||||
if (!time_zone_value.has_value())
|
||||
return nullptr;
|
||||
|
||||
auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
|
||||
auto time_zone_index = to_underlying(*time_zone_value);
|
||||
|
||||
auto const& time_zones = s_time_zones.at(locale_index);
|
||||
return &time_zones[time_zone_index];
|
||||
}
|
||||
|
||||
Optional<StringView> get_time_zone_name(StringView locale, StringView time_zone, CalendarPatternStyle style)
|
||||
{
|
||||
if (auto const* data = find_time_zone_data(locale, time_zone); data != nullptr) {
|
||||
@string_index_type@ time_zone_index = 0;
|
||||
|
||||
switch (style) {
|
||||
case CalendarPatternStyle::Long:
|
||||
time_zone_index = data->long_name;
|
||||
break;
|
||||
case CalendarPatternStyle::Short:
|
||||
time_zone_index = data->short_name;
|
||||
break;
|
||||
|
||||
default:
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
if (time_zone_index != 0)
|
||||
return s_string_list[time_zone_index];
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
}
|
||||
)~~~");
|
||||
|
||||
|
|
|
@ -302,7 +302,7 @@ template<typename LocalesType, typename ListFormatter>
|
|||
void generate_mapping(SourceGenerator& generator, LocalesType const& locales, StringView type, StringView name, StringView format, ListFormatter&& format_list)
|
||||
{
|
||||
auto format_mapping_name = [](StringView format, StringView name) {
|
||||
auto mapping_name = name.to_lowercase_string().replace("-"sv, "_"sv, true);
|
||||
auto mapping_name = name.to_lowercase_string().replace("-"sv, "_"sv, true).replace("/"sv, "_"sv, true);
|
||||
return String::formatted(format, mapping_name);
|
||||
};
|
||||
|
||||
|
|
|
@ -176,4 +176,13 @@ Optional<StringView> get_calendar_day_period_symbol([[maybe_unused]] StringView
|
|||
#endif
|
||||
}
|
||||
|
||||
Optional<StringView> get_time_zone_name([[maybe_unused]] StringView locale, [[maybe_unused]] StringView time_zone, [[maybe_unused]] CalendarPatternStyle style)
|
||||
{
|
||||
#if ENABLE_UNICODE_DATA
|
||||
return Detail::get_time_zone_name(locale, time_zone, style);
|
||||
#else
|
||||
return {};
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -142,5 +142,6 @@ Optional<StringView> get_calendar_era_symbol(StringView locale, StringView calen
|
|||
Optional<StringView> get_calendar_month_symbol(StringView locale, StringView calendar, CalendarPatternStyle style, Unicode::Month value);
|
||||
Optional<StringView> get_calendar_weekday_symbol(StringView locale, StringView calendar, CalendarPatternStyle style, Unicode::Weekday value);
|
||||
Optional<StringView> get_calendar_day_period_symbol(StringView locale, StringView calendar, CalendarPatternStyle style, Unicode::DayPeriod value);
|
||||
Optional<StringView> get_time_zone_name(StringView locale, StringView time_zone, CalendarPatternStyle style);
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue