LibJS+LibLocale: Replace preferred hour cycle lookups with ICU

This commit is contained in:
Timothy Flynn 2024-06-12 16:16:49 -04:00 committed by Andreas Kling
parent e2bffe5612
commit 9cb1857dc6
Notes: sideshowbarker 2024-07-16 20:12:13 +09:00
6 changed files with 33 additions and 124 deletions

View file

@ -21,22 +21,7 @@
#include <LibCore/Directory.h>
#include <LibLocale/DateTimeFormat.h>
using HourCycleList = Vector<Locale::HourCycle>;
// Formatter specialization that writes a Locale::HourCycle as its underlying integer value.
template<>
struct AK::Formatter<Locale::HourCycle> : Formatter<FormatString> {
    ErrorOr<void> format(FormatBuilder& builder, Locale::HourCycle hour_cycle)
    {
        auto const numeric_value = to_underlying(hour_cycle);
        return builder.put_u64(numeric_value);
    }
};
struct CLDR {
UniqueStorage<HourCycleList> unique_hour_cycle_lists;
HashMap<ByteString, size_t> hour_cycles;
Vector<ByteString> hour_cycle_regions;
HashMap<ByteString, u8> minimum_days;
Vector<ByteString> minimum_days_regions;
@ -50,50 +35,6 @@ struct CLDR {
Vector<ByteString> weekend_end_regions;
};
// Reads "supplemental/timeData.json" underneath `core_path` and records, for every member of its
// "timeData" object, the list of hour cycles parsed from that member's "_allowed" string.
//
// Each distinct list is stored once in `cldr.unique_hour_cycle_lists`; `cldr.hour_cycles` maps the
// member key to the index of its list, and `cldr.hour_cycle_regions` collects each key once.
//
// Returns an error if reading the JSON file fails.
// https://unicode.org/reports/tr35/tr35-dates.html#Time_Data
static ErrorOr<void> parse_hour_cycles(ByteString core_path, CLDR& cldr)
{
    auto time_data_path = LexicalPath(move(core_path))
                              .append("supplemental"sv)
                              .append("timeData.json"sv);

    auto time_data = TRY(read_json_file(time_data_path.string()));
    auto const& supplemental_object = time_data.as_object().get_object("supplemental"sv).value();
    auto const& time_data_object = supplemental_object.get_object("timeData"sv).value();

    // Maps a single time-data symbol to an hour cycle; symbols not listed here yield no value.
    auto to_hour_cycle = [](StringView symbol) -> Optional<Locale::HourCycle> {
        if (symbol == "K"sv)
            return Locale::HourCycle::H11;
        if (symbol == "k"sv)
            return Locale::HourCycle::H24;
        if (symbol.is_one_of("h"sv, "hb"sv, "hB"sv))
            return Locale::HourCycle::H12;
        if (symbol.is_one_of("H"sv, "Hb"sv, "HB"sv))
            return Locale::HourCycle::H23;
        return {};
    };

    time_data_object.for_each_member([&](auto const& region, JsonValue const& entry) {
        // "_allowed" is a space-separated list of symbols; unrecognized symbols are skipped.
        auto allowed_symbols = entry.as_object().get_byte_string("_allowed"sv).value();

        Vector<Locale::HourCycle> region_hour_cycles;

        for (auto symbol : allowed_symbols.split_view(' ')) {
            auto parsed = to_hour_cycle(symbol);
            if (parsed.has_value())
                region_hour_cycles.append(parsed.value());
        }

        auto list_index = cldr.unique_hour_cycle_lists.ensure(move(region_hour_cycles));
        cldr.hour_cycles.set(region, list_index);

        if (!cldr.hour_cycle_regions.contains_slow(region))
            cldr.hour_cycle_regions.append(region);
    });

    return {};
}
static ErrorOr<void> parse_week_data(ByteString core_path, CLDR& cldr)
{
// https://unicode.org/reports/tr35/tr35-dates.html#Week_Data
@ -161,7 +102,6 @@ static ErrorOr<void> parse_week_data(ByteString core_path, CLDR& cldr)
// Runs every CLDR parsing step for the data under `core_path`, filling in `cldr`.
// The first step that fails has its error propagated to the caller via TRY.
static ErrorOr<void> parse_all_locales(ByteString core_path, CLDR& cldr)
{
// Hour cycle data is parsed first, followed by week data.
TRY(parse_hour_cycles(core_path, cldr));
TRY(parse_week_data(core_path, cldr));
return {};
}
@ -191,7 +131,6 @@ static ErrorOr<void> generate_unicode_locale_header(Core::InputBufferedFile& fil
namespace Locale {
)~~~");
generate_enum(generator, format_identifier, "HourCycleRegion"sv, {}, cldr.hour_cycle_regions);
generate_enum(generator, format_identifier, "MinimumDaysRegion"sv, {}, cldr.minimum_days_regions);
generate_enum(generator, format_identifier, "FirstDayRegion"sv, {}, cldr.first_day_regions);
generate_enum(generator, format_identifier, "WeekendStartRegion"sv, {}, cldr.weekend_start_regions);
@ -225,8 +164,6 @@ static ErrorOr<void> generate_unicode_locale_implementation(Core::InputBufferedF
namespace Locale {
)~~~");
cldr.unique_hour_cycle_lists.generate(generator, cldr.unique_hour_cycle_lists.type_that_fits(), "s_hour_cycle_lists"sv);
auto append_mapping = [&](auto const& keys, auto const& map, auto type, auto name, auto mapping_getter) {
generator.set("type", type);
generator.set("name", name);
@ -248,7 +185,6 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~");
generator.append(" } };");
};
append_mapping(cldr.hour_cycle_regions, cldr.hour_cycles, cldr.unique_hour_cycle_lists.type_that_fits(), "s_hour_cycles"sv, [](auto const& hour_cycles) { return hour_cycles; });
append_mapping(cldr.minimum_days_regions, cldr.minimum_days, "u8"sv, "s_minimum_days"sv, [](auto minimum_days) { return minimum_days; });
append_mapping(cldr.first_day_regions, cldr.first_day, "u8"sv, "s_first_day"sv, [](auto first_day) { return to_underlying(first_day); });
append_mapping(cldr.weekend_start_regions, cldr.weekend_start, "u8"sv, "s_weekend_start"sv, [](auto weekend_start) { return to_underlying(weekend_start); });
@ -269,34 +205,11 @@ static constexpr Array<@type@, @size@> @name@ { {)~~~");
return {};
};
TRY(append_from_string("HourCycleRegion"sv, "hour_cycle_region"sv, cldr.hour_cycle_regions));
TRY(append_from_string("MinimumDaysRegion"sv, "minimum_days_region"sv, cldr.minimum_days_regions));
TRY(append_from_string("FirstDayRegion"sv, "first_day_region"sv, cldr.first_day_regions));
TRY(append_from_string("WeekendStartRegion"sv, "weekend_start_region"sv, cldr.weekend_start_regions));
TRY(append_from_string("WeekendEndRegion"sv, "weekend_end_region"sv, cldr.weekend_end_regions));
generator.append(R"~~~(
Vector<HourCycle> get_regional_hour_cycles(StringView region)
{
auto region_value = hour_cycle_region_from_string(region);
if (!region_value.has_value())
return {};
auto region_index = to_underlying(*region_value);
auto regional_hour_cycles_index = s_hour_cycles.at(region_index);
auto const& regional_hour_cycles = s_hour_cycle_lists.at(regional_hour_cycles_index);
Vector<HourCycle> hour_cycles;
hour_cycles.ensure_capacity(regional_hour_cycles.size());
for (auto hour_cycle : regional_hour_cycles)
hour_cycles.unchecked_append(static_cast<HourCycle>(hour_cycle));
return hour_cycles;
}
)~~~");
auto append_regional_lookup = [&](StringView return_type, StringView lookup_type) {
generator.set("return_type", return_type);
generator.set("lookup_type", lookup_type);

View file

@ -578,11 +578,9 @@ Optional<StringView> get_preferred_keyword_value_for_locale(StringView locale, S
// FIXME: Calendar keywords are also region-based, and will need to be handled here when we support non-Gregorian calendars:
// https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-core/supplemental/calendarPreferenceData.json
if (key == "hc"sv) {
auto hour_cycles = get_locale_hour_cycles(locale);
if (hour_cycles.is_empty())
return OptionalNone {};
return Optional<StringView> { hour_cycle_to_string(hour_cycles[0]) };
if (auto hour_cycle = default_hour_cycle(locale); hour_cycle.has_value())
return hour_cycle_to_string(*hour_cycle);
return {};
}
// FIXME: Generate locale-preferred collation data when available in the CLDR.
@ -607,15 +605,9 @@ Vector<StringView> get_keywords_for_locale(StringView locale, StringView key)
// FIXME: Calendar keywords are also region-based, and will need to be handled here when we support non-Gregorian calendars:
// https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-core/supplemental/calendarPreferenceData.json
if (key == "hc"sv) {
auto hour_cycles = get_locale_hour_cycles(locale);
Vector<StringView> values;
values.ensure_capacity(hour_cycles.size());
for (auto hour_cycle : hour_cycles)
values.unchecked_append(hour_cycle_to_string(hour_cycle));
return values;
if (auto hour_cycle = default_hour_cycle(locale); hour_cycle.has_value())
return { hour_cycle_to_string(*hour_cycle) };
return {};
}
// FIXME: Generate locale-preferred collation data when available in the CLDR.

View file

@ -189,7 +189,7 @@ ThrowCompletionOr<NonnullGCPtr<DateTimeFormat>> create_date_time_format(VM& vm,
// c. If hc is null, set hc to dataLocaleData.[[hourCycle]].
if (!hour_cycle_value.has_value())
hour_cycle_value = ::Locale::get_default_regional_hour_cycle(data_locale);
hour_cycle_value = ::Locale::default_hour_cycle(data_locale);
}
// 28. Set dateTimeFormat.[[HourCycle]] to hc.

View file

@ -98,6 +98,31 @@ StringView hour_cycle_to_string(HourCycle hour_cycle)
VERIFY_NOT_REACHED();
}
// Returns the default hour cycle reported by the ICU date-time pattern generator for `locale`.
//
// An empty Optional is returned when no LocaleData is available for `locale`, or when ICU
// reports a failure while looking up the default hour cycle.
Optional<HourCycle> default_hour_cycle(StringView locale)
{
    auto locale_data = LocaleData::for_locale(locale);
    if (!locale_data.has_value())
        return {};

    UErrorCode status = U_ZERO_ERROR;
    auto const& pattern_generator = locale_data->date_time_pattern_generator();

    auto icu_default = pattern_generator.getDefaultHourCycle(status);
    if (icu_failure(status))
        return {};

    // Translate the ICU enumerator into the LibLocale equivalent.
    switch (icu_default) {
    case UDAT_HOUR_CYCLE_11:
        return HourCycle::H11;
    case UDAT_HOUR_CYCLE_12:
        return HourCycle::H12;
    case UDAT_HOUR_CYCLE_23:
        return HourCycle::H23;
    case UDAT_HOUR_CYCLE_24:
        return HourCycle::H24;
    }

    // Any other value is treated as a programming error.
    VERIFY_NOT_REACHED();
}
static constexpr char icu_hour_cycle(Optional<HourCycle> const& hour_cycle, Optional<bool> const& hour12)
{
if (hour12.has_value())
@ -472,9 +497,6 @@ CalendarPattern CalendarPattern::create_from_pattern(StringView pattern)
return format;
}
// Weak fallback definitions that report "no data": an empty Optional and an empty Vector.
// NOTE(review): presumably these are replaced at link time by the strong definitions emitted by
// the locale data generator when that generated code is part of the build - confirm.
Optional<HourCycleRegion> __attribute__((weak)) hour_cycle_region_from_string(StringView) { return {}; }
Vector<HourCycle> __attribute__((weak)) get_regional_hour_cycles(StringView) { return {}; }
template<typename T, typename GetRegionalValues>
static T find_regional_values_for_locale(StringView locale, GetRegionalValues&& get_regional_values)
{
@ -508,19 +530,6 @@ static T find_regional_values_for_locale(StringView locale, GetRegionalValues&&
return return_default_values();
}
// Returns the hour cycles for `locale`, resolved by find_regional_values_for_locale() using
// get_regional_hour_cycles() as the per-region lookup.
// https://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table
Vector<HourCycle> get_locale_hour_cycles(StringView locale)
{
    using HourCycles = Vector<HourCycle>;
    return find_regional_values_for_locale<HourCycles>(locale, get_regional_hour_cycles);
}
// Returns the first hour cycle listed for `locale`, or an empty Optional when the locale has no
// hour cycles.
Optional<HourCycle> get_default_regional_hour_cycle(StringView locale)
{
    auto locale_hour_cycles = get_locale_hour_cycles(locale);

    if (locale_hour_cycles.is_empty())
        return {};

    return locale_hour_cycles.first();
}
// Weak fallback definitions that report "no data" by returning empty Optionals.
// NOTE(review): presumably overridden at link time by strong definitions from the generated
// locale data - confirm against the generator.
Optional<MinimumDaysRegion> __attribute__((weak)) minimum_days_region_from_string(StringView) { return {}; }
Optional<u8> __attribute__((weak)) get_regional_minimum_days(StringView) { return {}; }

View file

@ -43,6 +43,7 @@ enum class HourCycle : u8 {
};
// Conversions between HourCycle values and their string form.
// NOTE(review): the exact strings accepted/produced are defined in the implementation file - confirm there.
HourCycle hour_cycle_from_string(StringView hour_cycle);
StringView hour_cycle_to_string(HourCycle hour_cycle);
// Returns the default hour cycle for `locale`, or an empty Optional when none can be determined.
Optional<HourCycle> default_hour_cycle(StringView locale);
enum class CalendarPatternStyle : u8 {
Narrow,
@ -96,11 +97,6 @@ struct CalendarPattern {
Optional<CalendarPatternStyle> time_zone_name;
};
// Region-based hour cycle lookups.
// The two *region* functions have weak fallback definitions that return empty values.
Optional<HourCycleRegion> hour_cycle_region_from_string(StringView hour_cycle_region);
Vector<HourCycle> get_regional_hour_cycles(StringView region);
// Hour cycles resolved for a full locale string; may be empty.
Vector<HourCycle> get_locale_hour_cycles(StringView locale);
// First entry of get_locale_hour_cycles(locale), or an empty Optional when that list is empty.
Optional<HourCycle> get_default_regional_hour_cycle(StringView locale);
Optional<MinimumDaysRegion> minimum_days_region_from_string(StringView minimum_days_region);
Optional<u8> get_regional_minimum_days(StringView region);
Optional<u8> get_locale_minimum_days(StringView locale);

View file

@ -13,7 +13,6 @@ namespace Locale {
enum class CalendarPatternStyle : u8;
enum class FirstDayRegion : u8;
enum class HourCycle : u8;
enum class HourCycleRegion : u16;
enum class Key : u8;
enum class KeywordCalendar : u8;
enum class KeywordCollation : u8;