LibUnicode: Generate unique lists of languages, territories, and scripts

This commit is contained in:
Timothy Flynn 2021-12-13 11:59:21 -05:00 committed by Brian Gianforcaro
parent 6e5f0b139b
commit a45f2ccc25
Notes: sideshowbarker 2024-07-17 22:48:25 +09:00

View file

@ -24,6 +24,15 @@
using StringIndexType = u16;
constexpr auto s_string_index_type = "u16"sv;
using LanguageListIndexType = u8;
constexpr auto s_language_list_index_type = "u8"sv;
using TerritoryListIndexType = u8;
constexpr auto s_territory_list_index_type = "u8"sv;
using ScriptListIndexType = u8;
constexpr auto s_script_list_index_type = "u8"sv;
struct ListPatterns {
String type;
String style;
@ -33,13 +42,17 @@ struct ListPatterns {
StringIndexType pair { 0 };
};
using LanguageList = Vector<StringIndexType>;
using TerritoryList = Vector<StringIndexType>;
using ScriptList = Vector<StringIndexType>;
struct Locale {
String language;
Optional<String> territory;
Optional<String> variant;
HashMap<String, StringIndexType> languages;
HashMap<String, StringIndexType> territories;
HashMap<String, StringIndexType> scripts;
LanguageListIndexType languages { 0 };
TerritoryListIndexType territories { 0 };
ScriptListIndexType scripts { 0 };
HashMap<String, StringIndexType> long_currencies;
HashMap<String, StringIndexType> short_currencies;
HashMap<String, StringIndexType> narrow_currencies;
@ -55,6 +68,9 @@ struct LanguageMapping {
struct UnicodeLocaleData {
UniqueStringStorage<StringIndexType> unique_strings;
UniqueStorage<LanguageList, LanguageListIndexType> unique_language_lists;
UniqueStorage<TerritoryList, TerritoryListIndexType> unique_territory_lists;
UniqueStorage<ScriptList, ScriptListIndexType> unique_script_lists;
HashMap<String, Locale> locales;
Vector<Alias> locale_aliases;
@ -164,6 +180,7 @@ static ErrorOr<void> parse_identity(String locale_path, UnicodeLocaleData& local
auto const& identity_object = locale_object.as_object().get("identity"sv);
auto const& language_string = identity_object.as_object().get("language"sv);
auto const& territory_string = identity_object.as_object().get("territory"sv);
auto const& script_string = identity_object.as_object().get("script"sv);
auto const& variant_string = identity_object.as_object().get("variant"sv);
locale.language = language_string.as_string();
@ -176,6 +193,12 @@ static ErrorOr<void> parse_identity(String locale_path, UnicodeLocaleData& local
locale_data.territories.append(*locale.territory);
}
if (script_string.is_string()) {
auto script = script_string.as_string();
if (!locale_data.scripts.contains_slow(script))
locale_data.scripts.append(script);
}
if (variant_string.is_string()) {
locale.variant = variant_string.as_string();
if (!locale_data.variants.contains_slow(*locale.variant))
@ -191,21 +214,22 @@ static ErrorOr<void> parse_locale_languages(String locale_path, UnicodeLocaleDat
languages_path = languages_path.append("languages.json"sv);
auto languages_file = TRY(Core::File::open(languages_path.string(), Core::OpenMode::ReadOnly));
auto languages = TRY(JsonValue::from_string(languages_file->read_all()));
auto locale_languages = TRY(JsonValue::from_string(languages_file->read_all()));
auto const& main_object = languages.as_object().get("main"sv);
auto const& main_object = locale_languages.as_object().get("main"sv);
auto const& locale_object = main_object.as_object().get(languages_path.parent().basename());
auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv);
auto const& languages_object = locale_display_names_object.as_object().get("languages"sv);
languages_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
if (!locale_data.languages.contains_slow(key))
return;
LanguageList languages;
languages.resize(locale_data.languages.size());
auto index = locale_data.unique_strings.ensure(value.as_string());
locale.languages.set(key, index);
languages_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
if (auto index = locale_data.languages.find_first_index(key); index.has_value())
languages[*index] = locale_data.unique_strings.ensure(value.as_string());
});
locale.languages = locale_data.unique_language_lists.ensure(move(languages));
return {};
}
@ -215,21 +239,22 @@ static ErrorOr<void> parse_locale_territories(String locale_path, UnicodeLocaleD
territories_path = territories_path.append("territories.json"sv);
auto territories_file = TRY(Core::File::open(territories_path.string(), Core::OpenMode::ReadOnly));
auto territories = TRY(JsonValue::from_string(territories_file->read_all()));
auto locale_territories = TRY(JsonValue::from_string(territories_file->read_all()));
auto const& main_object = territories.as_object().get("main"sv);
auto const& main_object = locale_territories.as_object().get("main"sv);
auto const& locale_object = main_object.as_object().get(territories_path.parent().basename());
auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv);
auto const& territories_object = locale_display_names_object.as_object().get("territories"sv);
territories_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
if (!locale_data.territories.contains_slow(key))
return;
TerritoryList territories;
territories.resize(locale_data.territories.size());
auto index = locale_data.unique_strings.ensure(value.as_string());
locale.territories.set(key, index);
territories_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
if (auto index = locale_data.territories.find_first_index(key); index.has_value())
territories[*index] = locale_data.unique_strings.ensure(value.as_string());
});
locale.territories = locale_data.unique_territory_lists.ensure(move(territories));
return {};
}
@ -239,21 +264,22 @@ static ErrorOr<void> parse_locale_scripts(String locale_path, UnicodeLocaleData&
scripts_path = scripts_path.append("scripts.json"sv);
auto scripts_file = TRY(Core::File::open(scripts_path.string(), Core::OpenMode::ReadOnly));
auto scripts = TRY(JsonValue::from_string(scripts_file->read_all()));
auto locale_scripts = TRY(JsonValue::from_string(scripts_file->read_all()));
auto const& main_object = scripts.as_object().get("main"sv);
auto const& main_object = locale_scripts.as_object().get("main"sv);
auto const& locale_object = main_object.as_object().get(scripts_path.parent().basename());
auto const& locale_display_names_object = locale_object.as_object().get("localeDisplayNames"sv);
auto const& scripts_object = locale_display_names_object.as_object().get("scripts"sv);
scripts_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
auto index = locale_data.unique_strings.ensure(value.as_string());
locale.scripts.set(key, index);
ScriptList scripts;
scripts.resize(locale_data.scripts.size());
if (!locale_data.scripts.contains_slow(key))
locale_data.scripts.append(key);
scripts_object.as_object().for_each_member([&](auto const& key, JsonValue const& value) {
if (auto index = locale_data.scripts.find_first_index(key); index.has_value())
scripts[*index] = locale_data.unique_strings.ensure(value.as_string());
});
locale.scripts = locale_data.unique_script_lists.ensure(move(scripts));
return {};
}
@ -544,6 +570,10 @@ static ErrorOr<void> parse_all_locales(String core_path, String locale_names_pat
TRY(parse_identity(locale_path, locale_data, locale));
}
quick_sort(locale_data.languages);
quick_sort(locale_data.territories);
quick_sort(locale_data.scripts);
while (locale_names_iterator.has_next()) {
auto locale_path = TRY(next_path_from_dir_iterator(locale_names_iterator));
auto language = TRY(remove_variants_from_path(locale_path));
@ -699,6 +729,9 @@ struct Patterns {
)~~~");
locale_data.unique_strings.generate(generator);
locale_data.unique_language_lists.generate(generator, s_string_index_type, "s_language_lists"sv);
locale_data.unique_territory_lists.generate(generator, s_string_index_type, "s_territory_lists"sv);
locale_data.unique_script_lists.generate(generator, s_string_index_type, "s_script_lists"sv);
auto append_index = [&](auto index) {
generator.append(String::formatted(", {}", index));
@ -720,6 +753,27 @@ struct Patterns {
generator.append(String::formatted(" }}, {}", list.size()));
};
auto append_mapping = [&](auto const& keys, auto const& map, auto type, auto name, auto mapping_getter) {
generator.set("type", type);
generator.set("name", name);
generator.set("size", String::number(keys.size()));
generator.append(R"~~~(
static constexpr Array<@type@, @size@> @name@ { {)~~~");
bool first = true;
for (auto const& key : keys) {
auto const& value = map.find(key)->value;
auto mapping = mapping_getter(value);
generator.append(first ? " " : ", ");
generator.append(String::number(mapping));
first = false;
}
generator.append(" } };");
};
auto append_string_index_list = [&](String name, auto const& keys, auto const& mappings) {
generator.set("name", name);
generator.set("size", String::number(keys.size()));
@ -776,9 +830,12 @@ static constexpr Array<Patterns, @size@> @name@ { {)~~~");
)~~~");
};
generate_mapping(generator, locale_data.locales, s_string_index_type, "s_languages"sv, "s_languages_{}", [&](auto const& name, auto const& value) { append_string_index_list(name, locale_data.languages, value.languages); });
generate_mapping(generator, locale_data.locales, s_string_index_type, "s_territories"sv, "s_territories_{}", [&](auto const& name, auto const& value) { append_string_index_list(name, locale_data.territories, value.territories); });
generate_mapping(generator, locale_data.locales, s_string_index_type, "s_scripts"sv, "s_scripts_{}", [&](auto const& name, auto const& value) { append_string_index_list(name, locale_data.scripts, value.scripts); });
auto locales = locale_data.locales.keys();
quick_sort(locales);
append_mapping(locales, locale_data.locales, s_language_list_index_type, "s_languages"sv, [&](auto const& locale) { return locale.languages; });
append_mapping(locales, locale_data.locales, s_territory_list_index_type, "s_territories"sv, [&](auto const& locale) { return locale.territories; });
append_mapping(locales, locale_data.locales, s_script_list_index_type, "s_scripts"sv, [&](auto const& locale) { return locale.scripts; });
generate_mapping(generator, locale_data.locales, s_string_index_type, "s_long_currencies"sv, "s_long_currencies_{}", [&](auto const& name, auto const& value) { append_string_index_list(name, locale_data.currencies, value.long_currencies); });
generate_mapping(generator, locale_data.locales, s_string_index_type, "s_short_currencies"sv, "s_short_currencies_{}", [&](auto const& name, auto const& value) { append_string_index_list(name, locale_data.currencies, value.short_currencies); });
generate_mapping(generator, locale_data.locales, s_string_index_type, "s_narrow_currencies"sv, "s_narrow_currencies_{}", [&](auto const& name, auto const& value) { append_string_index_list(name, locale_data.currencies, value.narrow_currencies); });
@ -966,7 +1023,7 @@ static LanguageMapping const* resolve_likely_subtag(Unicode::LanguageID const& l
)~~~");
auto append_mapping_search = [&](StringView enum_snake, StringView from_string_name, StringView collection_name) {
auto append_mapping_search = [&](StringView enum_snake, StringView from_string_name, StringView collection_name, StringView unique_list = {}) {
generator.set("enum_snake", enum_snake);
generator.set("from_string_name", from_string_name);
generator.set("collection_name", collection_name);
@ -983,8 +1040,21 @@ Optional<StringView> get_locale_@enum_snake@_mapping(StringView locale, StringVi
auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
auto @enum_snake@_index = to_underlying(*@enum_snake@_value);
)~~~");
if (unique_list.is_empty()) {
generator.append(R"~~~(
auto const& mappings = @collection_name@.at(locale_index);
)~~~");
} else {
generator.set("unique_list", unique_list);
generator.append(R"~~~(
auto mapping_index = @collection_name@.at(locale_index);
auto const& mappings = @unique_list@.at(mapping_index);
)~~~");
}
generator.append(R"~~~(
auto @enum_snake@_string_index = mappings.at(@enum_snake@_index);
auto @enum_snake@_mapping = s_string_list.at(@enum_snake@_string_index);
@ -1019,15 +1089,15 @@ Optional<StringView> get_locale_@enum_snake@_mapping(StringView locale, StringVi
append_from_string("Locale"sv, "locale"sv, locale_data.locales.keys(), locale_data.locale_aliases);
append_mapping_search("language"sv, "language"sv, "s_languages"sv);
append_mapping_search("language"sv, "language"sv, "s_languages"sv, "s_language_lists"sv);
append_from_string("Language"sv, "language"sv, locale_data.languages);
append_alias_search("language"sv, locale_data.language_aliases);
append_mapping_search("territory"sv, "territory"sv, "s_territories"sv);
append_mapping_search("territory"sv, "territory"sv, "s_territories"sv, "s_territory_lists"sv);
append_from_string("Territory"sv, "territory"sv, locale_data.territories);
append_alias_search("territory"sv, locale_data.territory_aliases);
append_mapping_search("script_tag"sv, "script_tag"sv, "s_scripts"sv);
append_mapping_search("script_tag"sv, "script_tag"sv, "s_scripts"sv, "s_script_lists"sv);
append_from_string("ScriptTag"sv, "script_tag"sv, locale_data.scripts);
append_alias_search("script_tag"sv, locale_data.script_aliases);