From 208a5e6763dcdfe7e25fcac8d3fbb5b2e3793f70 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Wed, 4 Jun 2025 07:43:09 -0400 Subject: [PATCH] LibJS: Update the Intl.Locale constructor to the latest editorial spec This has been refactored a bit recently. There are upcoming normative changes that do not apply cleanly without this update. --- .../LibJS/Runtime/Intl/LocaleConstructor.cpp | 222 ++++++++---------- 1 file changed, 103 insertions(+), 119 deletions(-) diff --git a/Libraries/LibJS/Runtime/Intl/LocaleConstructor.cpp b/Libraries/LibJS/Runtime/Intl/LocaleConstructor.cpp index 05c6cdd052f..31a84770322 100644 --- a/Libraries/LibJS/Runtime/Intl/LocaleConstructor.cpp +++ b/Libraries/LibJS/Runtime/Intl/LocaleConstructor.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -29,10 +28,12 @@ struct LocaleAndKeys { Optional nu; }; -// Note: This is not an AO in the spec. This just serves to abstract very similar steps in ApplyOptionsToTag and the Intl.Locale constructor. -static ThrowCompletionOr> get_string_option(VM& vm, Object const& options, PropertyKey const& property, Function validator, ReadonlySpan values = {}) +// NOTE: This is not an AO in the spec. This just serves to abstract very similar steps in UpdateLanguageId and the Intl.Locale constructor. +static ThrowCompletionOr> get_string_option(VM& vm, Object const& options, PropertyKey const& property, Function validator, ReadonlySpan values = {}, Optional const& fallback = {}) { - auto option = TRY(get_option(vm, options, property, OptionType::String, values, Empty {})); + auto option_default = fallback.has_value() ? OptionDefault { *fallback } : Empty {}; + + auto option = TRY(get_option(vm, options, property, OptionType::String, values, option_default)); if (option.is_undefined()) return OptionalNone {}; @@ -42,74 +43,53 @@ static ThrowCompletionOr> get_string_option(VM& vm, Object cons return option.as_string().utf8_string(); } -// 15.1.2 ApplyOptionsToTag ( tag, options ), https://tc39.es/ecma402/#sec-apply-options-to-tag -static ThrowCompletionOr apply_options_to_tag(VM& vm, StringView tag, Object const& options) +// 15.1.2 UpdateLanguageId ( tag, options ), https://tc39.es/ecma402/#sec-updatelanguageid +static ThrowCompletionOr update_language_id(VM& vm, StringView tag, Object const& options) { - // 1. Assert: Type(tag) is String. - // 2. Assert: Type(options) is Object. - - // 3. If ! IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. - if (!is_structurally_valid_language_tag(tag)) - return vm.throw_completion(ErrorType::IntlInvalidLanguageTag, tag); - - // 4. Let language be ? GetOption(options, "language", string, empty, undefined). - // 5. If language is not undefined, then - // a. If language does not match the unicode_language_subtag production, throw a RangeError exception. - auto language = TRY(get_string_option(vm, options, vm.names.language, Unicode::is_unicode_language_subtag)); - - // 6. Let script be ? GetOption(options, "script", string, empty, undefined). - // 7. If script is not undefined, then - // a. If script does not match the unicode_script_subtag production, throw a RangeError exception. - auto script = TRY(get_string_option(vm, options, vm.names.script, Unicode::is_unicode_script_subtag)); - - // 8. Let region be ? GetOption(options, "region", string, empty, undefined). - // 9. If region is not undefined, then - // a. If region does not match the unicode_region_subtag production, throw a RangeError exception. - auto region = TRY(get_string_option(vm, options, vm.names.region, Unicode::is_unicode_region_subtag)); - - // 10. Set tag to ! CanonicalizeUnicodeLocaleId(tag). - auto canonicalized_tag = JS::Intl::canonicalize_unicode_locale_id(tag); - - // 11. Assert: tag matches the unicode_locale_id production. - auto locale_id = Unicode::parse_unicode_locale_id(canonicalized_tag); + auto locale_id = Unicode::parse_unicode_locale_id(tag); VERIFY(locale_id.has_value()); - // 12. Let languageId be the substring of tag corresponding to the unicode_language_id production. - auto& language_id = locale_id->language_id; + // 1. Let baseName be GetLocaleBaseName(tag). + auto const& base_name = locale_id->language_id; - // 13. If language is not undefined, then - if (language.has_value()) { - // a. Set languageId to languageId with the substring corresponding to the unicode_language_subtag production replaced by the string language. - language_id.language = language.release_value(); - } + // 2. Let language be ? GetOption(options, "language", STRING, EMPTY, GetLocaleLanguage(baseName)). + // 3. If language cannot be matched by the unicode_language_subtag Unicode locale nonterminal, throw a RangeError exception. + auto language = TRY(get_string_option(vm, options, vm.names.language, Unicode::is_unicode_language_subtag, {}, *base_name.language)); - // 14. If script is not undefined, then - if (script.has_value()) { - // a. If languageId does not contain a unicode_script_subtag production, then - // i. Set languageId to the string-concatenation of the unicode_language_subtag production of languageId, "-", script, and the rest of languageId. - // b. Else, - // i. Set languageId to languageId with the substring corresponding to the unicode_script_subtag production replaced by the string script. - language_id.script = script.release_value(); - } + // 4. Let script be ? GetOption(options, "script", STRING, EMPTY, GetLocaleScript(baseName)). + // 5. If script is not undefined, then + // a. If script cannot be matched by the unicode_script_subtag Unicode locale nonterminal, throw a RangeError exception. + auto script = TRY(get_string_option(vm, options, vm.names.script, Unicode::is_unicode_script_subtag, {}, base_name.script)); - // 15. If region is not undefined, then - if (region.has_value()) { - // a. If languageId does not contain a unicode_region_subtag production, then - // i. Set languageId to the string-concatenation of the unicode_language_subtag production of languageId, the substring corresponding to "-"` and the `unicode_script_subtag` production if present, `"-", region, and the rest of languageId. - // b. Else, - // i. Set languageId to languageId with the substring corresponding to the unicode_region_subtag production replaced by the string region. - language_id.region = region.release_value(); - } + // 6. Let region be ? GetOption(options, "region", STRING, EMPTY, GetLocaleRegion(baseName)). + // 7. If region is not undefined, then + // a. If region cannot be matched by the unicode_region_subtag Unicode locale nonterminal, throw a RangeError exception. + auto region = TRY(get_string_option(vm, options, vm.names.region, Unicode::is_unicode_region_subtag, {}, base_name.region)); - // 16. Set tag to tag with the substring corresponding to the unicode_language_id production replaced by the string languageId. - canonicalized_tag = locale_id->to_string(); + // 8. Let allExtensions be the suffix of tag following baseName. + auto& extensions = locale_id->extensions; + auto& private_use_extensions = locale_id->private_use_extensions; - // 17. Return ! CanonicalizeUnicodeLocaleId(tag). - return JS::Intl::canonicalize_unicode_locale_id(canonicalized_tag); + // 9. Let newTag be language. + Unicode::LocaleID new_tag; + new_tag.language_id.language = move(language); + + // 10. If script is not undefined, set newTag to the string-concatenation of newTag, "-", and script. + new_tag.language_id.script = move(script); + + // 11. If region is not undefined, set newTag to the string-concatenation of newTag, "-", and region. + new_tag.language_id.region = move(region); + + // 12. Set newTag to the string-concatenation of newTag and allExtensions. + new_tag.extensions = move(extensions); + new_tag.private_use_extensions = move(private_use_extensions); + + // 13. Return newTag. + return new_tag.to_string(); } -// 15.1.3 ApplyUnicodeExtensionToTag ( tag, options, relevantExtensionKeys ), https://tc39.es/ecma402/#sec-apply-unicode-extension-to-tag -static LocaleAndKeys apply_unicode_extension_to_tag(StringView tag, LocaleAndKeys options, ReadonlySpan relevant_extension_keys) +// 15.1.3 MakeLocaleRecord ( tag, options, localeExtensionKeys ), https://tc39.es/ecma402/#sec-makelocalerecord +static LocaleAndKeys make_locale_record(StringView tag, LocaleAndKeys options, ReadonlySpan locale_extension_keys) { auto locale_id = Unicode::parse_unicode_locale_id(tag); VERIFY(locale_id.has_value()); @@ -123,7 +103,7 @@ static LocaleAndKeys apply_unicode_extension_to_tag(StringView tag, LocaleAndKey continue; // a. Let extension be the String value consisting of the substring of the Unicode locale extension sequence within tag. - // b. Let components be ! UnicodeExtensionComponents(extension). + // b. Let components be UnicodeExtensionComponents(extension). auto& components = extension.get(); // c. Let attributes be components.[[Attributes]]. attributes = move(components.attributes); @@ -157,8 +137,8 @@ static LocaleAndKeys apply_unicode_extension_to_tag(StringView tag, LocaleAndKey // 3. Let result be a new Record. LocaleAndKeys result {}; - // 4. For each element key of relevantExtensionKeys, do - for (auto const& key : relevant_extension_keys) { + // 4. For each element key of localeExtensionKeys, do + for (auto const& key : locale_extension_keys) { Unicode::Keyword* entry = nullptr; Optional value; @@ -176,7 +156,7 @@ static LocaleAndKeys apply_unicode_extension_to_tag(StringView tag, LocaleAndKey // c. Assert: options has a field [[]]. // d. Let overrideValue be options.[[]]. - auto override_value = field_from_key(options, key); + auto const& override_value = field_from_key(options, key); // e. If overrideValue is not undefined, then if (override_value.has_value()) { @@ -259,117 +239,121 @@ ThrowCompletionOr> LocaleConstructor::construct(FunctionObject& // a. Append [[CaseFirst]] to internalSlotsList. // 5. If localeExtensionKeys contains "kn", then // a. Append [[Numeric]] to internalSlotsList. - // 6. Let locale be ? OrdinaryCreateFromConstructor(NewTarget, "%Intl.Locale.prototype%", internalSlotsList). auto locale = TRY(ordinary_create_from_constructor(vm, new_target, &Intrinsics::intl_locale_prototype)); - String tag; - - // 7. If Type(tag) is not String or Object, throw a TypeError exception. + // 7. If tag is not a String and tag is not an Object, throw a TypeError exception. if (!tag_value.is_string() && !tag_value.is_object()) return vm.throw_completion(ErrorType::NotAnObjectOrString, "tag"sv); - // 8. If Type(tag) is Object and tag has an [[InitializedLocale]] internal slot, then - if (tag_value.is_object() && is(tag_value.as_object())) { - // a. Let tag be tag.[[Locale]]. - auto const& tag_object = static_cast(tag_value.as_object()); - tag = tag_object.locale(); - } - // 9. Else, - else { - // a. Let tag be ? ToString(tag). - tag = TRY(tag_value.to_string(vm)); - } + auto tag = TRY([&]() -> ThrowCompletionOr { + // 8. If tag is an Object and tag has an [[InitializedLocale]] internal slot, then + // a. Let tag be tag.[[Locale]]. + if (tag_value.is_object()) { + if (auto* locale_tag = as_if(tag_value.as_object())) + return locale_tag->locale(); + } + // 9. Else, + // a. Let tag be ? ToString(tag). + return tag_value.to_string(vm); + }()); // 10. Set options to ? CoerceOptionsToObject(options). auto options = TRY(coerce_options_to_object(vm, options_value)); - // 11. Set tag to ? ApplyOptionsToTag(tag, options). - tag = TRY(apply_options_to_tag(vm, tag, options)); + // 11. If IsStructurallyValidLanguageTag(tag) is false, throw a RangeError exception. + if (!is_structurally_valid_language_tag(tag)) + return vm.throw_completion(ErrorType::IntlInvalidLanguageTag, tag); - // 12. Let opt be a new Record. + // 12. Set tag to CanonicalizeUnicodeLocaleId(tag). + tag = canonicalize_unicode_locale_id(tag); + + // 13. Set tag to ? UpdateLanguageId(tag, options). + tag = TRY(update_language_id(vm, tag, options)); + + // 14. Let opt be a new Record. LocaleAndKeys opt {}; - // 13. Let calendar be ? GetOption(options, "calendar", string, empty, undefined). - // 14. If calendar is not undefined, then - // a. If calendar does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception. - // 15. Set opt.[[ca]] to calendar. + // 15. Let calendar be ? GetOption(options, "calendar", STRING, EMPTY, undefined). + // 16. If calendar is not undefined, then + // a. If calendar cannot be matched by the type Unicode locale nonterminal, throw a RangeError exception. + // 17. Set opt.[[ca]] to calendar. opt.ca = TRY(get_string_option(vm, options, vm.names.calendar, Unicode::is_type_identifier)); - // 16. Let collation be ? GetOption(options, "collation", string, empty, undefined). - // 17. If collation is not undefined, then - // a. If collation does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception. - // 18. Set opt.[[co]] to collation. + // 18. Let collation be ? GetOption(options, "collation", STRING, EMPTY, undefined). + // 19. If collation is not undefined, then + // a. If collation cannot be matched by the type Unicode locale nonterminal, throw a RangeError exception. + // 20. Set opt.[[co]] to collation. opt.co = TRY(get_string_option(vm, options, vm.names.collation, Unicode::is_type_identifier)); - // 19. Let fw be ? Let fw be ? GetOption(options, "firstDayOfWeek", "string", undefined, undefined). + // 21. Let fw be ? GetOption(options, "firstDayOfWeek", STRING, EMPTY, undefined). auto first_day_of_week = TRY(get_string_option(vm, options, vm.names.firstDayOfWeek, nullptr)); - // 20. If fw is not undefined, then + // 22. If fw is not undefined, then if (first_day_of_week.has_value()) { - // a. Set fw to !WeekdayToString(fw). + // a. Set fw to WeekdayToString(fw). first_day_of_week = MUST(String::from_utf8(weekday_to_string(*first_day_of_week))); - // b. If fw does not match the type sequence (from UTS 35 Unicode Locale Identifier, section 3.2), throw a RangeError exception. + // b. If fw cannot be matched by the type Unicode locale nonterminal, throw a RangeError exception. if (!Unicode::is_type_identifier(*first_day_of_week)) return vm.throw_completion(ErrorType::OptionIsNotValidValue, *first_day_of_week, vm.names.firstDayOfWeek); } - // 21. Set opt.[[fw]] to firstDay. + // 23. Set opt.[[fw]] to firstDay. opt.fw = move(first_day_of_week); - // 22. Let hc be ? GetOption(options, "hourCycle", string, « "h11", "h12", "h23", "h24" », undefined). - // 23. Set opt.[[hc]] to hc. + // 24. Let hc be ? GetOption(options, "hourCycle", STRING, « "h11", "h12", "h23", "h24" », undefined). + // 25. Set opt.[[hc]] to hc. opt.hc = TRY(get_string_option(vm, options, vm.names.hourCycle, nullptr, AK::Array { "h11"sv, "h12"sv, "h23"sv, "h24"sv })); - // 24. Let kf be ? GetOption(options, "caseFirst", string, « "upper", "lower", "false" », undefined). - // 25. Set opt.[[kf]] to kf. + // 26. Let kf be ? GetOption(options, "caseFirst", STRING, « "upper", "lower", "false" », undefined). + // 27. Set opt.[[kf]] to kf. opt.kf = TRY(get_string_option(vm, options, vm.names.caseFirst, nullptr, AK::Array { "upper"sv, "lower"sv, "false"sv })); - // 26. Let kn be ? GetOption(options, "numeric", boolean, empty, undefined). + // 28. Let kn be ? GetOption(options, "numeric", BOOLEAN, EMPTY, undefined). auto kn = TRY(get_option(vm, options, vm.names.numeric, OptionType::Boolean, {}, Empty {})); - // 27. If kn is not undefined, set kn to ! ToString(kn). - // 28. Set opt.[[kn]] to kn. + // 29. If kn is not undefined, set kn to ! ToString(kn). + // 30. Set opt.[[kn]] to kn. if (!kn.is_undefined()) opt.kn = TRY(kn.to_string(vm)); - // 29. Let numberingSystem be ? GetOption(options, "numberingSystem", string, empty, undefined). - // 30. If numberingSystem is not undefined, then - // a. If numberingSystem does not match the Unicode Locale Identifier type nonterminal, throw a RangeError exception. - // 31. Set opt.[[nu]] to numberingSystem. + // 31. Let numberingSystem be ? GetOption(options, "numberingSystem", STRING, EMPTY, undefined). + // 32. If numberingSystem is not undefined, then + // a. If numberingSystem cannot be matched by the type Unicode locale nonterminal, throw a RangeError exception. + // 33. Set opt.[[nu]] to numberingSystem. opt.nu = TRY(get_string_option(vm, options, vm.names.numberingSystem, Unicode::is_type_identifier)); - // 32. Let r be ! ApplyUnicodeExtensionToTag(tag, opt, localeExtensionKeys). - auto result = apply_unicode_extension_to_tag(tag, move(opt), locale_extension_keys); + // 34. Let r be MakeLocaleRecord(tag, opt, localeExtensionKeys). + auto result = make_locale_record(tag, move(opt), locale_extension_keys); - // 33. Set locale.[[Locale]] to r.[[locale]]. + // 35. Set locale.[[Locale]] to r.[[locale]]. locale->set_locale(move(result.locale)); - // 34. Set locale.[[Calendar]] to r.[[ca]]. + // 36. Set locale.[[Calendar]] to r.[[ca]]. if (result.ca.has_value()) locale->set_calendar(result.ca.release_value()); - // 35. Set locale.[[Collation]] to r.[[co]]. + // 37. Set locale.[[Collation]] to r.[[co]]. if (result.co.has_value()) locale->set_collation(result.co.release_value()); - // 36. Set locale.[[FirstDayOfWeek]] to r.[[fw]]. + // 38. Set locale.[[FirstDayOfWeek]] to r.[[fw]]. if (result.fw.has_value()) locale->set_first_day_of_week(result.fw.release_value()); - // 37. Set locale.[[HourCycle]] to r.[[hc]]. + // 39. Set locale.[[HourCycle]] to r.[[hc]]. if (result.hc.has_value()) locale->set_hour_cycle(result.hc.release_value()); - // 38. If localeExtensionKeys contains "kf", then + // 40. If localeExtensionKeys contains "kf", then if (locale_extension_keys.span().contains_slow("kf"sv)) { // a. Set locale.[[CaseFirst]] to r.[[kf]]. if (result.kf.has_value()) locale->set_case_first(result.kf.release_value()); } - // 39. If localeExtensionKeys contains "kn", then + // 41. If localeExtensionKeys contains "kn", then if (locale_extension_keys.span().contains_slow("kn"sv)) { // a. If SameValue(r.[[kn]], "true") is true or r.[[kn]] is the empty String, then if (result.kn.has_value() && (result.kn == "true"sv || result.kn->is_empty())) { @@ -383,11 +367,11 @@ ThrowCompletionOr> LocaleConstructor::construct(FunctionObject& } } - // 40. Set locale.[[NumberingSystem]] to r.[[nu]]. + // 42. Set locale.[[NumberingSystem]] to r.[[nu]]. if (result.nu.has_value()) locale->set_numbering_system(result.nu.release_value()); - // 41. Return locale. + // 43. Return locale. return locale; }