From ca1257c6f9a95312363d1fc2741e1a44bcc369c5 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Wed, 14 Aug 2024 16:18:06 -0400 Subject: [PATCH] LibJS+LibUnicode: Make the collation sensitivity default locale-aware Note this happens to be 'variant' for every locale currently. --- .../Libraries/LibJS/Runtime/Intl/Collator.h | 2 +- .../Runtime/Intl/CollatorConstructor.cpp | 21 +++++++----- .../Collator.prototype.resolvedOptions.js | 7 ++-- Userland/Libraries/LibUnicode/Collator.cpp | 34 +++++++++++++++++-- Userland/Libraries/LibUnicode/Collator.h | 3 +- 5 files changed, 53 insertions(+), 14 deletions(-) diff --git a/Userland/Libraries/LibJS/Runtime/Intl/Collator.h b/Userland/Libraries/LibJS/Runtime/Intl/Collator.h index f35c9e44da7..d4e508fa916 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/Collator.h +++ b/Userland/Libraries/LibJS/Runtime/Intl/Collator.h @@ -37,7 +37,7 @@ public: StringView usage_string() const { return Unicode::usage_to_string(m_usage); } Unicode::Sensitivity sensitivity() const { return m_sensitivity; } - void set_sensitivity(StringView sensitivity) { m_sensitivity = Unicode::sensitivity_from_string(sensitivity); } + void set_sensitivity(Unicode::Sensitivity sensitivity) { m_sensitivity = sensitivity; } StringView sensitivity_string() const { return Unicode::sensitivity_to_string(m_sensitivity); } Unicode::CaseFirst case_first() const { return m_case_first; } diff --git a/Userland/Libraries/LibJS/Runtime/Intl/CollatorConstructor.cpp b/Userland/Libraries/LibJS/Runtime/Intl/CollatorConstructor.cpp index f6b6856d90b..1244aacebcd 100644 --- a/Userland/Libraries/LibJS/Runtime/Intl/CollatorConstructor.cpp +++ b/Userland/Libraries/LibJS/Runtime/Intl/CollatorConstructor.cpp @@ -152,24 +152,26 @@ ThrowCompletionOr> CollatorConstructor::construct(FunctionO // 31. Let resolvedLocaleData be r.[[LocaleData]]. // 32. Let sensitivity be ? GetOption(options, "sensitivity", string, « "base", "accent", "case", "variant" », undefined). - auto sensitivity = TRY(get_option(vm, *options, vm.names.sensitivity, OptionType::String, { "base"sv, "accent"sv, "case"sv, "variant"sv }, Empty {})); + auto sensitivity_value = TRY(get_option(vm, *options, vm.names.sensitivity, OptionType::String, { "base"sv, "accent"sv, "case"sv, "variant"sv }, Empty {})); // 33. If sensitivity is undefined, then - if (sensitivity.is_undefined()) { + if (sensitivity_value.is_undefined()) { // a. If usage is "sort", then if (collator->usage() == Unicode::Usage::Sort) { // i. Set sensitivity to "variant". - sensitivity = PrimitiveString::create(vm, "variant"_string); + sensitivity_value = PrimitiveString::create(vm, "variant"_string); } // b. Else, else { - // FIXME: i. Set sensitivity to resolvedLocaleData.[[sensitivity]]. - sensitivity = PrimitiveString::create(vm, "base"_string); + // i. Set sensitivity to resolvedLocaleData.[[sensitivity]]. + // NOTE: We do not acquire the default [[sensitivity]] here. Instead, we default the option to null, + // and let LibUnicode fill in the default value if an override was not provided here. } } - // 34. Set collator.[[Sensitivity]] to sensitivity. - collator->set_sensitivity(sensitivity.as_string().utf8_string_view()); + Optional sensitivity; + if (!sensitivity_value.is_undefined()) + sensitivity = Unicode::sensitivity_from_string(sensitivity_value.as_string().utf8_string_view()); // 35. Let defaultIgnorePunctuation be resolvedLocaleData.[[ignorePunctuation]]. // NOTE: We do not acquire the default [[ignorePunctuation]] here. Instead, we default the option to null, @@ -187,12 +189,15 @@ ThrowCompletionOr> CollatorConstructor::construct(FunctionO collator->locale(), collator->usage(), collator->collation(), - collator->sensitivity(), + sensitivity, collator->case_first(), collator->numeric(), ignore_punctuation); collator->set_collator(move(icu_collator)); + // 34. Set collator.[[Sensitivity]] to sensitivity. + collator->set_sensitivity(collator->collator().sensitivity()); + // 37. Set collator.[[IgnorePunctuation]] to ignorePunctuation. collator->set_ignore_punctuation(collator->collator().ignore_punctuation()); diff --git a/Userland/Libraries/LibJS/Tests/builtins/Intl/Collator/Collator.prototype.resolvedOptions.js b/Userland/Libraries/LibJS/Tests/builtins/Intl/Collator/Collator.prototype.resolvedOptions.js index e34b2f91720..7f16fe19df9 100644 --- a/Userland/Libraries/LibJS/Tests/builtins/Intl/Collator/Collator.prototype.resolvedOptions.js +++ b/Userland/Libraries/LibJS/Tests/builtins/Intl/Collator/Collator.prototype.resolvedOptions.js @@ -28,9 +28,12 @@ describe("correct behavior", () => { const en1 = new Intl.Collator("en"); expect(en1.resolvedOptions().sensitivity).toBe("variant"); + const en2 = new Intl.Collator("en", { usage: "search" }); + expect(en2.resolvedOptions().sensitivity).toBe("variant"); + ["base", "accent", "case", "variant"].forEach(sensitivity => { - const en2 = new Intl.Collator("en", { sensitivity: sensitivity }); - expect(en2.resolvedOptions().sensitivity).toBe(sensitivity); + const en3 = new Intl.Collator("en", { sensitivity: sensitivity }); + expect(en3.resolvedOptions().sensitivity).toBe(sensitivity); }); }); diff --git a/Userland/Libraries/LibUnicode/Collator.cpp b/Userland/Libraries/LibUnicode/Collator.cpp index a23d549e31c..00279c9a933 100644 --- a/Userland/Libraries/LibUnicode/Collator.cpp +++ b/Userland/Libraries/LibUnicode/Collator.cpp @@ -92,6 +92,28 @@ static constexpr UColAttributeValue icu_sensitivity(Sensitivity sensitivity) VERIFY_NOT_REACHED(); } +static Sensitivity sensitivity_for_collator(icu::Collator const& collator) +{ + UErrorCode status = U_ZERO_ERROR; + + auto attribute = collator.getAttribute(UCOL_STRENGTH, status); + VERIFY(icu_success(status)); + + switch (attribute) { + case UCOL_PRIMARY: + attribute = collator.getAttribute(UCOL_CASE_LEVEL, status); + VERIFY(icu_success(status)); + + return attribute == UCOL_ON ? Sensitivity::Case : Sensitivity::Base; + + case UCOL_SECONDARY: + return Sensitivity::Accent; + + default: + return Sensitivity::Variant; + } +} + CaseFirst case_first_from_string(StringView case_first) { if (case_first == "upper"sv) @@ -165,6 +187,11 @@ public: VERIFY_NOT_REACHED(); } + virtual Sensitivity sensitivity() const override + { + return sensitivity_for_collator(*m_collator); + } + virtual bool ignore_punctuation() const override { return ignore_punctuation_for_collator(*m_collator); @@ -178,7 +205,7 @@ NonnullOwnPtr Collator::create( StringView locale, Usage usage, StringView collation, - Sensitivity sensitivity, + Optional sensitivity, CaseFirst case_first, bool numeric, Optional ignore_punctuation) @@ -198,10 +225,13 @@ NonnullOwnPtr Collator::create( VERIFY(icu_success(status)); }; + if (!sensitivity.has_value()) + sensitivity = sensitivity_for_collator(*collator); + if (!ignore_punctuation.has_value()) ignore_punctuation = ignore_punctuation_for_collator(*collator); - set_attribute(UCOL_STRENGTH, icu_sensitivity(sensitivity)); + set_attribute(UCOL_STRENGTH, icu_sensitivity(*sensitivity)); set_attribute(UCOL_CASE_LEVEL, sensitivity == Sensitivity::Case ? UCOL_ON : UCOL_OFF); set_attribute(UCOL_CASE_FIRST, icu_case_first(case_first)); set_attribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF); diff --git a/Userland/Libraries/LibUnicode/Collator.h b/Userland/Libraries/LibUnicode/Collator.h index 7e37c7afaac..fa043a7eed5 100644 --- a/Userland/Libraries/LibUnicode/Collator.h +++ b/Userland/Libraries/LibUnicode/Collator.h @@ -41,7 +41,7 @@ public: StringView locale, Usage, StringView collation, - Sensitivity, + Optional, CaseFirst, bool numeric, Optional ignore_punctuation); @@ -55,6 +55,7 @@ public: }; virtual Order compare(StringView, StringView) const = 0; + virtual Sensitivity sensitivity() const = 0; virtual bool ignore_punctuation() const = 0; protected: