mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-29 20:29:18 +00:00
LibJS+LibUnicode: Fully implement Intl.Collator with ICU
We were never able to implement anything other than a basic, locale-unaware collator with the JSON export of the CLDR, as it did not have collation data. We can now use ICU to implement collation.
This commit is contained in:
parent
dc0d5da086
commit
eb7e3583c9
Notes:
github-actions[bot]
2024-08-15 11:45:44 +00:00
Author: https://github.com/trflynn89
Commit: eb7e3583c9
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/1078
Reviewed-by: https://github.com/awesomekling
11 changed files with 384 additions and 142 deletions
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2022, Tim Flynn <trflynn89@serenityos.org>
|
||||
* Copyright (c) 2022-2024, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -16,83 +16,6 @@ Collator::Collator(Object& prototype)
|
|||
{
|
||||
}
|
||||
|
||||
void Collator::set_usage(StringView type)
|
||||
{
|
||||
if (type == "sort"sv)
|
||||
m_usage = Usage::Sort;
|
||||
else if (type == "search"sv)
|
||||
m_usage = Usage::Search;
|
||||
else
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
StringView Collator::usage_string() const
|
||||
{
|
||||
switch (m_usage) {
|
||||
case Usage::Sort:
|
||||
return "sort"sv;
|
||||
case Usage::Search:
|
||||
return "search"sv;
|
||||
default:
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
}
|
||||
|
||||
void Collator::set_sensitivity(StringView type)
|
||||
{
|
||||
if (type == "base"sv)
|
||||
m_sensitivity = Sensitivity::Base;
|
||||
else if (type == "accent"sv)
|
||||
m_sensitivity = Sensitivity::Accent;
|
||||
else if (type == "case"sv)
|
||||
m_sensitivity = Sensitivity::Case;
|
||||
else if (type == "variant"sv)
|
||||
m_sensitivity = Sensitivity::Variant;
|
||||
else
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
StringView Collator::sensitivity_string() const
|
||||
{
|
||||
switch (m_sensitivity) {
|
||||
case Sensitivity::Base:
|
||||
return "base"sv;
|
||||
case Sensitivity::Accent:
|
||||
return "accent"sv;
|
||||
case Sensitivity::Case:
|
||||
return "case"sv;
|
||||
case Sensitivity::Variant:
|
||||
return "variant"sv;
|
||||
default:
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
}
|
||||
|
||||
void Collator::set_case_first(StringView case_first)
|
||||
{
|
||||
if (case_first == "upper"sv)
|
||||
m_case_first = CaseFirst::Upper;
|
||||
else if (case_first == "lower"sv)
|
||||
m_case_first = CaseFirst::Lower;
|
||||
else if (case_first == "false"sv)
|
||||
m_case_first = CaseFirst::False;
|
||||
else
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
StringView Collator::case_first_string() const
|
||||
{
|
||||
switch (m_case_first) {
|
||||
case CaseFirst::Upper:
|
||||
return "upper"sv;
|
||||
case CaseFirst::Lower:
|
||||
return "lower"sv;
|
||||
case CaseFirst::False:
|
||||
return "false"sv;
|
||||
default:
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
}
|
||||
void Collator::visit_edges(Visitor& visitor)
|
||||
{
|
||||
Base::visit_edges(visitor);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2022, Tim Flynn <trflynn89@serenityos.org>
|
||||
* Copyright (c) 2022-2024, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -11,6 +11,7 @@
|
|||
#include <AK/StringView.h>
|
||||
#include <LibJS/Runtime/Intl/CollatorCompareFunction.h>
|
||||
#include <LibJS/Runtime/Object.h>
|
||||
#include <LibUnicode/Collator.h>
|
||||
|
||||
namespace JS::Intl {
|
||||
|
||||
|
@ -19,24 +20,6 @@ class Collator final : public Object {
|
|||
JS_DECLARE_ALLOCATOR(Collator);
|
||||
|
||||
public:
|
||||
enum class Usage {
|
||||
Sort,
|
||||
Search,
|
||||
};
|
||||
|
||||
enum class Sensitivity {
|
||||
Base,
|
||||
Accent,
|
||||
Case,
|
||||
Variant,
|
||||
};
|
||||
|
||||
enum class CaseFirst {
|
||||
Upper,
|
||||
Lower,
|
||||
False,
|
||||
};
|
||||
|
||||
static constexpr auto relevant_extension_keys()
|
||||
{
|
||||
// 10.2.3 Internal slots, https://tc39.es/ecma402/#sec-intl-collator-internal-slots
|
||||
|
@ -49,17 +32,17 @@ public:
|
|||
String const& locale() const { return m_locale; }
|
||||
void set_locale(String locale) { m_locale = move(locale); }
|
||||
|
||||
Usage usage() const { return m_usage; }
|
||||
void set_usage(StringView usage);
|
||||
StringView usage_string() const;
|
||||
Unicode::Usage usage() const { return m_usage; }
|
||||
void set_usage(StringView usage) { m_usage = Unicode::usage_from_string(usage); }
|
||||
StringView usage_string() const { return Unicode::usage_to_string(m_usage); }
|
||||
|
||||
Sensitivity sensitivity() const { return m_sensitivity; }
|
||||
void set_sensitivity(StringView sensitivity);
|
||||
StringView sensitivity_string() const;
|
||||
Unicode::Sensitivity sensitivity() const { return m_sensitivity; }
|
||||
void set_sensitivity(StringView sensitivity) { m_sensitivity = Unicode::sensitivity_from_string(sensitivity); }
|
||||
StringView sensitivity_string() const { return Unicode::sensitivity_to_string(m_sensitivity); }
|
||||
|
||||
CaseFirst case_first() const { return m_case_first; }
|
||||
void set_case_first(StringView case_first);
|
||||
StringView case_first_string() const;
|
||||
Unicode::CaseFirst case_first() const { return m_case_first; }
|
||||
void set_case_first(StringView case_first) { m_case_first = Unicode::case_first_from_string(case_first); }
|
||||
StringView case_first_string() const { return Unicode::case_first_to_string(m_case_first); }
|
||||
|
||||
String const& collation() const { return m_collation; }
|
||||
void set_collation(String collation) { m_collation = move(collation); }
|
||||
|
@ -73,19 +56,25 @@ public:
|
|||
CollatorCompareFunction* bound_compare() const { return m_bound_compare; }
|
||||
void set_bound_compare(CollatorCompareFunction* bound_compare) { m_bound_compare = bound_compare; }
|
||||
|
||||
Unicode::Collator const& collator() const { return *m_collator; }
|
||||
void set_collator(NonnullOwnPtr<Unicode::Collator> collator) { m_collator = move(collator); }
|
||||
|
||||
private:
|
||||
explicit Collator(Object& prototype);
|
||||
|
||||
virtual void visit_edges(Visitor&) override;
|
||||
|
||||
String m_locale; // [[Locale]]
|
||||
Usage m_usage { Usage::Sort }; // [[Usage]]
|
||||
Sensitivity m_sensitivity { Sensitivity::Variant }; // [[Sensitivity]]
|
||||
CaseFirst m_case_first { CaseFirst::False }; // [[CaseFirst]]
|
||||
Unicode::Usage m_usage { Unicode::Usage::Sort }; // [[Usage]]
|
||||
Unicode::Sensitivity m_sensitivity { Unicode::Sensitivity::Variant }; // [[Sensitivity]]
|
||||
Unicode::CaseFirst m_case_first { Unicode::CaseFirst::False }; // [[CaseFirst]]
|
||||
String m_collation; // [[Collation]]
|
||||
bool m_ignore_punctuation { false }; // [[IgnorePunctuation]]
|
||||
bool m_numeric { false }; // [[Numeric]]
|
||||
GCPtr<CollatorCompareFunction> m_bound_compare; // [[BoundCompare]]
|
||||
|
||||
// Non-standard. Stores the ICU collator for the Intl object's collation options.
|
||||
OwnPtr<Unicode::Collator> m_collator;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -31,22 +31,10 @@ void CollatorCompareFunction::initialize(Realm&)
|
|||
define_direct_property(vm.names.name, PrimitiveString::create(vm, String {}), Attribute::Configurable);
|
||||
}
|
||||
|
||||
// 10.3.3.2 CompareStrings ( collator, x, y ), https://tc39.es/ecma402/#sec-collator-comparestrings
|
||||
double compare_strings(Collator& collator, Utf8View const& x, Utf8View const& y)
|
||||
void CollatorCompareFunction::visit_edges(Visitor& visitor)
|
||||
{
|
||||
// FIXME: Implement https://unicode.org/reports/tr10
|
||||
(void)collator;
|
||||
auto x_iterator = x.begin();
|
||||
auto y_iterator = y.begin();
|
||||
for (; x_iterator != x.end() && y_iterator != y.end(); ++x_iterator, ++y_iterator) {
|
||||
if (*x_iterator != *y_iterator)
|
||||
return static_cast<double>(*x_iterator) - static_cast<double>(*y_iterator);
|
||||
}
|
||||
if (x_iterator != x.end())
|
||||
return 1.0;
|
||||
if (y_iterator != y.end())
|
||||
return -1.0;
|
||||
return 0.0;
|
||||
Base::visit_edges(visitor);
|
||||
visitor.visit(m_collator);
|
||||
}
|
||||
|
||||
// 10.3.3.1 Collator Compare Functions, https://tc39.es/ecma402/#sec-collator-compare-functions
|
||||
|
@ -61,17 +49,32 @@ ThrowCompletionOr<Value> CollatorCompareFunction::call()
|
|||
|
||||
// 5. Let X be ? ToString(x).
|
||||
auto x = TRY(vm.argument(0).to_string(vm));
|
||||
|
||||
// 6. Let Y be ? ToString(y).
|
||||
auto y = TRY(vm.argument(1).to_string(vm));
|
||||
|
||||
// 7. Return CompareStrings(collator, X, Y).
|
||||
return compare_strings(m_collator, x.code_points(), y.code_points());
|
||||
return compare_strings(m_collator, x, y);
|
||||
}
|
||||
|
||||
void CollatorCompareFunction::visit_edges(Visitor& visitor)
|
||||
// 10.3.3.2 CompareStrings ( collator, x, y ), https://tc39.es/ecma402/#sec-collator-comparestrings
|
||||
int compare_strings(Collator const& collator, StringView x, StringView y)
|
||||
{
|
||||
Base::visit_edges(visitor);
|
||||
visitor.visit(m_collator);
|
||||
auto result = collator.collator().compare(x, y);
|
||||
|
||||
// The result is intended to correspond with a sort order of String values according to the effective locale and
|
||||
// collation options of collator, and will be negative when x is ordered before y, positive when x is ordered after
|
||||
// y, and zero in all other cases (representing no relative ordering between x and y).
|
||||
switch (result) {
|
||||
case Unicode::Collator::Order::Before:
|
||||
return -1;
|
||||
case Unicode::Collator::Order::Equal:
|
||||
return 0;
|
||||
case Unicode::Collator::Order::After:
|
||||
return 1;
|
||||
}
|
||||
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -30,6 +30,6 @@ private:
|
|||
NonnullGCPtr<Collator> m_collator; // [[Collator]]
|
||||
};
|
||||
|
||||
double compare_strings(Collator&, Utf8View const& x, Utf8View const& y);
|
||||
int compare_strings(Collator const&, StringView x, StringView y);
|
||||
|
||||
}
|
||||
|
|
|
@ -114,7 +114,7 @@ static ThrowCompletionOr<NonnullGCPtr<Collator>> initialize_collator(VM& vm, Col
|
|||
// 27. If sensitivity is undefined, then
|
||||
if (sensitivity.is_undefined()) {
|
||||
// a. If usage is "sort", then
|
||||
if (collator.usage() == Collator::Usage::Sort) {
|
||||
if (collator.usage() == Unicode::Usage::Sort) {
|
||||
// i. Let sensitivity be "variant".
|
||||
sensitivity = PrimitiveString::create(vm, "variant"_string);
|
||||
}
|
||||
|
@ -136,6 +136,17 @@ static ThrowCompletionOr<NonnullGCPtr<Collator>> initialize_collator(VM& vm, Col
|
|||
// 30. Set collator.[[IgnorePunctuation]] to ignorePunctuation.
|
||||
collator.set_ignore_punctuation(ignore_punctuation.as_bool());
|
||||
|
||||
// Non-standard, create an ICU collator for this Intl object.
|
||||
auto icu_collator = Unicode::Collator::create(
|
||||
collator.locale(),
|
||||
collator.usage(),
|
||||
collator.collation(),
|
||||
collator.sensitivity(),
|
||||
collator.case_first(),
|
||||
collator.numeric(),
|
||||
collator.ignore_punctuation());
|
||||
collator.set_collator(move(icu_collator));
|
||||
|
||||
// 31. Return collator.
|
||||
return collator;
|
||||
}
|
||||
|
|
|
@ -566,7 +566,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::locale_compare)
|
|||
auto collator = TRY(construct(vm, realm.intrinsics().intl_collator_constructor(), vm.argument(1), vm.argument(2)));
|
||||
|
||||
// 5. Return CompareStrings(collator, S, thatValue).
|
||||
return Intl::compare_strings(static_cast<Intl::Collator&>(*collator), string.code_points(), that_value.code_points());
|
||||
return Intl::compare_strings(static_cast<Intl::Collator const&>(*collator), string, that_value);
|
||||
}
|
||||
|
||||
// 22.1.3.13 String.prototype.match ( regexp ), https://tc39.es/ecma262/#sec-string.prototype.match
|
||||
|
|
|
@ -17,13 +17,13 @@ describe("correct behavior", () => {
|
|||
const aTob = collator.compare(a, b);
|
||||
const bToa = collator.compare(b, a);
|
||||
|
||||
expect(aTob > 0).toBeTrue();
|
||||
expect(aTob).toBe(-bToa);
|
||||
expect(aTob).toBe(1);
|
||||
expect(bToa).toBe(-1);
|
||||
}
|
||||
|
||||
compareBoth("a", "");
|
||||
compareBoth("1", "");
|
||||
compareBoth("a", "A");
|
||||
compareBoth("A", "a");
|
||||
compareBoth("7", "3");
|
||||
compareBoth("0000", "0");
|
||||
|
||||
|
@ -31,8 +31,65 @@ describe("correct behavior", () => {
|
|||
expect(collator.compare("undefined", undefined)).toBe(0);
|
||||
|
||||
expect(collator.compare("null", null)).toBe(0);
|
||||
expect(collator.compare("null", undefined)).not.toBe(0);
|
||||
expect(collator.compare("null") < 0).toBeTrue();
|
||||
expect(collator.compare("null", undefined)).toBe(-1);
|
||||
expect(collator.compare("null")).toBe(-1);
|
||||
});
|
||||
|
||||
test("canonically equivalent strings", () => {
|
||||
var tests = [
|
||||
["ä\u0306", "a\u0308\u0306"],
|
||||
["ă\u0308", "a\u0306\u0308"],
|
||||
["ạ\u0308", "a\u0323\u0308"],
|
||||
["a\u0308\u0323", "a\u0323\u0308"],
|
||||
["ä\u0323", "a\u0323\u0308"],
|
||||
["Å", "Å"],
|
||||
["Å", "A\u030A"],
|
||||
["Ç", "C\u0327"],
|
||||
["ḋ\u0323", "ḍ\u0307"],
|
||||
["ḋ\u0323", "d\u0323\u0307"],
|
||||
["ô", "o\u0302"],
|
||||
["ö", "o\u0308"],
|
||||
["q\u0307\u0323", "q\u0323\u0307"],
|
||||
["ṩ", "s\u0323\u0307"],
|
||||
["ự", "ụ\u031B"],
|
||||
["ự", "u\u031B\u0323"],
|
||||
["ự", "ư\u0323"],
|
||||
["ự", "u\u0323\u031B"],
|
||||
["Ω", "Ω"],
|
||||
["x\u031B\u0323", "x\u0323\u031B"],
|
||||
["퓛", "\u1111\u1171\u11B6"],
|
||||
["北", "\uD87E\uDC2B"],
|
||||
["가", "\u1100\u1161"],
|
||||
["\uD834\uDD5E", "\uD834\uDD57\uD834\uDD65"],
|
||||
];
|
||||
|
||||
const en = new Intl.Collator("en");
|
||||
const ja = new Intl.Collator("ja");
|
||||
const th = new Intl.Collator("th");
|
||||
|
||||
tests.forEach(test => {
|
||||
expect(en.compare(test[0], test[1])).toBe(0);
|
||||
expect(ja.compare(test[0], test[1])).toBe(0);
|
||||
expect(th.compare(test[0], test[1])).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
test("ignorePunctuation", () => {
    // For each requested option value, the expectation is derived from the *resolved* option,
    // because a locale may resolve `undefined` to either true or false. When punctuation is
    // ignored, " " and "," compare equal to the empty string; otherwise the empty string sorts first.
    [undefined, true, false].forEach(ignorePunctuation => {
        ["en", "ja", "th"].forEach(locale => {
            const collator = new Intl.Collator(locale, { ignorePunctuation });
            const expected = collator.resolvedOptions().ignorePunctuation ? 0 : -1;

            expect(collator.compare("", " ")).toBe(expected);
            expect(collator.compare("", ",")).toBe(expected);
        });
    });
});
|
||||
|
||||
test("UTF-16", () => {
|
||||
|
|
|
@ -15,7 +15,7 @@ test("basic functionality", () => {
|
|||
|
||||
compareBoth("a", "");
|
||||
compareBoth("1", "");
|
||||
compareBoth("a", "A");
|
||||
compareBoth("A", "a");
|
||||
compareBoth("7", "3");
|
||||
compareBoth("0000", "0");
|
||||
|
||||
|
|
|
@ -2,6 +2,7 @@ include(${SerenityOS_SOURCE_DIR}/Meta/CMake/unicode_data.cmake)
|
|||
|
||||
set(SOURCES
|
||||
CharacterTypes.cpp
|
||||
Collator.cpp
|
||||
CurrencyCode.cpp
|
||||
DateTimeFormat.cpp
|
||||
DisplayNames.cpp
|
||||
|
|
196
Userland/Libraries/LibUnicode/Collator.cpp
Normal file
196
Userland/Libraries/LibUnicode/Collator.cpp
Normal file
|
@ -0,0 +1,196 @@
|
|||
/*
|
||||
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibUnicode/Collator.h>
|
||||
#include <LibUnicode/ICU.h>
|
||||
|
||||
#include <unicode/coll.h>
|
||||
|
||||
namespace Unicode {
|
||||
|
||||
// Converts a usage option string to its enumerator: "sort" yields Usage::Sort and "search" yields
// Usage::Search. Any other input reaches VERIFY_NOT_REACHED(), so callers must pass one of those two.
Usage usage_from_string(StringView usage)
|
||||
{
|
||||
if (usage == "sort"sv)
|
||||
return Usage::Sort;
|
||||
if (usage == "search"sv)
|
||||
return Usage::Search;
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
// Inverse of usage_from_string(): returns "sort" for Usage::Sort and "search" for Usage::Search.
StringView usage_to_string(Usage usage)
|
||||
{
|
||||
switch (usage) {
|
||||
case Usage::Sort:
|
||||
return "sort"sv;
|
||||
case Usage::Search:
|
||||
return "search"sv;
|
||||
}
|
||||
// Only reachable if `usage` holds a value that is not a declared enumerator.
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
// Returns a clone of `locale` whose Unicode "co" (collation) keyword has been set according to `usage`:
// - Usage::Sort:   "co" is set to the caller-provided `collation`.
// - Usage::Search: "co" is set to "search"; `collation` is not used.
// The input locale is left untouched. The ICU status of the keyword update is VERIFY'd.
static NonnullOwnPtr<icu::Locale> apply_usage_to_locale(icu::Locale const& locale, Usage usage, StringView collation)
|
||||
{
|
||||
auto result = adopt_own(*locale.clone());
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
switch (usage) {
|
||||
case Usage::Sort:
|
||||
result->setUnicodeKeywordValue("co", icu_string_piece(collation), status);
|
||||
break;
|
||||
case Usage::Search:
|
||||
result->setUnicodeKeywordValue("co", "search", status);
|
||||
break;
|
||||
}
|
||||
|
||||
VERIFY(icu_success(status));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Converts a sensitivity option string ("base", "accent", "case" or "variant") to the matching
// Sensitivity enumerator. Any other input reaches VERIFY_NOT_REACHED().
Sensitivity sensitivity_from_string(StringView sensitivity)
|
||||
{
|
||||
if (sensitivity == "base"sv)
|
||||
return Sensitivity::Base;
|
||||
if (sensitivity == "accent"sv)
|
||||
return Sensitivity::Accent;
|
||||
if (sensitivity == "case"sv)
|
||||
return Sensitivity::Case;
|
||||
if (sensitivity == "variant"sv)
|
||||
return Sensitivity::Variant;
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
// Inverse of sensitivity_from_string(): returns "base", "accent", "case" or "variant" for the
// corresponding Sensitivity enumerator.
StringView sensitivity_to_string(Sensitivity sensitivity)
|
||||
{
|
||||
switch (sensitivity) {
|
||||
case Sensitivity::Base:
|
||||
return "base"sv;
|
||||
case Sensitivity::Accent:
|
||||
return "accent"sv;
|
||||
case Sensitivity::Case:
|
||||
return "case"sv;
|
||||
case Sensitivity::Variant:
|
||||
return "variant"sv;
|
||||
}
|
||||
// Only reachable if `sensitivity` holds a value that is not a declared enumerator.
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
// Maps a Sensitivity to the ICU value that Collator::create() passes as UCOL_STRENGTH:
// Base -> UCOL_PRIMARY, Accent -> UCOL_SECONDARY, Case -> UCOL_PRIMARY, Variant -> UCOL_TERTIARY.
static constexpr UColAttributeValue icu_sensitivity(Sensitivity sensitivity)
|
||||
{
|
||||
switch (sensitivity) {
|
||||
case Sensitivity::Base:
|
||||
return UCOL_PRIMARY;
|
||||
case Sensitivity::Accent:
|
||||
return UCOL_SECONDARY;
|
||||
case Sensitivity::Case:
|
||||
// Same strength as Base; Collator::create() separately turns UCOL_CASE_LEVEL on for this value.
return UCOL_PRIMARY;
|
||||
case Sensitivity::Variant:
|
||||
return UCOL_TERTIARY;
|
||||
}
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
// Converts a caseFirst option string ("upper", "lower" or "false") to the matching CaseFirst
// enumerator. Any other input reaches VERIFY_NOT_REACHED().
CaseFirst case_first_from_string(StringView case_first)
|
||||
{
|
||||
if (case_first == "upper"sv)
|
||||
return CaseFirst::Upper;
|
||||
if (case_first == "lower"sv)
|
||||
return CaseFirst::Lower;
|
||||
if (case_first == "false"sv)
|
||||
return CaseFirst::False;
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
// Inverse of case_first_from_string(): returns "upper", "lower" or "false" for the corresponding
// CaseFirst enumerator.
StringView case_first_to_string(CaseFirst case_first)
|
||||
{
|
||||
switch (case_first) {
|
||||
case CaseFirst::Upper:
|
||||
return "upper"sv;
|
||||
case CaseFirst::Lower:
|
||||
return "lower"sv;
|
||||
case CaseFirst::False:
|
||||
return "false"sv;
|
||||
}
|
||||
// Only reachable if `case_first` holds a value that is not a declared enumerator.
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
// Maps a CaseFirst to the ICU value that Collator::create() passes as UCOL_CASE_FIRST:
// Upper -> UCOL_UPPER_FIRST, Lower -> UCOL_LOWER_FIRST, False -> UCOL_OFF.
static constexpr UColAttributeValue icu_case_first(CaseFirst case_first)
|
||||
{
|
||||
switch (case_first) {
|
||||
case CaseFirst::Upper:
|
||||
return UCOL_UPPER_FIRST;
|
||||
case CaseFirst::Lower:
|
||||
return UCOL_LOWER_FIRST;
|
||||
case CaseFirst::False:
|
||||
return UCOL_OFF;
|
||||
}
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
// Concrete Collator that owns an icu::Collator and forwards comparisons to it.
// Instances are produced by Collator::create().
class CollatorImpl : public Collator {
|
||||
public:
|
||||
// Takes ownership of an ICU collator; this class does not configure it any further.
explicit CollatorImpl(NonnullOwnPtr<icu::Collator> collator)
|
||||
: m_collator(move(collator))
|
||||
{
|
||||
}
|
||||
|
||||
// Compares `lhs` with `rhs` through ICU's compareUTF8() and translates the result:
// UCOL_LESS -> Order::Before, UCOL_EQUAL -> Order::Equal, UCOL_GREATER -> Order::After.
// The ICU status of the comparison is VERIFY'd.
virtual Collator::Order compare(StringView lhs, StringView rhs) const override
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
auto result = m_collator->compareUTF8(icu_string_piece(lhs), icu_string_piece(rhs), status);
|
||||
VERIFY(icu_success(status));
|
||||
|
||||
switch (result) {
|
||||
case UCOL_LESS:
|
||||
return Order::Before;
|
||||
case UCOL_EQUAL:
|
||||
return Order::Equal;
|
||||
case UCOL_GREATER:
|
||||
return Order::After;
|
||||
}
|
||||
|
||||
// Only reachable if ICU returned a value outside the three cases above.
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
private:
|
||||
NonnullOwnPtr<icu::Collator> m_collator;
|
||||
};
|
||||
|
||||
// Creates a Collator for the given locale and collation options, backed by an ICU collator.
//
// - locale:             looked up through LocaleData::for_locale(); the lookup is VERIFY'd to succeed.
// - usage / collation:  applied to the locale's Unicode "co" keyword via apply_usage_to_locale().
// - sensitivity:        selects UCOL_STRENGTH, and turns UCOL_CASE_LEVEL on only for Sensitivity::Case.
// - case_first:         selects UCOL_CASE_FIRST.
// - numeric:            toggles UCOL_NUMERIC_COLLATION.
// - ignore_punctuation: selects UCOL_SHIFTED (true) or UCOL_NON_IGNORABLE (false) for
//                       UCOL_ALTERNATE_HANDLING.
//
// UCOL_NORMALIZATION_MODE is always switched on. The status of every ICU call is VERIFY'd.
NonnullOwnPtr<Collator> Collator::create(
    StringView locale,
    Usage usage,
    StringView collation,
    Sensitivity sensitivity,
    CaseFirst case_first,
    bool numeric,
    bool ignore_punctuation)
{
    UErrorCode status = U_ZERO_ERROR;

    auto locale_data = LocaleData::for_locale(locale);
    VERIFY(locale_data.has_value());

    auto locale_with_usage = apply_usage_to_locale(locale_data->locale(), usage, collation);

    // Verify the status and the pointer *before* dereferencing: on failure createInstance() does not
    // hand back a usable object, and forming a reference from a null pointer is undefined behavior.
    auto* icu_collator = icu::Collator::createInstance(*locale_with_usage, status);
    VERIFY(icu_success(status));
    VERIFY(icu_collator != nullptr);

    auto collator = adopt_own(*icu_collator);

    auto set_attribute = [&](UColAttribute attribute, UColAttributeValue value) {
        collator->setAttribute(attribute, value, status);
        VERIFY(icu_success(status));
    };

    set_attribute(UCOL_STRENGTH, icu_sensitivity(sensitivity));
    set_attribute(UCOL_CASE_LEVEL, sensitivity == Sensitivity::Case ? UCOL_ON : UCOL_OFF);
    set_attribute(UCOL_CASE_FIRST, icu_case_first(case_first));
    set_attribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF);
    set_attribute(UCOL_ALTERNATE_HANDLING, ignore_punctuation ? UCOL_SHIFTED : UCOL_NON_IGNORABLE);
    set_attribute(UCOL_NORMALIZATION_MODE, UCOL_ON);

    return adopt_own(*new CollatorImpl(move(collator)));
}
|
||||
|
||||
}
|
62
Userland/Libraries/LibUnicode/Collator.h
Normal file
62
Userland/Libraries/LibUnicode/Collator.h
Normal file
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/NonnullOwnPtr.h>
|
||||
#include <AK/StringView.h>
|
||||
|
||||
namespace Unicode {
|
||||
|
||||
// Intended use of a collator: ordering strings (Sort) or matching strings (Search).
enum class Usage {
|
||||
Sort,
|
||||
Search,
|
||||
};
|
||||
// Conversions between Usage and its string form ("sort" / "search").
Usage usage_from_string(StringView);
|
||||
StringView usage_to_string(Usage);
|
||||
|
||||
// Collator sensitivity option; see Collator::create() for how each value is applied.
enum class Sensitivity {
|
||||
Base,
|
||||
Accent,
|
||||
Case,
|
||||
Variant,
|
||||
};
|
||||
// Conversions between Sensitivity and its string form ("base" / "accent" / "case" / "variant").
Sensitivity sensitivity_from_string(StringView);
|
||||
StringView sensitivity_to_string(Sensitivity);
|
||||
|
||||
// Collator caseFirst option: order upper case first, lower case first, or no case-first preference (False).
enum class CaseFirst {
|
||||
Upper,
|
||||
Lower,
|
||||
False,
|
||||
};
|
||||
// Conversions between CaseFirst and its string form ("upper" / "lower" / "false").
CaseFirst case_first_from_string(StringView);
|
||||
StringView case_first_to_string(CaseFirst);
|
||||
|
||||
// Abstract string collator. Instances are obtained from create(); the default constructor is
// protected so that only subclasses can construct one directly.
class Collator {
|
||||
public:
|
||||
// Builds a collator for `locale` configured with the given usage, collation, sensitivity,
// case-first, numeric and ignore-punctuation options.
static NonnullOwnPtr<Collator> create(
|
||||
StringView locale,
|
||||
Usage,
|
||||
StringView collation,
|
||||
Sensitivity,
|
||||
CaseFirst,
|
||||
bool numeric,
|
||||
bool ignore_punctuation);
|
||||
|
||||
virtual ~Collator() = default;
|
||||
|
||||
// Result of compare(): where the first string is ordered relative to the second.
enum class Order {
|
||||
Before,
|
||||
Equal,
|
||||
After,
|
||||
};
|
||||
// Compares two strings according to this collator's configuration.
virtual Order compare(StringView, StringView) const = 0;
|
||||
|
||||
protected:
|
||||
Collator() = default;
|
||||
};
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue