mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-28 19:59:17 +00:00
LibJS+LibUnicode: Fully implement Intl.Collator with ICU
We were never able to implement anything other than a basic, locale-unaware collator with the JSON export of the CLDR, as it did not have collation data. We can now use ICU to implement collation.
This commit is contained in:
parent
dc0d5da086
commit
eb7e3583c9
Notes:
github-actions[bot]
2024-08-15 11:45:44 +00:00
Author: https://github.com/trflynn89
Commit: eb7e3583c9
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/1078
Reviewed-by: https://github.com/awesomekling
11 changed files with 384 additions and 142 deletions
196
Userland/Libraries/LibUnicode/Collator.cpp
Normal file
196
Userland/Libraries/LibUnicode/Collator.cpp
Normal file
|
@ -0,0 +1,196 @@
|
|||
/*
|
||||
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibUnicode/Collator.h>
|
||||
#include <LibUnicode/ICU.h>
|
||||
|
||||
#include <unicode/coll.h>
|
||||
|
||||
namespace Unicode {
|
||||
|
||||
// Parses the textual form of a collation usage. Only "sort" and "search" are accepted; any other
// input reaches VERIFY_NOT_REACHED().
Usage usage_from_string(StringView usage)
{
    bool const wants_search = usage == "search"sv;
    if (wants_search)
        return Usage::Search;

    bool const wants_sort = usage == "sort"sv;
    if (wants_sort)
        return Usage::Sort;

    VERIFY_NOT_REACHED();
}
|
||||
|
||||
// Returns the textual form ("sort" or "search") of a Usage value.
StringView usage_to_string(Usage usage)
{
    // There is intentionally no default label: every enumerator is listed explicitly, and a value
    // outside the enumeration falls through to VERIFY_NOT_REACHED() below.
    switch (usage) {
    case Usage::Search:
        return "search"sv;
    case Usage::Sort:
        return "sort"sv;
    }

    VERIFY_NOT_REACHED();
}
|
||||
|
||||
// Returns a copy of `locale` whose Unicode "co" keyword reflects the requested usage: the
// caller-provided `collation` for Usage::Sort, or the literal "search" for Usage::Search.
// Verifies that ICU reported success for the keyword update.
static NonnullOwnPtr<icu::Locale> apply_usage_to_locale(icu::Locale const& locale, Usage usage, StringView collation)
{
    UErrorCode status = U_ZERO_ERROR;
    auto locale_copy = adopt_own(*locale.clone());

    // Both usages write the same keyword; only the value differs.
    auto set_collation_type = [&](icu::StringPiece collation_type) {
        locale_copy->setUnicodeKeywordValue("co", collation_type, status);
    };

    switch (usage) {
    case Usage::Search:
        set_collation_type("search");
        break;
    case Usage::Sort:
        set_collation_type(icu_string_piece(collation));
        break;
    }

    VERIFY(icu_success(status));
    return locale_copy;
}
|
||||
|
||||
// Parses the textual form of a collator sensitivity ("base", "accent", "case" or "variant").
// Any other input reaches VERIFY_NOT_REACHED().
Sensitivity sensitivity_from_string(StringView sensitivity)
{
    struct NamedSensitivity {
        StringView name;
        Sensitivity value;
    };

    static constexpr NamedSensitivity known_sensitivities[] = {
        { "base"sv, Sensitivity::Base },
        { "accent"sv, Sensitivity::Accent },
        { "case"sv, Sensitivity::Case },
        { "variant"sv, Sensitivity::Variant },
    };

    for (auto const& candidate : known_sensitivities) {
        if (sensitivity == candidate.name)
            return candidate.value;
    }

    VERIFY_NOT_REACHED();
}
|
||||
|
||||
// Returns the textual form ("base", "accent", "case" or "variant") of a Sensitivity value.
StringView sensitivity_to_string(Sensitivity sensitivity)
{
    // There is intentionally no default label: every enumerator is listed explicitly, and a value
    // outside the enumeration falls through to VERIFY_NOT_REACHED() below.
    switch (sensitivity) {
    case Sensitivity::Variant:
        return "variant"sv;
    case Sensitivity::Case:
        return "case"sv;
    case Sensitivity::Accent:
        return "accent"sv;
    case Sensitivity::Base:
        return "base"sv;
    }

    VERIFY_NOT_REACHED();
}
|
||||
|
||||
// Maps a Sensitivity to the ICU value used for the UCOL_STRENGTH attribute.
static constexpr UColAttributeValue icu_sensitivity(Sensitivity sensitivity)
{
    switch (sensitivity) {
    // Base and Case share primary strength. Collator::create() tells them apart by additionally
    // turning on UCOL_CASE_LEVEL for Sensitivity::Case.
    case Sensitivity::Base:
    case Sensitivity::Case:
        return UCOL_PRIMARY;
    case Sensitivity::Accent:
        return UCOL_SECONDARY;
    case Sensitivity::Variant:
        return UCOL_TERTIARY;
    }

    VERIFY_NOT_REACHED();
}
|
||||
|
||||
// Parses the textual form of the caseFirst option ("upper", "lower" or "false").
// Any other input reaches VERIFY_NOT_REACHED().
CaseFirst case_first_from_string(StringView case_first)
{
    bool const is_disabled = case_first == "false"sv;
    if (is_disabled)
        return CaseFirst::False;

    bool const prefers_lower = case_first == "lower"sv;
    if (prefers_lower)
        return CaseFirst::Lower;

    bool const prefers_upper = case_first == "upper"sv;
    if (prefers_upper)
        return CaseFirst::Upper;

    VERIFY_NOT_REACHED();
}
|
||||
|
||||
// Returns the textual form ("upper", "lower" or "false") of a CaseFirst value.
StringView case_first_to_string(CaseFirst case_first)
{
    // There is intentionally no default label: every enumerator is listed explicitly, and a value
    // outside the enumeration falls through to VERIFY_NOT_REACHED() below.
    switch (case_first) {
    case CaseFirst::False:
        return "false"sv;
    case CaseFirst::Lower:
        return "lower"sv;
    case CaseFirst::Upper:
        return "upper"sv;
    }

    VERIFY_NOT_REACHED();
}
|
||||
|
||||
// Maps a CaseFirst to the ICU value used for the UCOL_CASE_FIRST attribute.
static constexpr UColAttributeValue icu_case_first(CaseFirst case_first)
{
    switch (case_first) {
    case CaseFirst::False:
        // "false" is expressed to ICU by switching the attribute off.
        return UCOL_OFF;
    case CaseFirst::Lower:
        return UCOL_LOWER_FIRST;
    case CaseFirst::Upper:
        return UCOL_UPPER_FIRST;
    }

    VERIFY_NOT_REACHED();
}
|
||||
|
||||
class CollatorImpl : public Collator {
|
||||
public:
|
||||
explicit CollatorImpl(NonnullOwnPtr<icu::Collator> collator)
|
||||
: m_collator(move(collator))
|
||||
{
|
||||
}
|
||||
|
||||
virtual Collator::Order compare(StringView lhs, StringView rhs) const override
|
||||
{
|
||||
UErrorCode status = U_ZERO_ERROR;
|
||||
|
||||
auto result = m_collator->compareUTF8(icu_string_piece(lhs), icu_string_piece(rhs), status);
|
||||
VERIFY(icu_success(status));
|
||||
|
||||
switch (result) {
|
||||
case UCOL_LESS:
|
||||
return Order::Before;
|
||||
case UCOL_EQUAL:
|
||||
return Order::Equal;
|
||||
case UCOL_GREATER:
|
||||
return Order::After;
|
||||
}
|
||||
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
private:
|
||||
NonnullOwnPtr<icu::Collator> m_collator;
|
||||
};
|
||||
|
||||
// Creates an ICU-backed Collator for `locale`, configured from the given options.
//
// - locale: looked up via LocaleData::for_locale(); the lookup must succeed (verified).
// - usage / collation: select the locale's "co" keyword (see apply_usage_to_locale()).
// - sensitivity: sets UCOL_STRENGTH, and turns on UCOL_CASE_LEVEL only for Sensitivity::Case.
// - case_first: sets UCOL_CASE_FIRST.
// - numeric: toggles UCOL_NUMERIC_COLLATION.
// - ignore_punctuation: selects UCOL_SHIFTED (true) or UCOL_NON_IGNORABLE (false) for
//   UCOL_ALTERNATE_HANDLING.
//
// Every ICU call is verified; a failure aborts via VERIFY rather than being reported to the caller.
NonnullOwnPtr<Collator> Collator::create(
    StringView locale,
    Usage usage,
    StringView collation,
    Sensitivity sensitivity,
    CaseFirst case_first,
    bool numeric,
    bool ignore_punctuation)
{
    UErrorCode status = U_ZERO_ERROR;

    auto locale_data = LocaleData::for_locale(locale);
    VERIFY(locale_data.has_value());

    auto locale_with_usage = apply_usage_to_locale(locale_data->locale(), usage, collation);

    // Check the status (and the pointer itself) before dereferencing, so a failed creation trips
    // the VERIFY instead of dereferencing a null pointer first.
    auto* raw_collator = icu::Collator::createInstance(*locale_with_usage, status);
    VERIFY(icu_success(status));
    VERIFY(raw_collator);

    auto collator = adopt_own(*raw_collator);

    auto set_attribute = [&](UColAttribute attribute, UColAttributeValue value) {
        collator->setAttribute(attribute, value, status);
        VERIFY(icu_success(status));
    };

    set_attribute(UCOL_STRENGTH, icu_sensitivity(sensitivity));
    set_attribute(UCOL_CASE_LEVEL, sensitivity == Sensitivity::Case ? UCOL_ON : UCOL_OFF);
    set_attribute(UCOL_CASE_FIRST, icu_case_first(case_first));
    set_attribute(UCOL_NUMERIC_COLLATION, numeric ? UCOL_ON : UCOL_OFF);
    set_attribute(UCOL_ALTERNATE_HANDLING, ignore_punctuation ? UCOL_SHIFTED : UCOL_NON_IGNORABLE);
    // Normalization is enabled unconditionally, independent of the caller's options.
    set_attribute(UCOL_NORMALIZATION_MODE, UCOL_ON);

    return adopt_own(*new CollatorImpl(move(collator)));
}
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue