LibJS+LibUnicode: Port Intl.Collator to UTF-16 strings

This commit is contained in:
Timothy Flynn 2025-07-23 13:32:51 -04:00 committed by Andreas Kling
commit b2f053e783
Notes: github-actions[bot] 2025-07-24 08:41:46 +00:00
7 changed files with 30 additions and 12 deletions

View file

@ -48,17 +48,17 @@ ThrowCompletionOr<Value> CollatorCompareFunction::call()
// 4. If y is not provided, let y be undefined. // 4. If y is not provided, let y be undefined.
// 5. Let X be ? ToString(x). // 5. Let X be ? ToString(x).
auto x = TRY(vm.argument(0).to_string(vm)); auto x = TRY(vm.argument(0).to_utf16_string(vm));
// 6. Let Y be ? ToString(y). // 6. Let Y be ? ToString(y).
auto y = TRY(vm.argument(1).to_string(vm)); auto y = TRY(vm.argument(1).to_utf16_string(vm));
// 7. Return CompareStrings(collator, X, Y). // 7. Return CompareStrings(collator, X, Y).
return compare_strings(m_collator, x, y); return compare_strings(m_collator, x, y);
} }
// 10.3.3.2 CompareStrings ( collator, x, y ), https://tc39.es/ecma402/#sec-collator-comparestrings // 10.3.3.2 CompareStrings ( collator, x, y ), https://tc39.es/ecma402/#sec-collator-comparestrings
int compare_strings(Collator const& collator, StringView x, StringView y) int compare_strings(Collator const& collator, Utf16View const& x, Utf16View const& y)
{ {
auto result = collator.collator().compare(x, y); auto result = collator.collator().compare(x, y);

View file

@ -30,6 +30,6 @@ private:
GC::Ref<Collator> m_collator; // [[Collator]] GC::Ref<Collator> m_collator; // [[Collator]]
}; };
int compare_strings(Collator const&, StringView x, StringView y); int compare_strings(Collator const&, Utf16View const& x, Utf16View const& y);
} }

View file

@ -538,10 +538,10 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::locale_compare)
auto object = TRY(require_object_coercible(vm, vm.this_value())); auto object = TRY(require_object_coercible(vm, vm.this_value()));
// 2. Let S be ? ToString(O). // 2. Let S be ? ToString(O).
auto string = TRY(object.to_string(vm)); auto string = TRY(object.to_utf16_string(vm));
// 3. Let thatValue be ? ToString(that). // 3. Let thatValue be ? ToString(that).
auto that_value = TRY(vm.argument(0).to_string(vm)); auto that_value = TRY(vm.argument(0).to_utf16_string(vm));
// 4. Let collator be ? Construct(%Collator%, « locales, options »). // 4. Let collator be ? Construct(%Collator%, « locales, options »).
auto locales = vm.argument(1); auto locales = vm.argument(1);

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org> * Copyright (c) 2024-2025, Tim Flynn <trflynn89@ladybird.org>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
@ -168,11 +168,14 @@ public:
{ {
} }
virtual Collator::Order compare(StringView lhs, StringView rhs) const override virtual Collator::Order compare(Utf16View const& lhs, Utf16View const& rhs) const override
{ {
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
auto result = m_collator->compareUTF8(icu_string_piece(lhs), icu_string_piece(rhs), status); auto lhs_it = icu_string_iterator(lhs);
auto rhs_it = icu_string_iterator(rhs);
auto result = m_collator->compare(lhs_it, rhs_it, status);
VERIFY(icu_success(status)); VERIFY(icu_success(status));
switch (result) { switch (result) {

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org> * Copyright (c) 2024-2025, Tim Flynn <trflynn89@ladybird.org>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
@ -53,7 +53,7 @@ public:
Equal, Equal,
After, After,
}; };
virtual Order compare(StringView, StringView) const = 0; virtual Order compare(Utf16View const&, Utf16View const&) const = 0;
virtual Sensitivity sensitivity() const = 0; virtual Sensitivity sensitivity() const = 0;
virtual bool ignore_punctuation() const = 0; virtual bool ignore_punctuation() const = 0;

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org> * Copyright (c) 2024-2025, Tim Flynn <trflynn89@ladybird.org>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
@ -162,4 +162,16 @@ String icu_string_to_string(UChar const* string, i32 length)
return MUST(Utf16View { string, static_cast<size_t>(length) }.to_utf8()); return MUST(Utf16View { string, static_cast<size_t>(length) }.to_utf8());
} }
// Builds an ICU UCharIterator over the code units of `string`, choosing the ICU
// setup routine that matches how the view stores its data.
//
// NOTE(review): per the ICU documentation, uiter_setUTF8() / uiter_setString()
// keep the passed pointer rather than copying the text, so the storage behind
// `string` presumably has to outlive the returned iterator - confirm at call sites.
UCharIterator icu_string_iterator(Utf16View const& string)
{
    // ICU takes a signed 32-bit length; the view reports its length in code units.
    auto const length = static_cast<i32>(string.length_in_code_units());

    UCharIterator iterator;

    // ASCII-backed views are handed to ICU's UTF-8 iterator setup directly.
    if (string.has_ascii_storage()) {
        uiter_setUTF8(&iterator, string.ascii_span().data(), length);
        return iterator;
    }

    // Otherwise iterate over the UTF-16 code units.
    uiter_setString(&iterator, string.utf16_span().data(), length);
    return iterator;
}
} }

View file

@ -16,6 +16,7 @@
#include <unicode/locid.h> #include <unicode/locid.h>
#include <unicode/strenum.h> #include <unicode/strenum.h>
#include <unicode/stringpiece.h> #include <unicode/stringpiece.h>
#include <unicode/uiter.h>
#include <unicode/uloc.h> #include <unicode/uloc.h>
#include <unicode/unistr.h> #include <unicode/unistr.h>
#include <unicode/utypes.h> #include <unicode/utypes.h>
@ -102,6 +103,8 @@ Vector<icu::UnicodeString> icu_string_list(ReadonlySpan<String> strings);
String icu_string_to_string(icu::UnicodeString const& string); String icu_string_to_string(icu::UnicodeString const& string);
String icu_string_to_string(UChar const*, i32 length); String icu_string_to_string(UChar const*, i32 length);
UCharIterator icu_string_iterator(Utf16View const&);
template<typename Filter> template<typename Filter>
Vector<String> icu_string_enumeration_to_list(OwnPtr<icu::StringEnumeration> enumeration, char const* bcp47_keyword, Filter&& filter) Vector<String> icu_string_enumeration_to_list(OwnPtr<icu::StringEnumeration> enumeration, char const* bcp47_keyword, Filter&& filter)
{ {