LibJS+LibUnicode: Port Intl.Collator to UTF-16 strings

This commit is contained in:
Timothy Flynn 2025-07-23 13:32:51 -04:00 committed by Andreas Kling
commit b2f053e783
Notes: github-actions[bot] 2025-07-24 08:41:46 +00:00
7 changed files with 30 additions and 12 deletions

View file

@ -48,17 +48,17 @@ ThrowCompletionOr<Value> CollatorCompareFunction::call()
// 4. If y is not provided, let y be undefined.
// 5. Let X be ? ToString(x).
auto x = TRY(vm.argument(0).to_string(vm));
auto x = TRY(vm.argument(0).to_utf16_string(vm));
// 6. Let Y be ? ToString(y).
auto y = TRY(vm.argument(1).to_string(vm));
auto y = TRY(vm.argument(1).to_utf16_string(vm));
// 7. Return CompareStrings(collator, X, Y).
return compare_strings(m_collator, x, y);
}
// 10.3.3.2 CompareStrings ( collator, x, y ), https://tc39.es/ecma402/#sec-collator-comparestrings
int compare_strings(Collator const& collator, StringView x, StringView y)
int compare_strings(Collator const& collator, Utf16View const& x, Utf16View const& y)
{
auto result = collator.collator().compare(x, y);

View file

@ -30,6 +30,6 @@ private:
GC::Ref<Collator> m_collator; // [[Collator]]
};
int compare_strings(Collator const&, StringView x, StringView y);
// 10.3.3.2 CompareStrings ( collator, x, y ), https://tc39.es/ecma402/#sec-collator-comparestrings
// Compares the UTF-16 views x and y using the given Intl.Collator.
// NOTE(review): the int result presumably follows the usual negative / zero / positive
// ordering convention - confirm against the definition.
int compare_strings(Collator const&, Utf16View const& x, Utf16View const& y);
}

View file

@ -538,10 +538,10 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::locale_compare)
auto object = TRY(require_object_coercible(vm, vm.this_value()));
// 2. Let S be ? ToString(O).
auto string = TRY(object.to_string(vm));
auto string = TRY(object.to_utf16_string(vm));
// 3. Let thatValue be ? ToString(that).
auto that_value = TRY(vm.argument(0).to_string(vm));
auto that_value = TRY(vm.argument(0).to_utf16_string(vm));
// 4. Let collator be ? Construct(%Collator%, « locales, options »).
auto locales = vm.argument(1);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
* Copyright (c) 2024-2025, Tim Flynn <trflynn89@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -168,11 +168,14 @@ public:
{
}
virtual Collator::Order compare(StringView lhs, StringView rhs) const override
virtual Collator::Order compare(Utf16View const& lhs, Utf16View const& rhs) const override
{
UErrorCode status = U_ZERO_ERROR;
auto result = m_collator->compareUTF8(icu_string_piece(lhs), icu_string_piece(rhs), status);
auto lhs_it = icu_string_iterator(lhs);
auto rhs_it = icu_string_iterator(rhs);
auto result = m_collator->compare(lhs_it, rhs_it, status);
VERIFY(icu_success(status));
switch (result) {

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
* Copyright (c) 2024-2025, Tim Flynn <trflynn89@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -53,7 +53,7 @@ public:
Equal,
After,
};
virtual Order compare(StringView, StringView) const = 0;
// Compares two UTF-16 string views and reports the outcome as an Order value.
// Pure virtual: the concrete collator implementation supplies the comparison.
virtual Order compare(Utf16View const&, Utf16View const&) const = 0;
virtual Sensitivity sensitivity() const = 0;
virtual bool ignore_punctuation() const = 0;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
* Copyright (c) 2024-2025, Tim Flynn <trflynn89@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -162,4 +162,16 @@ String icu_string_to_string(UChar const* string, i32 length)
return MUST(Utf16View { string, static_cast<size_t>(length) }.to_utf8());
}
// Wraps `string` in an ICU UCharIterator so it can be handed to ICU APIs that accept
// character iterators.
//
// ASCII-backed views are exposed through ICU's UTF-8 iterator (ASCII is a subset of UTF-8);
// all other views are exposed through the UTF-16 string iterator. ICU stores the pointer it
// is given rather than copying the text, so the storage behind `string` must stay alive for
// as long as the returned iterator is in use.
UCharIterator icu_string_iterator(Utf16View const& string)
{
    // Both ICU setters take the length as a signed 32-bit count.
    auto const code_unit_count = static_cast<i32>(string.length_in_code_units());

    UCharIterator result;

    if (!string.has_ascii_storage()) {
        uiter_setString(&result, string.utf16_span().data(), code_unit_count);
        return result;
    }

    uiter_setUTF8(&result, string.ascii_span().data(), code_unit_count);
    return result;
}
}

View file

@ -16,6 +16,7 @@
#include <unicode/locid.h>
#include <unicode/strenum.h>
#include <unicode/stringpiece.h>
#include <unicode/uiter.h>
#include <unicode/uloc.h>
#include <unicode/unistr.h>
#include <unicode/utypes.h>
@ -102,6 +103,8 @@ Vector<icu::UnicodeString> icu_string_list(ReadonlySpan<String> strings);
String icu_string_to_string(icu::UnicodeString const& string);
String icu_string_to_string(UChar const*, i32 length);
// Creates an ICU UCharIterator over the given UTF-16 view. The implementation selects ICU's
// UTF-8 iterator for ASCII-backed views and the UTF-16 string iterator otherwise.
UCharIterator icu_string_iterator(Utf16View const&);
template<typename Filter>
Vector<String> icu_string_enumeration_to_list(OwnPtr<icu::StringEnumeration> enumeration, char const* bcp47_keyword, Filter&& filter)
{