LibJS+LibUnicode: Port Intl.ListFormat to UTF-16 strings

This commit is contained in:
Timothy Flynn 2025-07-23 14:42:57 -04:00 committed by Andreas Kling
commit db2148b44a
Notes: github-actions[bot] 2025-07-24 08:41:31 +00:00
7 changed files with 39 additions and 33 deletions

View file

@ -731,13 +731,13 @@ Vector<DurationFormatPart> list_format_parts(VM& vm, DurationFormat const& durat
auto list_format = construct_list_format(vm, duration_format, list_format_options);
// 7. Let strings be a new empty List.
Vector<String> strings;
Vector<Utf16String> strings;
strings.ensure_capacity(partitioned_parts_list.size());
// 8. For each element parts of partitionedPartsList, do
for (auto const& parts : partitioned_parts_list) {
// a. Let string be the empty String.
StringBuilder string;
StringBuilder string(StringBuilder::Mode::UTF16);
// b. For each Record { [[Type]], [[Value]], [[Unit]] } part in parts, do
for (auto const& part : parts) {
@ -746,7 +746,7 @@ Vector<DurationFormatPart> list_format_parts(VM& vm, DurationFormat const& durat
}
// c. Append string to strings.
strings.unchecked_append(MUST(string.to_string()));
strings.unchecked_append(string.to_utf16_string());
}
// 9. Let formattedPartsList be CreatePartsFromList(lf, strings).
@ -786,7 +786,7 @@ Vector<DurationFormatPart> list_format_parts(VM& vm, DurationFormat const& durat
VERIFY(list_part.type == "literal"sv);
// ii. Append the Record { [[Type]]: "literal", [[Value]]: listPart.[[Value]], [[Unit]]: empty } to flattenedPartsList.
flattened_parts_list.append({ .type = "literal"sv, .value = move(list_part.value), .unit = {} });
flattened_parts_list.append({ .type = "literal"sv, .value = list_part.value.to_utf8_but_should_be_ported_to_utf16(), .unit = {} });
}
}

View file

@ -35,13 +35,13 @@ ReadonlySpan<ResolutionOptionDescriptor> ListFormat::resolution_option_descripto
}
// 14.5.2 CreatePartsFromList ( listFormat, list ), https://tc39.es/ecma402/#sec-createpartsfromlist
Vector<Unicode::ListFormat::Partition> create_parts_from_list(ListFormat const& list_format, ReadonlySpan<String> list)
Vector<Unicode::ListFormat::Partition> create_parts_from_list(ListFormat const& list_format, ReadonlySpan<Utf16String> list)
{
return list_format.formatter().format_to_parts(list);
}
// 14.5.3 FormatList ( listFormat, list ), https://tc39.es/ecma402/#sec-formatlist
String format_list(ListFormat const& list_format, ReadonlySpan<String> list)
Utf16String format_list(ListFormat const& list_format, ReadonlySpan<Utf16String> list)
{
// 1. Let parts be ! CreatePartsFromList(listFormat, list).
// 2. Let result be the empty String.
@ -52,7 +52,7 @@ String format_list(ListFormat const& list_format, ReadonlySpan<String> list)
}
// 14.5.4 FormatListToParts ( listFormat, list ), https://tc39.es/ecma402/#sec-formatlisttoparts
GC::Ref<Array> format_list_to_parts(VM& vm, ListFormat const& list_format, ReadonlySpan<String> list)
GC::Ref<Array> format_list_to_parts(VM& vm, ListFormat const& list_format, ReadonlySpan<Utf16String> list)
{
auto& realm = *vm.current_realm();
@ -88,19 +88,19 @@ GC::Ref<Array> format_list_to_parts(VM& vm, ListFormat const& list_format, Reado
}
// 14.5.5 StringListFromIterable ( iterable ), https://tc39.es/ecma402/#sec-createstringlistfromiterable
ThrowCompletionOr<Vector<String>> string_list_from_iterable(VM& vm, Value iterable)
ThrowCompletionOr<Vector<Utf16String>> string_list_from_iterable(VM& vm, Value iterable)
{
// 1. If iterable is undefined, then
if (iterable.is_undefined()) {
// a. Return a new empty List.
return Vector<String> {};
return Vector<Utf16String> {};
}
// 2. Let iteratorRecord be ? GetIterator(iterable, sync).
auto iterator_record = TRY(get_iterator(vm, iterable, IteratorHint::Sync));
// 3. Let list be a new empty List.
Vector<String> list;
Vector<Utf16String> list;
// 4. Repeat,
while (true) {
@ -123,7 +123,7 @@ ThrowCompletionOr<Vector<String>> string_list_from_iterable(VM& vm, Value iterab
}
// iii. Append next to list.
list.append(next->as_string().utf8_string());
list.append(next->as_string().utf16_string());
}
}

View file

@ -57,9 +57,9 @@ private:
OwnPtr<Unicode::ListFormat> m_formatter;
};
Vector<Unicode::ListFormat::Partition> create_parts_from_list(ListFormat const&, ReadonlySpan<String> list);
String format_list(ListFormat const&, ReadonlySpan<String> list);
GC::Ref<Array> format_list_to_parts(VM&, ListFormat const&, ReadonlySpan<String> list);
ThrowCompletionOr<Vector<String>> string_list_from_iterable(VM&, Value iterable);
Vector<Unicode::ListFormat::Partition> create_parts_from_list(ListFormat const&, ReadonlySpan<Utf16String> list);
Utf16String format_list(ListFormat const&, ReadonlySpan<Utf16String> list);
GC::Ref<Array> format_list_to_parts(VM&, ListFormat const&, ReadonlySpan<Utf16String> list);
ThrowCompletionOr<Vector<Utf16String>> string_list_from_iterable(VM&, Value iterable);
}

View file

@ -138,16 +138,13 @@ TimeZoneData::TimeZoneData(NonnullOwnPtr<icu::TimeZone> time_zone)
{
}
Vector<icu::UnicodeString> icu_string_list(ReadonlySpan<String> strings)
Vector<icu::UnicodeString> icu_string_list(ReadonlySpan<Utf16String> strings)
{
Vector<icu::UnicodeString> result;
result.ensure_capacity(strings.size());
for (auto const& string : strings) {
auto view = string.bytes_as_string_view();
icu::UnicodeString icu_string(view.characters_without_null_termination(), static_cast<i32>(view.length()));
result.unchecked_append(move(icu_string));
}
for (auto const& string : strings)
result.unchecked_append(icu_string(string));
return result;
}

View file

@ -99,7 +99,16 @@ ALWAYS_INLINE icu::UnicodeString icu_string(StringView string)
return icu::UnicodeString::fromUTF8(icu_string_piece(string));
}
Vector<icu::UnicodeString> icu_string_list(ReadonlySpan<String> strings);
// Converts a Utf16View into an icu::UnicodeString.
//
// When the view is backed by ASCII storage, its bytes are decoded through fromUTF8 and the result owns its own copy of
// the text. Otherwise no copy is made: the returned icu::UnicodeString is unowned (i.e. it is effectively a view) over
// the UTF-16 code units of `string`, so the storage behind `string` must outlive the returned object.
ALWAYS_INLINE icu::UnicodeString icu_string(Utf16View const& string)
{
    if (!string.has_ascii_storage()) {
        auto const* code_units = string.utf16_span().data();
        auto const code_unit_count = static_cast<i32>(string.length_in_code_units());

        // Passing `false` for isTerminated selects ICU's aliasing constructor for a buffer that is not NUL-terminated.
        return { false, code_units, code_unit_count };
    }

    auto const ascii_piece = icu_string_piece(string.bytes());
    return icu::UnicodeString::fromUTF8(ascii_piece);
}
Vector<icu::UnicodeString> icu_string_list(ReadonlySpan<Utf16String> strings);
String icu_string_to_string(icu::UnicodeString const& string);
String icu_string_to_string(UChar const*, i32 length);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
* Copyright (c) 2024-2025, Tim Flynn <trflynn89@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -82,7 +82,7 @@ public:
virtual ~ListFormatImpl() override = default;
virtual String format(ReadonlySpan<String> list) const override
virtual Utf16String format(ReadonlySpan<Utf16String> list) const override
{
UErrorCode status = U_ZERO_ERROR;
@ -94,10 +94,10 @@ public:
if (icu_failure(status))
return {};
return icu_string_to_string(formatted_string);
return icu_string_to_utf16_string(formatted_string);
}
virtual Vector<Partition> format_to_parts(ReadonlySpan<String> list) const override
virtual Vector<Partition> format_to_parts(ReadonlySpan<Utf16String> list) const override
{
UErrorCode status = U_ZERO_ERROR;
@ -118,14 +118,14 @@ public:
auto type = icu_list_format_field_to_string(position.getField());
auto part = formatted_string.tempSubStringBetween(position.getStart(), position.getLimit());
result.empend(type, icu_string_to_string(part));
result.empend(type, icu_string_to_utf16_string(part));
}
return result;
}
private:
Optional<icu::FormattedList> format_list_impl(ReadonlySpan<String> list) const
Optional<icu::FormattedList> format_list_impl(ReadonlySpan<Utf16String> list) const
{
UErrorCode status = U_ZERO_ERROR;

View file

@ -1,12 +1,12 @@
/*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org>
* Copyright (c) 2024-2025, Tim Flynn <trflynn89@ladybird.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/String.h>
#include <AK/Utf16String.h>
#include <AK/Vector.h>
#include <LibUnicode/Locale.h>
@ -27,11 +27,11 @@ public:
struct Partition {
StringView type;
String value;
Utf16String value;
};
virtual String format(ReadonlySpan<String> list) const = 0;
virtual Vector<Partition> format_to_parts(ReadonlySpan<String> list) const = 0;
virtual Utf16String format(ReadonlySpan<Utf16String> list) const = 0;
virtual Vector<Partition> format_to_parts(ReadonlySpan<Utf16String> list) const = 0;
protected:
ListFormat() = default;