LibJS+LibUnicode: Port Intl.DisplayNames to UTF-16 strings

This commit is contained in:
Timothy Flynn 2025-07-23 14:31:12 -04:00 committed by Andreas Kling
commit 7d80aabbdb
Notes: github-actions[bot] 2025-07-24 08:41:36 +00:00
7 changed files with 45 additions and 39 deletions

View file

@ -1134,7 +1134,7 @@ ByteString time_zone_string(double time)
// Most implementations seem to prefer the long-form display name of the time zone. Not super important, but we may as well match that behavior. // Most implementations seem to prefer the long-form display name of the time zone. Not super important, but we may as well match that behavior.
if (auto name = Unicode::time_zone_display_name(Unicode::default_locale(), tz_name, in_dst, time); name.has_value()) if (auto name = Unicode::time_zone_display_name(Unicode::default_locale(), tz_name, in_dst, time); name.has_value())
tz_name = name.release_value(); tz_name = name->to_utf8_but_should_be_ported_to_utf16();
// 10. Return the string-concatenation of offsetString and tzName. // 10. Return the string-concatenation of offsetString and tzName.
return ByteString::formatted("{} ({})", offset_string, tz_name); return ByteString::formatted("{} ({})", offset_string, tz_name);

View file

@ -83,7 +83,7 @@ JS_DEFINE_NATIVE_FUNCTION(DisplayNamesPrototype::of)
// 5. Let fields be displayNames.[[Fields]]. // 5. Let fields be displayNames.[[Fields]].
// 6. If fields has a field [[<code>]], return fields.[[<code>]]. // 6. If fields has a field [[<code>]], return fields.[[<code>]].
Optional<String> result; Optional<Utf16String> result;
switch (display_names->type()) { switch (display_names->type()) {
case DisplayNames::Type::Language: case DisplayNames::Type::Language:

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org> * Copyright (c) 2024-2025, Tim Flynn <trflynn89@ladybird.org>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
@ -37,7 +37,7 @@ StringView language_display_to_string(LanguageDisplay language_display)
} }
} }
Optional<String> language_display_name(StringView locale, StringView language, LanguageDisplay display) Optional<Utf16String> language_display_name(StringView locale, StringView language, LanguageDisplay display)
{ {
auto locale_data = LocaleData::for_locale(locale); auto locale_data = LocaleData::for_locale(locale);
if (!locale_data.has_value()) if (!locale_data.has_value())
@ -54,10 +54,10 @@ Optional<String> language_display_name(StringView locale, StringView language, L
icu::UnicodeString result; icu::UnicodeString result;
display_names.localeDisplayName(language_data->locale().getName(), result); display_names.localeDisplayName(language_data->locale().getName(), result);
return icu_string_to_string(result); return icu_string_to_utf16_string(result);
} }
Optional<String> region_display_name(StringView locale, StringView region) Optional<Utf16String> region_display_name(StringView locale, StringView region)
{ {
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
@ -72,10 +72,10 @@ Optional<String> region_display_name(StringView locale, StringView region)
icu::UnicodeString result; icu::UnicodeString result;
locale_data->standard_display_names().regionDisplayName(icu_region.getCountry(), result); locale_data->standard_display_names().regionDisplayName(icu_region.getCountry(), result);
return icu_string_to_string(result); return icu_string_to_utf16_string(result);
} }
Optional<String> script_display_name(StringView locale, StringView script) Optional<Utf16String> script_display_name(StringView locale, StringView script)
{ {
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
@ -90,10 +90,10 @@ Optional<String> script_display_name(StringView locale, StringView script)
icu::UnicodeString result; icu::UnicodeString result;
locale_data->standard_display_names().scriptDisplayName(icu_script.getScript(), result); locale_data->standard_display_names().scriptDisplayName(icu_script.getScript(), result);
return icu_string_to_string(result); return icu_string_to_utf16_string(result);
} }
Optional<String> calendar_display_name(StringView locale, StringView calendar) Optional<Utf16String> calendar_display_name(StringView locale, StringView calendar)
{ {
auto locale_data = LocaleData::for_locale(locale); auto locale_data = LocaleData::for_locale(locale);
if (!locale_data.has_value()) if (!locale_data.has_value())
@ -109,7 +109,7 @@ Optional<String> calendar_display_name(StringView locale, StringView calendar)
icu::UnicodeString result; icu::UnicodeString result;
locale_data->standard_display_names().keyValueDisplayName("calendar", ByteString(calendar).characters(), result); locale_data->standard_display_names().keyValueDisplayName("calendar", ByteString(calendar).characters(), result);
return icu_string_to_string(result); return icu_string_to_utf16_string(result);
} }
static constexpr UDateTimePatternField icu_date_time_field(StringView field) static constexpr UDateTimePatternField icu_date_time_field(StringView field)
@ -155,7 +155,7 @@ static constexpr UDateTimePGDisplayWidth icu_date_time_style(Style style)
VERIFY_NOT_REACHED(); VERIFY_NOT_REACHED();
} }
Optional<String> date_time_field_display_name(StringView locale, StringView field, Style style) Optional<Utf16String> date_time_field_display_name(StringView locale, StringView field, Style style)
{ {
auto locale_data = LocaleData::for_locale(locale); auto locale_data = LocaleData::for_locale(locale);
if (!locale_data.has_value()) if (!locale_data.has_value())
@ -167,10 +167,10 @@ Optional<String> date_time_field_display_name(StringView locale, StringView fiel
icu::UnicodeString result; icu::UnicodeString result;
result = locale_data->date_time_pattern_generator().getFieldDisplayName(icu_field, icu_style); result = locale_data->date_time_pattern_generator().getFieldDisplayName(icu_field, icu_style);
return icu_string_to_string(result); return icu_string_to_utf16_string(result);
} }
Optional<String> time_zone_display_name(StringView locale, StringView time_zone_identifier, TimeZoneOffset::InDST in_dst, double time) Optional<Utf16String> time_zone_display_name(StringView locale, StringView time_zone_identifier, TimeZoneOffset::InDST in_dst, double time)
{ {
auto locale_data = LocaleData::for_locale(locale); auto locale_data = LocaleData::for_locale(locale);
if (!locale_data.has_value()) if (!locale_data.has_value())
@ -183,7 +183,7 @@ Optional<String> time_zone_display_name(StringView locale, StringView time_zone_
if (static_cast<bool>(time_zone_name.isBogus())) if (static_cast<bool>(time_zone_name.isBogus()))
return {}; return {};
return icu_string_to_string(time_zone_name); return icu_string_to_utf16_string(time_zone_name);
} }
static constexpr Array<UChar, 4> icu_currency_code(StringView currency) static constexpr Array<UChar, 4> icu_currency_code(StringView currency)
@ -212,7 +212,7 @@ static constexpr UCurrNameStyle icu_currency_style(Style style)
VERIFY_NOT_REACHED(); VERIFY_NOT_REACHED();
} }
Optional<String> currency_display_name(StringView locale, StringView currency, Style style) Optional<Utf16String> currency_display_name(StringView locale, StringView currency, Style style)
{ {
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
@ -230,10 +230,10 @@ Optional<String> currency_display_name(StringView locale, StringView currency, S
if ((status == U_USING_DEFAULT_WARNING) && (result == icu_currency.data())) if ((status == U_USING_DEFAULT_WARNING) && (result == icu_currency.data()))
return {}; return {};
return icu_string_to_string(result, length); return icu_string_to_utf16_string(result, length);
} }
Optional<String> currency_numeric_display_name(StringView locale, StringView currency) Optional<Utf16String> currency_numeric_display_name(StringView locale, StringView currency)
{ {
UErrorCode status = U_ZERO_ERROR; UErrorCode status = U_ZERO_ERROR;
@ -251,7 +251,7 @@ Optional<String> currency_numeric_display_name(StringView locale, StringView cur
if ((status == U_USING_DEFAULT_WARNING) && (result == icu_currency.data())) if ((status == U_USING_DEFAULT_WARNING) && (result == icu_currency.data()))
return {}; return {};
return icu_string_to_string(result, length); return icu_string_to_utf16_string(result, length);
} }
} }

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2024, Tim Flynn <trflynn89@serenityos.org> * Copyright (c) 2024-2025, Tim Flynn <trflynn89@ladybird.org>
* *
* SPDX-License-Identifier: BSD-2-Clause * SPDX-License-Identifier: BSD-2-Clause
*/ */
@ -7,8 +7,8 @@
#pragma once #pragma once
#include <AK/Optional.h> #include <AK/Optional.h>
#include <AK/String.h>
#include <AK/StringView.h> #include <AK/StringView.h>
#include <AK/Utf16String.h>
#include <LibUnicode/Locale.h> #include <LibUnicode/Locale.h>
#include <LibUnicode/TimeZone.h> #include <LibUnicode/TimeZone.h>
@ -22,13 +22,13 @@ enum class LanguageDisplay {
LanguageDisplay language_display_from_string(StringView language_display); LanguageDisplay language_display_from_string(StringView language_display);
StringView language_display_to_string(LanguageDisplay language_display); StringView language_display_to_string(LanguageDisplay language_display);
Optional<String> language_display_name(StringView locale, StringView language, LanguageDisplay); Optional<Utf16String> language_display_name(StringView locale, StringView language, LanguageDisplay);
Optional<String> region_display_name(StringView locale, StringView region); Optional<Utf16String> region_display_name(StringView locale, StringView region);
Optional<String> script_display_name(StringView locale, StringView script); Optional<Utf16String> script_display_name(StringView locale, StringView script);
Optional<String> calendar_display_name(StringView locale, StringView calendar); Optional<Utf16String> calendar_display_name(StringView locale, StringView calendar);
Optional<String> date_time_field_display_name(StringView locale, StringView field, Style); Optional<Utf16String> date_time_field_display_name(StringView locale, StringView field, Style);
Optional<String> time_zone_display_name(StringView locale, StringView time_zone_identifier, TimeZoneOffset::InDST, double time); Optional<Utf16String> time_zone_display_name(StringView locale, StringView time_zone_identifier, TimeZoneOffset::InDST, double time);
Optional<String> currency_display_name(StringView locale, StringView currency, Style); Optional<Utf16String> currency_display_name(StringView locale, StringView currency, Style);
Optional<String> currency_numeric_display_name(StringView locale, StringView currency); Optional<Utf16String> currency_numeric_display_name(StringView locale, StringView currency);
} }

View file

@ -164,7 +164,12 @@ String icu_string_to_string(UChar const* string, i32 length)
Utf16String icu_string_to_utf16_string(icu::UnicodeString const& string) Utf16String icu_string_to_utf16_string(icu::UnicodeString const& string)
{ {
return Utf16String::from_utf16_without_validation({ string.getBuffer(), static_cast<size_t>(string.length()) }); return icu_string_to_utf16_string(string.getBuffer(), string.length());
}
Utf16String icu_string_to_utf16_string(UChar const* string, i32 length)
{
return Utf16String::from_utf16_without_validation({ string, static_cast<size_t>(length) });
} }
UCharIterator icu_string_iterator(Utf16View const& string) UCharIterator icu_string_iterator(Utf16View const& string)

View file

@ -105,6 +105,7 @@ String icu_string_to_string(icu::UnicodeString const& string);
String icu_string_to_string(UChar const*, i32 length); String icu_string_to_string(UChar const*, i32 length);
Utf16String icu_string_to_utf16_string(icu::UnicodeString const& string); Utf16String icu_string_to_utf16_string(icu::UnicodeString const& string);
Utf16String icu_string_to_utf16_string(UChar const*, i32 length);
UCharIterator icu_string_iterator(Utf16View const&); UCharIterator icu_string_iterator(Utf16View const&);

View file

@ -13,21 +13,21 @@ TEST_CASE(locale_mappings_en)
{ {
auto language = Unicode::language_display_name("en"sv, "en"sv, Unicode::LanguageDisplay::Standard); auto language = Unicode::language_display_name("en"sv, "en"sv, Unicode::LanguageDisplay::Standard);
EXPECT(language.has_value()); EXPECT(language.has_value());
EXPECT_EQ(*language, "English"sv); EXPECT_EQ(*language, u"English"sv);
language = Unicode::language_display_name("en"sv, "i-definitely-don't-exist"sv, Unicode::LanguageDisplay::Standard); language = Unicode::language_display_name("en"sv, "i-definitely-don't-exist"sv, Unicode::LanguageDisplay::Standard);
EXPECT(!language.has_value()); EXPECT(!language.has_value());
auto territory = Unicode::region_display_name("en"sv, "US"sv); auto territory = Unicode::region_display_name("en"sv, "US"sv);
EXPECT(territory.has_value()); EXPECT(territory.has_value());
EXPECT_EQ(*territory, "United States"sv); EXPECT_EQ(*territory, u"United States"sv);
territory = Unicode::region_display_name("en"sv, "i-definitely-don't-exist"sv); territory = Unicode::region_display_name("en"sv, "i-definitely-don't-exist"sv);
EXPECT(!territory.has_value()); EXPECT(!territory.has_value());
auto script = Unicode::script_display_name("en"sv, "Latn"sv); auto script = Unicode::script_display_name("en"sv, "Latn"sv);
EXPECT(script.has_value()); EXPECT(script.has_value());
EXPECT_EQ(*script, "Latin"sv); EXPECT_EQ(*script, u"Latin"sv);
script = Unicode::script_display_name("en"sv, "i-definitely-don't-exist"sv); script = Unicode::script_display_name("en"sv, "i-definitely-don't-exist"sv);
EXPECT(!script.has_value()); EXPECT(!script.has_value());
@ -37,21 +37,21 @@ TEST_CASE(locale_mappings_fr)
{ {
auto language = Unicode::language_display_name("fr"sv, "en"sv, Unicode::LanguageDisplay::Standard); auto language = Unicode::language_display_name("fr"sv, "en"sv, Unicode::LanguageDisplay::Standard);
EXPECT(language.has_value()); EXPECT(language.has_value());
EXPECT_EQ(*language, "anglais"sv); EXPECT_EQ(*language, u"anglais"sv);
language = Unicode::language_display_name("fr"sv, "i-definitely-don't-exist"sv, Unicode::LanguageDisplay::Standard); language = Unicode::language_display_name("fr"sv, "i-definitely-don't-exist"sv, Unicode::LanguageDisplay::Standard);
EXPECT(!language.has_value()); EXPECT(!language.has_value());
auto territory = Unicode::region_display_name("fr"sv, "US"sv); auto territory = Unicode::region_display_name("fr"sv, "US"sv);
EXPECT(territory.has_value()); EXPECT(territory.has_value());
EXPECT_EQ(*territory, "États-Unis"sv); EXPECT_EQ(*territory, u"États-Unis"sv);
territory = Unicode::region_display_name("fr"sv, "i-definitely-don't-exist"sv); territory = Unicode::region_display_name("fr"sv, "i-definitely-don't-exist"sv);
EXPECT(!territory.has_value()); EXPECT(!territory.has_value());
auto script = Unicode::script_display_name("fr"sv, "Latn"sv); auto script = Unicode::script_display_name("fr"sv, "Latn"sv);
EXPECT(script.has_value()); EXPECT(script.has_value());
EXPECT_EQ(*script, "latin"sv); EXPECT_EQ(*script, u"latin"sv);
script = Unicode::script_display_name("fr"sv, "i-definitely-don't-exist"sv); script = Unicode::script_display_name("fr"sv, "i-definitely-don't-exist"sv);
EXPECT(!script.has_value()); EXPECT(!script.has_value());
@ -61,21 +61,21 @@ TEST_CASE(locale_mappings_root)
{ {
auto language = Unicode::language_display_name("und"sv, "en"sv, Unicode::LanguageDisplay::Standard); auto language = Unicode::language_display_name("und"sv, "en"sv, Unicode::LanguageDisplay::Standard);
EXPECT(language.has_value()); EXPECT(language.has_value());
EXPECT_EQ(*language, "en"sv); EXPECT_EQ(*language, u"en"sv);
language = Unicode::language_display_name("und"sv, "i-definitely-don't-exist"sv, Unicode::LanguageDisplay::Standard); language = Unicode::language_display_name("und"sv, "i-definitely-don't-exist"sv, Unicode::LanguageDisplay::Standard);
EXPECT(!language.has_value()); EXPECT(!language.has_value());
auto territory = Unicode::region_display_name("und"sv, "US"sv); auto territory = Unicode::region_display_name("und"sv, "US"sv);
EXPECT(territory.has_value()); EXPECT(territory.has_value());
EXPECT_EQ(*territory, "US"sv); EXPECT_EQ(*territory, u"US"sv);
territory = Unicode::region_display_name("und"sv, "i-definitely-don't-exist"sv); territory = Unicode::region_display_name("und"sv, "i-definitely-don't-exist"sv);
EXPECT(!territory.has_value()); EXPECT(!territory.has_value());
auto script = Unicode::script_display_name("und"sv, "Latn"sv); auto script = Unicode::script_display_name("und"sv, "Latn"sv);
EXPECT(script.has_value()); EXPECT(script.has_value());
EXPECT_EQ(*script, "Latn"sv); EXPECT_EQ(*script, u"Latn"sv);
script = Unicode::script_display_name("und"sv, "i-definitely-don't-exist"sv); script = Unicode::script_display_name("und"sv, "i-definitely-don't-exist"sv);
EXPECT(!script.has_value()); EXPECT(!script.has_value());