/*
 * Copyright (c) 2025, Tim Flynn
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#pragma once

// NOTE(review): the include targets below, and the angle-bracketed template parameter/argument lists further down
// (e.g. after `template`, `ErrorOr`, `Array`, `Optional`, `Badge`, `NonnullRefPtr`, `Formatter`, `Traits`), appear to
// be missing from this copy of the file. Restore them from the canonical header before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace AK {

// Utf16String is a strongly owned sequence of Unicode code points encoded as UTF-16.
//
// The data may or may not be heap-allocated, and may or may not be reference counted. As a memory optimization, if the
// UTF-16 string is entirely ASCII, the string is stored as 8-bit bytes.
class [[nodiscard]] Utf16String : public Detail::Utf16StringBase {
public:
    // Re-export every constructor of the base class.
    using Utf16StringBase::Utf16StringBase;

    // Takes over the contents of an existing base object by moving it into this string.
    explicit constexpr Utf16String(Utf16StringBase&& base)
        : Utf16StringBase(move(base))
    {
    }

    // Converts a UTF-8 view to a Utf16String. The input is checked with Utf8View::validate() under VERIFY, so invalid
    // input trips the VERIFY instead of producing an error value; use try_from_utf8() for a recoverable check.
    ALWAYS_INLINE static Utf16String from_utf8(StringView utf8_string)
    {
        VERIFY(Utf8View { utf8_string }.validate());
        return from_utf8_without_validation(utf8_string);
    }

    // Converts a String to a Utf16String. No validation is performed here.
    // NOTE(review): presumably String already guarantees well-formed UTF-8 - confirm.
    ALWAYS_INLINE static Utf16String from_utf8(String const& utf8_string) { return from_utf8_without_validation(utf8_string); }

    // Converts a FlyString to a Utf16String. No validation is performed here.
    // NOTE(review): presumably FlyString already guarantees well-formed UTF-8 - confirm.
    ALWAYS_INLINE static Utf16String from_utf8(FlyString const& utf8_string) { return from_utf8_without_validation(utf8_string); }

    // Option passed to from_utf8_with_replacement_character(), where it defaults to Yes.
    // NOTE(review): the exact BOM behavior lives in the out-of-line implementation - confirm there.
    enum class WithBOMHandling {
        No,
        Yes,
    };

    // Defined out of line. NOTE(review): by its name, this presumably substitutes a replacement character for
    // invalid input rather than failing - confirm against the implementation.
    static Utf16String from_utf8_with_replacement_character(StringView, WithBOMHandling = WithBOMHandling::Yes);

    // Fallible counterpart of from_utf8(StringView): returns the error "Input was not valid UTF-8" when
    // Utf8View::validate() rejects the input, and the converted string otherwise.
    ALWAYS_INLINE static ErrorOr try_from_utf8(StringView utf8_string)
    {
        if (!Utf8View { utf8_string }.validate())
            return Error::from_string_literal("Input was not valid UTF-8");
        return from_utf8_without_validation(utf8_string);
    }

    // Defined out of line. Every from_utf8 overload above funnels into this function; callers are responsible for
    // having validated the input.
    static Utf16String from_utf8_without_validation(StringView);

    // Builds an owned string from a UTF-16 view. Defined out of line.
    static Utf16String from_utf16(Utf16View const& utf16_string);

    // Deleted overload for arguments matching the IsOneOf constraint (Utf16String / Utf16FlyString).
    // NOTE(review): presumably this prevents needlessly re-creating a string from an already-owned one - confirm.
    template requires(IsOneOf, Utf16String, Utf16FlyString>)
    static Utf16String from_utf16(T&&) = delete;

    // Builds an owned string from a UTF-32 view. Defined out of line.
    static Utf16String from_utf32(Utf32View const&);

    // Builds a string holding a single code point. The code point is encoded into a local array through the
    // UnicodeUtils::code_point_to_utf16 callback, and the string is created from the code units actually written.
    ALWAYS_INLINE static Utf16String from_code_point(u32 code_point)
    {
        Array code_units;
        size_t length_in_code_units = 0;

        // The return value of code_point_to_utf16 is deliberately discarded; only the callback output is used.
        (void)UnicodeUtils::code_point_to_utf16(code_point, [&](auto code_unit) {
            code_units[length_in_code_units++] = code_unit;
        });

        return from_utf16({ code_units.data(), length_in_code_units });
    }

    // Formats `parameters` according to `format` into a StringBuilder in UTF-16 mode and returns the result.
    // The vformat result is unwrapped with MUST, so no error is reported to the caller.
    template
    ALWAYS_INLINE static Utf16String formatted(CheckedFormatString&& format, Parameters const&... parameters)
    {
        StringBuilder builder(StringBuilder::Mode::UTF16);

        VariadicFormatParams variadic_format_parameters { parameters... };
        MUST(vformat(builder, format.view(), variadic_format_parameters));

        return builder.to_utf16_string();
    }

    // Formats `value` with the default "{}" format specifier.
    template
    ALWAYS_INLINE static Utf16String number(T value)
    {
        return formatted("{}", value);
    }

    // Joins the elements of `collection` with `separator` via StringBuilder::join, formatting each element with
    // `format` (default "{}"), using a StringBuilder in UTF-16 mode.
    template
    ALWAYS_INLINE static Utf16String join(SeparatorType const& separator, CollectionType const& collection, StringView format = "{}"sv)
    {
        StringBuilder builder(StringBuilder::Mode::UTF16);
        builder.join(separator, collection, format);

        return builder.to_utf16_string();
    }

    // Defined out of line. NOTE(review): presumably yields `code_point` repeated `count` times - confirm.
    static Utf16String repeated(u32 code_point, size_t count);

    // Defined out of line.
    Utf16String to_well_formed() const;
    String to_well_formed_utf8() const;

    // These methods require linking LibUnicode.
    Utf16String to_lowercase(Optional const& locale = {}) const;
    Utf16String to_uppercase(Optional const& locale = {}) const;
    Utf16String to_titlecase(Optional const& locale = {}, TrailingCodePointTransformation trailing_code_point_transformation = TrailingCodePointTransformation::Lowercase) const;
    Utf16String to_casefold() const;
    Utf16String to_fullwidth() const;

    // Returns this string with ASCII uppercase letters lowercased. When neither storage form contains an ASCII
    // uppercase letter, a copy of this string is returned instead of calling the view's conversion.
    ALWAYS_INLINE Utf16String to_ascii_lowercase() const
    {
        auto view = utf16_view();

        // Scan whichever storage representation the view currently uses.
        if (view.has_ascii_storage()) {
            if (!any_of(view.ascii_span(), is_ascii_upper_alpha))
                return *this;
        } else {
            if (!any_of(view.utf16_span(), is_ascii_upper_alpha))
                return *this;
        }

        return view.to_ascii_lowercase();
    }

    // Returns this string with ASCII lowercase letters uppercased. When neither storage form contains an ASCII
    // lowercase letter, a copy of this string is returned instead of calling the view's conversion.
    ALWAYS_INLINE Utf16String to_ascii_uppercase() const
    {
        auto view = utf16_view();

        // Scan whichever storage representation the view currently uses.
        if (view.has_ascii_storage()) {
            if (!any_of(view.ascii_span(), is_ascii_lower_alpha))
                return *this;
        } else {
            if (!any_of(view.utf16_span(), is_ascii_lower_alpha))
                return *this;
        }

        return view.to_ascii_uppercase();
    }

    // Delegates to the view's to_ascii_titlecase(); unlike the two methods above there is no early return.
    ALWAYS_INLINE Utf16String to_ascii_titlecase() const
    {
        return utf16_view().to_ascii_titlecase();
    }

    // Replaces occurrences of the code unit `needle` with `replacement` as directed by `replace_mode`.
    // Returns a copy of this string when it is empty or does not contain `needle`.
    ALWAYS_INLINE Utf16String replace(char16_t needle, Utf16View const& replacement, ReplaceMode replace_mode) const
    {
        auto view = utf16_view();
        if (view.is_empty() || !view.contains(needle))
            return *this;

        return view.replace(needle, replacement, replace_mode);
    }

    // Replaces occurrences of the sequence `needle` with `replacement` as directed by `replace_mode`.
    // Returns a copy of this string when it is empty or does not contain `needle`.
    ALWAYS_INLINE Utf16String replace(Utf16View const& needle, Utf16View const& replacement, ReplaceMode replace_mode) const
    {
        auto view = utf16_view();
        if (view.is_empty() || !view.contains(needle))
            return *this;

        return view.replace(needle, replacement, replace_mode);
    }

    // Trims code units contained in `code_units` from the side(s) selected by `mode`.
    // An empty string yields a default-constructed Utf16String. If the first/last code unit (as applicable to `mode`)
    // is not in `code_units`, a copy of this string is returned; otherwise a new string is built from the trimmed view.
    ALWAYS_INLINE Utf16String trim(Utf16View const& code_units, TrimMode mode = TrimMode::Both) const
    {
        if (is_empty())
            return {};

        bool needs_trimming = false;

        // Only the outermost code unit on each requested side needs checking to decide whether any work is required.
        if (mode == TrimMode::Left || mode == TrimMode::Both)
            needs_trimming |= code_units.contains(code_unit_at(0));
        if (mode == TrimMode::Right || mode == TrimMode::Both)
            needs_trimming |= code_units.contains(code_unit_at(length_in_code_units() - 1));

        if (!needs_trimming)
            return *this;

        return Utf16String::from_utf16(utf16_view().trim(code_units, mode));
    }

    // Trims the characters space, \n, \t, \v, \f and \r from the side(s) selected by `mode`.
    ALWAYS_INLINE Utf16String trim_ascii_whitespace(TrimMode mode = TrimMode::Both) const
    {
        return trim(" \n\t\v\f\r"sv, mode);
    }

    // Delegates to the view's escape_html_entities().
    ALWAYS_INLINE Utf16String escape_html_entities() const
    {
        return utf16_view().escape_html_entities();
    }

    // Defined out of line; the Badge parameter restricts who may call this.
    // NOTE(review): the Badge's type argument is missing in this copy - presumably StringBuilder; confirm.
    static Utf16String from_string_builder(Badge, StringBuilder& builder);

    // Defined out of line. NOTE(review): presumably reads `length_in_code_units` code units from the stream, stored
    // as 8-bit data when `is_ascii` is set - confirm against the implementation.
    static ErrorOr from_ipc_stream(Stream&, size_t length_in_code_units, bool is_ascii);

private:
    // Wraps an already-allocated string data object by moving the pointer into the base class.
    ALWAYS_INLINE explicit Utf16String(NonnullRefPtr value)
        : Utf16StringBase(move(value))
    {
    }
};

// Formatter specialization; format() is defined out of line.
template<>
struct Formatter : Formatter {
    ErrorOr format(FormatBuilder&, Utf16String const&);
};

// Traits specialization whose hash() forwards to the string's own hash().
template<>
struct Traits : public DefaultTraits {
    static unsigned hash(Utf16String const& s) { return s.hash(); }
};

}

// User-defined literal creating a Utf16String from a narrow string literal. The input is checked for valid UTF-8 with
// ASSERT (not VERIFY) and then converted without further validation.
[[nodiscard]] ALWAYS_INLINE AK::Utf16String operator""_utf16(char const* string, size_t length)
{
    AK::StringView view { string, length };

    ASSERT(AK::Utf8View { view }.validate());
    return AK::Utf16String::from_utf8_without_validation(view);
}

// User-defined literal creating a Utf16String from a char16_t string literal via from_utf16().
[[nodiscard]] ALWAYS_INLINE AK::Utf16String operator""_utf16(char16_t const* string, size_t length)
{
    return AK::Utf16String::from_utf16({ string, length });
}