/* * Copyright (c) 2025, Tim Flynn * * SPDX-License-Identifier: BSD-2-Clause */ #pragma once #include #include #include #include #include #include #include namespace AK::Detail { void did_destroy_utf16_fly_string_data(Badge, Detail::Utf16StringData const&); class Utf16StringData final : public RefCounted { public: enum class StorageType : u8 { ASCII, UTF16, }; enum class AllowASCIIStorage : u8 { No, Yes, }; static NonnullRefPtr from_utf8(StringView, AllowASCIIStorage); static NonnullRefPtr from_utf16(Utf16View const&); static NonnullRefPtr from_utf32(Utf32View const&); static NonnullRefPtr from_string_builder(StringBuilder&); ~Utf16StringData() { if (is_fly_string()) did_destroy_utf16_fly_string_data({}, *this); } [[nodiscard]] static constexpr size_t offset_of_string_storage() { return offsetof(Utf16StringData, m_ascii_data); } void operator delete(void* ptr) { free(ptr); } [[nodiscard]] ALWAYS_INLINE bool operator==(Utf16StringData const& other) const { if (is_fly_string() && other.is_fly_string()) return this == &other; return utf16_view() == other.utf16_view(); } [[nodiscard]] ALWAYS_INLINE bool operator==(Utf16View const& other) const { return utf16_view() == other; } [[nodiscard]] ALWAYS_INLINE bool operator==(StringView const& other) const { if (has_ascii_storage()) return ascii_view() == other; return utf16_view() == Utf16View { other.characters_without_null_termination(), other.length() }; } [[nodiscard]] ALWAYS_INLINE bool has_ascii_storage() const { return m_length_in_code_units >> Detail::UTF16_FLAG == 0; } [[nodiscard]] ALWAYS_INLINE bool has_utf16_storage() const { return m_length_in_code_units >> Detail::UTF16_FLAG != 0; } ALWAYS_INLINE u32 hash() const { if (!m_has_hash) m_hash = calculate_hash(); return m_hash; } [[nodiscard]] ALWAYS_INLINE size_t length_in_code_units() const { return m_length_in_code_units & ~(1uz << Detail::UTF16_FLAG); } [[nodiscard]] ALWAYS_INLINE size_t length_in_code_points() const { if (has_ascii_storage()) return length_in_code_units(); if (m_length_in_code_points == NumericLimits::max()) m_length_in_code_points = calculate_code_point_length(); return m_length_in_code_points; } [[nodiscard]] ALWAYS_INLINE StringView ascii_view() const { ASSERT(has_ascii_storage()); return { m_ascii_data, length_in_code_units() }; } [[nodiscard]] ALWAYS_INLINE Utf16View utf16_view() const { if (has_ascii_storage()) return { m_ascii_data, length_in_code_units() }; Utf16View view { m_utf16_data, length_in_code_units() }; view.m_length_in_code_points = m_length_in_code_points; return view; } ALWAYS_INLINE void mark_as_fly_string(Badge) const { m_is_fly_string = true; } [[nodiscard]] ALWAYS_INLINE bool is_fly_string() const { return m_is_fly_string; } private: ALWAYS_INLINE Utf16StringData(StorageType storage_type, size_t code_unit_length) : m_length_in_code_units(code_unit_length) { if (storage_type == StorageType::UTF16) m_length_in_code_units |= 1uz << Detail::UTF16_FLAG; } static NonnullRefPtr create_uninitialized(StorageType storage_type, size_t code_unit_length); template static NonnullRefPtr create_from_code_point_iterable(ViewType const&); [[nodiscard]] size_t calculate_code_point_length() const; [[nodiscard]] ALWAYS_INLINE u32 calculate_hash() const { if (has_ascii_storage()) return ascii_view().hash(); return utf16_view().hash(); } // We store whether this string has ASCII or UTF-16 storage by setting the most significant bit of m_length_in_code_units // to 1 for UTF-16 storage. This shrinks the size of most UTF-16 string related classes, at the cost of not being // allowed to create a string larger than 2**63 - 1. size_t m_length_in_code_units { 0 }; mutable size_t m_length_in_code_points { NumericLimits::max() }; mutable u32 m_hash { 0 }; mutable bool m_has_hash { false }; mutable bool m_is_fly_string { false }; union { char m_ascii_data[0]; char16_t m_utf16_data[0]; }; }; }