mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-19 15:32:31 +00:00
This is a strictly UTF-16 string with some optimizations for ASCII.
* If created from a short UTF-8 or UTF-16 string that is also ASCII,
then the string is stored in an inlined byte buffer.
* If created from a long UTF-8 or UTF-16 string that is also ASCII,
then the string is stored in an outlined char buffer.
* If created from a short or long UTF-8 or UTF-16 string that is not
ASCII, then the string is stored in an outlined char16 buffer.
We do not store short non-ASCII text in the inlined buffer to avoid
confusion with operations such as `length_in_code_units` and
`code_unit_at`. For example, "😀" would be stored as 4 UTF-8 bytes
in short string form. But we still want `length_in_code_units` to
be 2, and `code_unit_at(0)` to be 0xD83D.
157 lines
3.5 KiB
C++
157 lines
3.5 KiB
C++
/*
|
|
* Copyright (c) 2021-2023, Tim Flynn <trflynn89@serenityos.org>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#include <AK/StringView.h>
|
|
#include <LibJS/Runtime/Utf16String.h>
|
|
#include <LibJS/Runtime/VM.h>
|
|
|
|
namespace JS {
|
|
namespace Detail {
|
|
|
|
// Returns a reference-counted handle to a single shared, default-created Utf16StringImpl.
// The instance is a function-local static, so it is created the first time this is called.
static NonnullRefPtr<Utf16StringImpl> the_empty_utf16_string()
{
    static auto shared_empty_impl = Utf16StringImpl::create();
    return shared_empty_impl;
}
|
|
|
|
// Constructs an implementation object that takes over the given code unit buffer
// by moving it into m_string.
Utf16StringImpl::Utf16StringImpl(Utf16Data string)
    : m_string(move(string))
{
}
|
|
|
|
// Allocates a default-constructed implementation object and adopts it into a NonnullRefPtr.
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create()
{
    auto* fresh_impl = new Utf16StringImpl;
    return adopt_ref(*fresh_impl);
}
|
|
|
|
// Allocates an implementation object that takes ownership of `string` (moved in)
// and adopts it into a NonnullRefPtr.
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16Data string)
{
    auto* fresh_impl = new Utf16StringImpl(move(string));
    return adopt_ref(*fresh_impl);
}
|
|
|
|
// Builds an implementation object from `string` by running it through utf8_to_utf16().
// The conversion is wrapped in MUST(), so a conversion error is not propagated to the caller.
// The code point count reported by the conversion is stored on the cached view.
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(StringView string)
{
    auto conversion = MUST(utf8_to_utf16(string));
    auto const code_point_count = conversion.code_point_count;

    auto impl = create(move(conversion.data));
    impl->m_cached_view.unsafe_set_code_point_length(code_point_count);

    return impl;
}
|
|
|
|
// Builds an implementation object holding a copy of the code units of `view`.
//
// When the view reports ASCII storage, each code unit is read with code_unit_at() and
// widened to char16_t; otherwise the UTF-16 span is appended in one call. If the view
// already knows its length in code points, that value is carried over to the cached view.
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view)
{
    auto const code_unit_count = view.length_in_code_units();

    // Capacity is reserved up front, which is what makes the unchecked appends below valid.
    Utf16Data code_units;
    code_units.ensure_capacity(code_unit_count);

    if (!view.has_ascii_storage()) {
        code_units.unchecked_append(view.utf16_span().data(), code_unit_count);
    } else {
        for (size_t index = 0; index < code_unit_count; ++index)
            code_units.unchecked_append(static_cast<char16_t>(view.code_unit_at(index)));
    }

    auto impl = create(move(code_units));

    auto known_code_point_length = view.length_in_code_points_if_known();
    if (known_code_point_length.has_value())
        impl->m_cached_view.unsafe_set_code_point_length(*known_code_point_length);

    return impl;
}
|
|
|
|
// Gives read-only access to the stored code unit buffer.
Utf16Data const& Utf16StringImpl::string() const
{
    return this->m_string;
}
|
|
|
|
// Returns a copy of the cached view member.
Utf16View Utf16StringImpl::view() const
{
    return this->m_cached_view;
}
|
|
|
|
// Computes a hash over the raw bytes of the stored code units.
// An empty string hashes to 0 without calling string_hash().
u32 Utf16StringImpl::compute_hash() const
{
    if (m_string.is_empty())
        return 0;

    // The buffer is hashed byte-wise, so its length is scaled by the size of one code unit.
    auto const* raw_bytes = reinterpret_cast<char const*>(m_string.data());
    auto const byte_count = m_string.size() * sizeof(u16);
    return string_hash(raw_bytes, byte_count);
}
|
|
|
|
}
|
|
|
|
// Creates a string backed by the shared implementation object returned by
// Detail::the_empty_utf16_string().
Utf16String Utf16String::create()
{
    auto shared_impl = Detail::the_empty_utf16_string();
    return Utf16String { move(shared_impl) };
}
|
|
|
|
// Creates a string that takes ownership of the given code unit buffer.
Utf16String Utf16String::create(Utf16Data string)
{
    auto impl = Detail::Utf16StringImpl::create(move(string));
    return Utf16String { move(impl) };
}
|
|
|
|
// Creates a string from a StringView; the conversion is delegated to
// Detail::Utf16StringImpl::create(StringView).
Utf16String Utf16String::create(StringView string)
{
    auto impl = Detail::Utf16StringImpl::create(string);
    return Utf16String { move(impl) };
}
|
|
|
|
// Creates a string from a Utf16View; the copy is delegated to
// Detail::Utf16StringImpl::create(Utf16View const&).
Utf16String Utf16String::create(Utf16View const& string)
{
    auto impl = Detail::Utf16StringImpl::create(string);
    return Utf16String { move(impl) };
}
|
|
|
|
// Returns a copy of a function-local static Utf16String built with `Utf16String {}`.
// NOTE(review): what state `Utf16String {}` leaves the string in is decided by the class
// declaration, which is not visible here; confirm against the header.
Utf16String Utf16String::invalid()
{
    static Utf16String invalid_string {};
    return invalid_string;
}
|
|
|
|
// Wraps an existing implementation object; the reference is moved into m_string.
Utf16String::Utf16String(NonnullRefPtr<Detail::Utf16StringImpl> string)
    : m_string(move(string))
{
}
|
|
|
|
// Gives read-only access to the code unit buffer held by the implementation object.
Utf16Data const& Utf16String::string() const
{
    auto const& impl = *m_string;
    return impl.string();
}
|
|
|
|
// Returns the view exposed by the implementation object.
Utf16View Utf16String::view() const
{
    auto const& impl = *m_string;
    return impl.view();
}
|
|
|
|
// Returns the sub-view of this string selected by an offset and a length, both given in
// code units. The arguments are forwarded unchanged to Utf16View::substring_view().
Utf16View Utf16String::substring_view(size_t code_unit_offset, size_t code_unit_length) const
{
    auto full_view = view();
    return full_view.substring_view(code_unit_offset, code_unit_length);
}
|
|
|
|
// Returns the sub-view of this string selected by a code unit offset alone.
// The argument is forwarded unchanged to the single-argument Utf16View::substring_view().
Utf16View Utf16String::substring_view(size_t code_unit_offset) const
{
    auto full_view = view();
    return full_view.substring_view(code_unit_offset);
}
|
|
|
|
// Converts this string to a String via Utf16View::to_utf8().
// The conversion is wrapped in MUST(), so a conversion error is not propagated to the caller.
String Utf16String::to_utf8() const
{
    auto full_view = view();
    return MUST(full_view.to_utf8());
}
|
|
|
|
// Converts this string to a ByteString via Utf16View::to_byte_string().
// The conversion is wrapped in MUST(), so a conversion error is not propagated to the caller.
ByteString Utf16String::to_byte_string() const
{
    auto full_view = view();
    return MUST(full_view.to_byte_string());
}
|
|
|
|
// Returns the code unit at `index`, as reported by Utf16View::code_unit_at().
// No bounds handling is added here; the index is forwarded unchanged.
u16 Utf16String::code_unit_at(size_t index) const
{
    auto full_view = view();
    return full_view.code_unit_at(index);
}
|
|
|
|
// Returns the length of this string in code units, as reported by the underlying view.
size_t Utf16String::length_in_code_units() const
{
    auto full_view = view();
    return full_view.length_in_code_units();
}
|
|
|
|
bool Utf16String::is_empty() const
|
|
{
|
|
return view().is_empty();
|
|
}
|
|
|
|
}
|