ladybird/Libraries/LibJS/Runtime/Utf16String.cpp
Timothy Flynn 9fc3e72db2 AK+Everywhere: Allow lonely UTF-16 surrogates by default
By definition, the web allows lonely surrogates by default. Let's have
our string APIs reflect this, so we don't have to pass an allow option
all over the place.
2025-07-03 09:51:56 -04:00

151 lines
3.3 KiB
C++

/*
* Copyright (c) 2021-2023, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/StringView.h>
#include <LibJS/Runtime/Utf16String.h>
#include <LibJS/Runtime/VM.h>
namespace JS {
namespace Detail {
static NonnullRefPtr<Utf16StringImpl> the_empty_utf16_string()
{
static NonnullRefPtr<Utf16StringImpl> empty_string = Utf16StringImpl::create();
return empty_string;
}
Utf16StringImpl::Utf16StringImpl(Utf16Data string)
: m_string(move(string))
{
}
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create()
{
return adopt_ref(*new Utf16StringImpl);
}
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16Data string)
{
return adopt_ref(*new Utf16StringImpl(move(string)));
}
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(StringView string)
{
auto result = MUST(utf8_to_utf16(string));
auto impl = create(move(result.data));
impl->m_cached_view.unsafe_set_code_point_length(result.code_point_count);
return impl;
}
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view)
{
Utf16Data string;
string.ensure_capacity(view.length_in_code_units());
string.unchecked_append(view.span().data(), view.length_in_code_units());
auto impl = create(move(string));
if (auto length_in_code_points = view.length_in_code_points_if_known(); length_in_code_points.has_value())
impl->m_cached_view.unsafe_set_code_point_length(*length_in_code_points);
return impl;
}
Utf16Data const& Utf16StringImpl::string() const
{
return m_string;
}
Utf16View Utf16StringImpl::view() const
{
return m_cached_view;
}
u32 Utf16StringImpl::compute_hash() const
{
if (m_string.is_empty())
return 0;
return string_hash((char const*)m_string.data(), m_string.size() * sizeof(u16));
}
}
Utf16String Utf16String::create()
{
return Utf16String { Detail::the_empty_utf16_string() };
}
Utf16String Utf16String::create(Utf16Data string)
{
return Utf16String { Detail::Utf16StringImpl::create(move(string)) };
}
Utf16String Utf16String::create(StringView string)
{
return Utf16String { Detail::Utf16StringImpl::create(string) };
}
Utf16String Utf16String::create(Utf16View const& string)
{
return Utf16String { Detail::Utf16StringImpl::create(string) };
}
Utf16String Utf16String::invalid()
{
static auto invalid = Utf16String {};
return invalid;
}
Utf16String::Utf16String(NonnullRefPtr<Detail::Utf16StringImpl> string)
: m_string(move(string))
{
}
Utf16Data const& Utf16String::string() const
{
return m_string->string();
}
Utf16View Utf16String::view() const
{
return m_string->view();
}
Utf16View Utf16String::substring_view(size_t code_unit_offset, size_t code_unit_length) const
{
return view().substring_view(code_unit_offset, code_unit_length);
}
Utf16View Utf16String::substring_view(size_t code_unit_offset) const
{
return view().substring_view(code_unit_offset);
}
String Utf16String::to_utf8() const
{
return MUST(view().to_utf8());
}
ByteString Utf16String::to_byte_string() const
{
return MUST(view().to_byte_string());
}
u16 Utf16String::code_unit_at(size_t index) const
{
return view().code_unit_at(index);
}
size_t Utf16String::length_in_code_units() const
{
return view().length_in_code_units();
}
bool Utf16String::is_empty() const
{
return view().is_empty();
}
}