mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-08 09:09:43 +00:00
AK+LibJS+LibWeb+LibRegex: Replace AK::Utf16Data with AK::Utf16String
This commit is contained in:
parent
a43cb15e81
commit
9582895759
Notes:
github-actions[bot]
2025-07-18 16:46:53 +00:00
Author: https://github.com/trflynn89
Commit: 9582895759
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5388
Reviewed-by: https://github.com/shannonbooth ✅
22 changed files with 101 additions and 222 deletions
|
@ -67,11 +67,6 @@ ErrorOr<String> String::from_utf8(StringView view)
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
ErrorOr<String> String::from_utf16(Utf16View const& utf16)
|
|
||||||
{
|
|
||||||
return utf16.to_utf8();
|
|
||||||
}
|
|
||||||
|
|
||||||
ErrorOr<String> String::from_utf16_le_with_replacement_character(ReadonlyBytes bytes)
|
ErrorOr<String> String::from_utf16_le_with_replacement_character(ReadonlyBytes bytes)
|
||||||
{
|
{
|
||||||
if (bytes.is_empty())
|
if (bytes.is_empty())
|
||||||
|
@ -80,7 +75,7 @@ ErrorOr<String> String::from_utf16_le_with_replacement_character(ReadonlyBytes b
|
||||||
auto const* utf16_data = reinterpret_cast<char16_t const*>(bytes.data());
|
auto const* utf16_data = reinterpret_cast<char16_t const*>(bytes.data());
|
||||||
auto utf16_length = bytes.size() / 2;
|
auto utf16_length = bytes.size() / 2;
|
||||||
|
|
||||||
Utf16Data well_formed_utf16;
|
Vector<char16_t> well_formed_utf16;
|
||||||
|
|
||||||
if (!validate_utf16_le(bytes)) {
|
if (!validate_utf16_le(bytes)) {
|
||||||
well_formed_utf16.resize(bytes.size());
|
well_formed_utf16.resize(bytes.size());
|
||||||
|
@ -109,7 +104,7 @@ ErrorOr<String> String::from_utf16_be_with_replacement_character(ReadonlyBytes b
|
||||||
auto const* utf16_data = reinterpret_cast<char16_t const*>(bytes.data());
|
auto const* utf16_data = reinterpret_cast<char16_t const*>(bytes.data());
|
||||||
auto utf16_length = bytes.size() / 2;
|
auto utf16_length = bytes.size() / 2;
|
||||||
|
|
||||||
Utf16Data well_formed_utf16;
|
Vector<char16_t> well_formed_utf16;
|
||||||
|
|
||||||
if (!validate_utf16_le(bytes)) {
|
if (!validate_utf16_le(bytes)) {
|
||||||
well_formed_utf16.resize(bytes.size());
|
well_formed_utf16.resize(bytes.size());
|
||||||
|
|
|
@ -69,7 +69,6 @@ public:
|
||||||
[[nodiscard]] static String from_string_builder_without_validation(Badge<StringBuilder>, StringBuilder&);
|
[[nodiscard]] static String from_string_builder_without_validation(Badge<StringBuilder>, StringBuilder&);
|
||||||
|
|
||||||
// Creates a new String from a sequence of UTF-16 encoded code points.
|
// Creates a new String from a sequence of UTF-16 encoded code points.
|
||||||
static ErrorOr<String> from_utf16(Utf16View const&);
|
|
||||||
static ErrorOr<String> from_utf16_le_with_replacement_character(ReadonlyBytes);
|
static ErrorOr<String> from_utf16_le_with_replacement_character(ReadonlyBytes);
|
||||||
static ErrorOr<String> from_utf16_be_with_replacement_character(ReadonlyBytes);
|
static ErrorOr<String> from_utf16_be_with_replacement_character(ReadonlyBytes);
|
||||||
|
|
||||||
|
|
|
@ -10,77 +10,12 @@
|
||||||
#include <AK/StringView.h>
|
#include <AK/StringView.h>
|
||||||
#include <AK/Utf16String.h>
|
#include <AK/Utf16String.h>
|
||||||
#include <AK/Utf16View.h>
|
#include <AK/Utf16View.h>
|
||||||
#include <AK/Utf32View.h>
|
|
||||||
#include <AK/Utf8View.h>
|
#include <AK/Utf8View.h>
|
||||||
|
|
||||||
#include <simdutf.h>
|
#include <simdutf.h>
|
||||||
|
|
||||||
namespace AK {
|
namespace AK {
|
||||||
|
|
||||||
template<OneOf<Utf8View, Utf32View> UtfViewType>
|
|
||||||
static ErrorOr<Utf16ConversionResult> to_utf16_slow(UtfViewType const& view)
|
|
||||||
{
|
|
||||||
Utf16Data utf16_data;
|
|
||||||
TRY(utf16_data.try_ensure_capacity(view.length()));
|
|
||||||
|
|
||||||
size_t code_point_count = 0;
|
|
||||||
for (auto code_point : view) {
|
|
||||||
TRY(UnicodeUtils::try_code_point_to_utf16(code_point, [&](auto code_unit) -> ErrorOr<void> {
|
|
||||||
TRY(utf16_data.try_append(code_unit));
|
|
||||||
return {};
|
|
||||||
}));
|
|
||||||
|
|
||||||
code_point_count++;
|
|
||||||
}
|
|
||||||
|
|
||||||
return Utf16ConversionResult { move(utf16_data), code_point_count };
|
|
||||||
}
|
|
||||||
|
|
||||||
ErrorOr<Utf16ConversionResult> utf8_to_utf16(StringView utf8_view)
|
|
||||||
{
|
|
||||||
return utf8_to_utf16(Utf8View { utf8_view });
|
|
||||||
}
|
|
||||||
|
|
||||||
ErrorOr<Utf16ConversionResult> utf8_to_utf16(Utf8View const& utf8_view)
|
|
||||||
{
|
|
||||||
if (utf8_view.is_empty())
|
|
||||||
return Utf16ConversionResult { Utf16Data {}, 0 };
|
|
||||||
|
|
||||||
// All callers want to allow lonely surrogates, which simdutf does not permit.
|
|
||||||
if (!utf8_view.validate(AllowLonelySurrogates::No)) [[unlikely]]
|
|
||||||
return to_utf16_slow(utf8_view);
|
|
||||||
|
|
||||||
auto const* data = reinterpret_cast<char const*>(utf8_view.bytes());
|
|
||||||
auto length = utf8_view.byte_length();
|
|
||||||
|
|
||||||
Utf16Data utf16_data;
|
|
||||||
TRY(utf16_data.try_resize(simdutf::utf16_length_from_utf8(data, length)));
|
|
||||||
// FIXME: simdutf _could_ be telling us about this, but it doesn't -- so we have to compute it again.
|
|
||||||
auto code_point_length = simdutf::count_utf8(data, length);
|
|
||||||
|
|
||||||
[[maybe_unused]] auto result = simdutf::convert_utf8_to_utf16(data, length, reinterpret_cast<char16_t*>(utf16_data.data()));
|
|
||||||
ASSERT(result == utf16_data.size());
|
|
||||||
|
|
||||||
return Utf16ConversionResult { utf16_data, code_point_length };
|
|
||||||
}
|
|
||||||
|
|
||||||
ErrorOr<Utf16ConversionResult> utf32_to_utf16(Utf32View const& utf32_view)
|
|
||||||
{
|
|
||||||
if (utf32_view.is_empty())
|
|
||||||
return Utf16ConversionResult { Utf16Data {}, 0 };
|
|
||||||
|
|
||||||
auto const* data = reinterpret_cast<char32_t const*>(utf32_view.code_points());
|
|
||||||
auto length = utf32_view.length();
|
|
||||||
|
|
||||||
Utf16Data utf16_data;
|
|
||||||
TRY(utf16_data.try_resize(simdutf::utf16_length_from_utf32(data, length)));
|
|
||||||
|
|
||||||
[[maybe_unused]] auto result = simdutf::convert_utf32_to_utf16(data, length, reinterpret_cast<char16_t*>(utf16_data.data()));
|
|
||||||
ASSERT(result == utf16_data.size());
|
|
||||||
|
|
||||||
return Utf16ConversionResult { utf16_data, length };
|
|
||||||
}
|
|
||||||
|
|
||||||
bool validate_utf16_le(ReadonlyBytes bytes)
|
bool validate_utf16_le(ReadonlyBytes bytes)
|
||||||
{
|
{
|
||||||
return simdutf::validate_utf16le(reinterpret_cast<char16_t const*>(bytes.data()), bytes.size() / 2);
|
return simdutf::validate_utf16le(reinterpret_cast<char16_t const*>(bytes.data()), bytes.size() / 2);
|
||||||
|
|
|
@ -23,16 +23,6 @@
|
||||||
|
|
||||||
namespace AK {
|
namespace AK {
|
||||||
|
|
||||||
using Utf16Data = Vector<char16_t, 1>;
|
|
||||||
|
|
||||||
struct Utf16ConversionResult {
|
|
||||||
Utf16Data data;
|
|
||||||
size_t code_point_count;
|
|
||||||
};
|
|
||||||
ErrorOr<Utf16ConversionResult> utf8_to_utf16(StringView);
|
|
||||||
ErrorOr<Utf16ConversionResult> utf8_to_utf16(Utf8View const&);
|
|
||||||
ErrorOr<Utf16ConversionResult> utf32_to_utf16(Utf32View const&);
|
|
||||||
|
|
||||||
[[nodiscard]] bool validate_utf16_le(ReadonlyBytes);
|
[[nodiscard]] bool validate_utf16_le(ReadonlyBytes);
|
||||||
[[nodiscard]] bool validate_utf16_be(ReadonlyBytes);
|
[[nodiscard]] bool validate_utf16_be(ReadonlyBytes);
|
||||||
|
|
||||||
|
@ -156,13 +146,6 @@ public:
|
||||||
m_length_in_code_units |= 1uz << Detail::UTF16_FLAG;
|
m_length_in_code_units |= 1uz << Detail::UTF16_FLAG;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr Utf16View(Utf16Data const& string)
|
|
||||||
: m_string { .utf16 = string.data() }
|
|
||||||
, m_length_in_code_units(string.size())
|
|
||||||
{
|
|
||||||
m_length_in_code_units |= 1uz << Detail::UTF16_FLAG;
|
|
||||||
}
|
|
||||||
|
|
||||||
consteval Utf16View(StringView string)
|
consteval Utf16View(StringView string)
|
||||||
: m_string { .ascii = string.characters_without_null_termination() }
|
: m_string { .ascii = string.characters_without_null_termination() }
|
||||||
, m_length_in_code_units(string.length())
|
, m_length_in_code_units(string.length())
|
||||||
|
@ -170,15 +153,6 @@ public:
|
||||||
VERIFY(all_of(string, AK::is_ascii));
|
VERIFY(all_of(string, AK::is_ascii));
|
||||||
}
|
}
|
||||||
|
|
||||||
Utf16View(Utf16ConversionResult&&) = delete;
|
|
||||||
explicit Utf16View(Utf16ConversionResult const& conversion_result)
|
|
||||||
: m_string { .utf16 = conversion_result.data.data() }
|
|
||||||
, m_length_in_code_units(conversion_result.data.size())
|
|
||||||
, m_length_in_code_points(conversion_result.code_point_count)
|
|
||||||
{
|
|
||||||
m_length_in_code_units |= 1uz << Detail::UTF16_FLAG;
|
|
||||||
}
|
|
||||||
|
|
||||||
ErrorOr<String> to_utf8(AllowLonelySurrogates = AllowLonelySurrogates::Yes) const;
|
ErrorOr<String> to_utf8(AllowLonelySurrogates = AllowLonelySurrogates::Yes) const;
|
||||||
ErrorOr<ByteString> to_byte_string(AllowLonelySurrogates = AllowLonelySurrogates::Yes) const;
|
ErrorOr<ByteString> to_byte_string(AllowLonelySurrogates = AllowLonelySurrogates::Yes) const;
|
||||||
|
|
||||||
|
@ -314,18 +288,6 @@ public:
|
||||||
return m_length_in_code_points;
|
return m_length_in_code_points;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr Optional<size_t> length_in_code_points_if_known() const
|
|
||||||
{
|
|
||||||
if (has_ascii_storage())
|
|
||||||
return m_length_in_code_units;
|
|
||||||
|
|
||||||
if (m_length_in_code_points == NumericLimits<size_t>::max())
|
|
||||||
return {};
|
|
||||||
return m_length_in_code_points;
|
|
||||||
}
|
|
||||||
|
|
||||||
constexpr void unsafe_set_code_point_length(size_t length) const { m_length_in_code_points = length; }
|
|
||||||
|
|
||||||
[[nodiscard]] constexpr char16_t code_unit_at(size_t index) const
|
[[nodiscard]] constexpr char16_t code_unit_at(size_t index) const
|
||||||
{
|
{
|
||||||
VERIFY(index < length_in_code_units());
|
VERIFY(index < length_in_code_units());
|
||||||
|
@ -591,6 +553,5 @@ inline constexpr bool IsHashCompatible<Utf16String, Utf16View> = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if USING_AK_GLOBALLY
|
#if USING_AK_GLOBALLY
|
||||||
using AK::Utf16Data;
|
|
||||||
using AK::Utf16View;
|
using AK::Utf16View;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -111,7 +111,7 @@ ErrorOr<String> Process::get_name()
|
||||||
if (!length)
|
if (!length)
|
||||||
return Error::from_windows_error();
|
return Error::from_windows_error();
|
||||||
|
|
||||||
return String::from_utf16(Utf16View { reinterpret_cast<char16_t const*>(path), length });
|
return MUST(Utf16View { reinterpret_cast<char16_t const*>(path), length }.to_utf8());
|
||||||
}
|
}
|
||||||
|
|
||||||
ErrorOr<void> Process::set_name(StringView, SetThreadName)
|
ErrorOr<void> Process::set_name(StringView, SetThreadName)
|
||||||
|
|
|
@ -559,7 +559,7 @@ JS_DEFINE_NATIVE_FUNCTION(GlobalObject::encode_uri_component)
|
||||||
JS_DEFINE_NATIVE_FUNCTION(GlobalObject::escape)
|
JS_DEFINE_NATIVE_FUNCTION(GlobalObject::escape)
|
||||||
{
|
{
|
||||||
// 1. Set string to ? ToString(string).
|
// 1. Set string to ? ToString(string).
|
||||||
auto string = TRY(vm.argument(0).to_byte_string(vm));
|
auto string = TRY(vm.argument(0).to_utf16_string(vm));
|
||||||
|
|
||||||
// 3. Let R be the empty String.
|
// 3. Let R be the empty String.
|
||||||
StringBuilder escaped;
|
StringBuilder escaped;
|
||||||
|
@ -570,29 +570,29 @@ JS_DEFINE_NATIVE_FUNCTION(GlobalObject::escape)
|
||||||
// 2. Let length be the length of string.
|
// 2. Let length be the length of string.
|
||||||
// 5. Let k be 0.
|
// 5. Let k be 0.
|
||||||
// 6. Repeat, while k < length,
|
// 6. Repeat, while k < length,
|
||||||
auto utf16_conversion = TRY_OR_THROW_OOM(vm, utf8_to_utf16(string));
|
for (size_t k = 0; k < string.length_in_code_units(); ++k) {
|
||||||
for (auto code_point : utf16_conversion.data) {
|
|
||||||
// a. Let char be the code unit at index k within string.
|
// a. Let char be the code unit at index k within string.
|
||||||
|
auto code_unit = string.code_unit_at(k);
|
||||||
|
|
||||||
// b. If unescapedSet contains char, then
|
// b. If unescapedSet contains char, then
|
||||||
// NOTE: We know unescapedSet is ASCII-only, so ensure we have an ASCII codepoint before casting to char.
|
// NOTE: We know unescapedSet is ASCII-only, so ensure we have an ASCII codepoint before casting to char.
|
||||||
if (is_ascii(code_point) && unescaped_set.contains(static_cast<char>(code_point))) {
|
if (is_ascii(code_unit) && unescaped_set.contains(static_cast<char>(code_unit))) {
|
||||||
// i. Let S be the String value containing the single code unit char.
|
// i. Let S be the String value containing the single code unit char.
|
||||||
escaped.append(code_point);
|
escaped.append(static_cast<char>(code_unit));
|
||||||
}
|
}
|
||||||
// c. Else,
|
// c. Else,
|
||||||
// i. Let n be the numeric value of char.
|
// i. Let n be the numeric value of char.
|
||||||
// ii. If n < 256, then
|
// ii. If n < 256, then
|
||||||
else if (code_point < 256) {
|
else if (code_unit < 256) {
|
||||||
// 1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
|
// 1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
|
||||||
// 2. Let S be the string-concatenation of "%" and ! StringPad(hex, 2𝔽, "0", start).
|
// 2. Let S be the string-concatenation of "%" and ! StringPad(hex, 2𝔽, "0", start).
|
||||||
escaped.appendff("%{:02X}", code_point);
|
escaped.appendff("%{:02X}", code_unit);
|
||||||
}
|
}
|
||||||
// iii. Else,
|
// iii. Else,
|
||||||
else {
|
else {
|
||||||
// 1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
|
// 1. Let hex be the String representation of n, formatted as an uppercase hexadecimal number.
|
||||||
// 2. Let S be the string-concatenation of "%u" and ! StringPad(hex, 4𝔽, "0", start).
|
// 2. Let S be the string-concatenation of "%u" and ! StringPad(hex, 4𝔽, "0", start).
|
||||||
escaped.appendff("%u{:04X}", code_point);
|
escaped.appendff("%u{:04X}", code_unit);
|
||||||
}
|
}
|
||||||
|
|
||||||
// d. Set R to the string-concatenation of R and S.
|
// d. Set R to the string-concatenation of R and S.
|
||||||
|
|
|
@ -93,26 +93,21 @@ ErrorOr<String, ParseRegexPatternError> parse_regex_pattern(StringView pattern,
|
||||||
if (unicode && unicode_sets)
|
if (unicode && unicode_sets)
|
||||||
return ParseRegexPatternError { MUST(String::formatted(ErrorType::RegExpObjectIncompatibleFlags.message(), 'u', 'v')) };
|
return ParseRegexPatternError { MUST(String::formatted(ErrorType::RegExpObjectIncompatibleFlags.message(), 'u', 'v')) };
|
||||||
|
|
||||||
auto utf16_pattern_result = AK::utf8_to_utf16(pattern);
|
auto utf16_pattern = Utf16String::from_utf8(pattern);
|
||||||
if (utf16_pattern_result.is_error())
|
|
||||||
return ParseRegexPatternError { "Out of memory"_string };
|
|
||||||
|
|
||||||
auto utf16_result = utf16_pattern_result.release_value();
|
|
||||||
Utf16View utf16_pattern_view { utf16_result };
|
|
||||||
StringBuilder builder;
|
StringBuilder builder;
|
||||||
|
|
||||||
// If the Unicode flag is set, append each code point to the pattern. Otherwise, append each
|
// If the Unicode flag is set, append each code point to the pattern. Otherwise, append each
|
||||||
// code unit. But unlike the spec, multi-byte code units must be escaped for LibRegex to parse.
|
// code unit. But unlike the spec, multi-byte code units must be escaped for LibRegex to parse.
|
||||||
auto previous_code_unit_was_backslash = false;
|
auto previous_code_unit_was_backslash = false;
|
||||||
for (size_t i = 0; i < utf16_pattern_view.length_in_code_units();) {
|
for (size_t i = 0; i < utf16_pattern.length_in_code_units();) {
|
||||||
if (unicode || unicode_sets) {
|
if (unicode || unicode_sets) {
|
||||||
auto code_point = code_point_at(utf16_pattern_view, i);
|
auto code_point = code_point_at(utf16_pattern, i);
|
||||||
builder.append_code_point(code_point.code_point);
|
builder.append_code_point(code_point.code_point);
|
||||||
i += code_point.code_unit_count;
|
i += code_point.code_unit_count;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
u16 code_unit = utf16_pattern_view.code_unit_at(i);
|
u16 code_unit = utf16_pattern.code_unit_at(i);
|
||||||
++i;
|
++i;
|
||||||
|
|
||||||
if (code_unit > 0x7f) {
|
if (code_unit > 0x7f) {
|
||||||
|
|
|
@ -512,7 +512,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M
|
||||||
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
return ExecutionResult::Failed_ExecuteLowPrioForks;
|
||||||
|
|
||||||
Optional<ByteString> str;
|
Optional<ByteString> str;
|
||||||
Utf16Data utf16;
|
Utf16String utf16;
|
||||||
Vector<u32> data;
|
Vector<u32> data;
|
||||||
data.ensure_capacity(length);
|
data.ensure_capacity(length);
|
||||||
for (size_t i = offset; i < offset + length; ++i)
|
for (size_t i = offset; i < offset + length; ++i)
|
||||||
|
|
|
@ -8,14 +8,15 @@
|
||||||
|
|
||||||
#include "Forward.h"
|
#include "Forward.h"
|
||||||
#include "RegexOptions.h"
|
#include "RegexOptions.h"
|
||||||
#include <AK/Error.h>
|
|
||||||
|
|
||||||
#include <AK/ByteString.h>
|
#include <AK/ByteString.h>
|
||||||
#include <AK/COWVector.h>
|
#include <AK/COWVector.h>
|
||||||
|
#include <AK/Error.h>
|
||||||
#include <AK/FlyString.h>
|
#include <AK/FlyString.h>
|
||||||
#include <AK/MemMem.h>
|
#include <AK/MemMem.h>
|
||||||
#include <AK/StringBuilder.h>
|
#include <AK/StringBuilder.h>
|
||||||
#include <AK/StringView.h>
|
#include <AK/StringView.h>
|
||||||
|
#include <AK/Utf16String.h>
|
||||||
#include <AK/Utf16View.h>
|
#include <AK/Utf16View.h>
|
||||||
#include <AK/Utf32View.h>
|
#include <AK/Utf32View.h>
|
||||||
#include <AK/Utf8View.h>
|
#include <AK/Utf8View.h>
|
||||||
|
@ -110,7 +111,7 @@ public:
|
||||||
return view;
|
return view;
|
||||||
}
|
}
|
||||||
|
|
||||||
RegexStringView construct_as_same(Span<u32> data, Optional<ByteString>& optional_string_storage, Utf16Data& optional_utf16_storage) const
|
RegexStringView construct_as_same(Span<u32> data, Optional<ByteString>& optional_string_storage, Utf16String& optional_utf16_storage) const
|
||||||
{
|
{
|
||||||
auto view = m_view.visit(
|
auto view = m_view.visit(
|
||||||
[&optional_string_storage, data]<typename T>(T const&) {
|
[&optional_string_storage, data]<typename T>(T const&) {
|
||||||
|
@ -121,11 +122,8 @@ public:
|
||||||
return RegexStringView { T { *optional_string_storage } };
|
return RegexStringView { T { *optional_string_storage } };
|
||||||
},
|
},
|
||||||
[&optional_utf16_storage, data](Utf16View) {
|
[&optional_utf16_storage, data](Utf16View) {
|
||||||
auto conversion_result = utf32_to_utf16(Utf32View { data.data(), data.size() }).release_value_but_fixme_should_propagate_errors();
|
optional_utf16_storage = Utf16String::from_utf32({ data.data(), data.size() });
|
||||||
optional_utf16_storage = conversion_result.data;
|
return RegexStringView { optional_utf16_storage.utf16_view() };
|
||||||
auto view = Utf16View { optional_utf16_storage };
|
|
||||||
view.unsafe_set_code_point_length(conversion_result.code_point_count);
|
|
||||||
return RegexStringView { view };
|
|
||||||
});
|
});
|
||||||
|
|
||||||
view.set_unicode(unicode());
|
view.set_unicode(unicode());
|
||||||
|
|
|
@ -46,9 +46,8 @@ WebIDL::ExceptionOr<String> CharacterData::substring_data(size_t offset, size_t
|
||||||
{
|
{
|
||||||
// 1. Let length be node’s length.
|
// 1. Let length be node’s length.
|
||||||
// FIXME: This is very inefficient!
|
// FIXME: This is very inefficient!
|
||||||
auto utf16_result = MUST(AK::utf8_to_utf16(m_data));
|
auto utf16_string = Utf16String::from_utf8(m_data);
|
||||||
Utf16View utf16_view { utf16_result };
|
auto length = utf16_string.length_in_code_units();
|
||||||
auto length = utf16_view.length_in_code_units();
|
|
||||||
|
|
||||||
// 2. If offset is greater than length, then throw an "IndexSizeError" DOMException.
|
// 2. If offset is greater than length, then throw an "IndexSizeError" DOMException.
|
||||||
if (offset > length)
|
if (offset > length)
|
||||||
|
@ -57,10 +56,10 @@ WebIDL::ExceptionOr<String> CharacterData::substring_data(size_t offset, size_t
|
||||||
// 3. If offset plus count is greater than length, return a string whose value is the code units from the offsetth code unit
|
// 3. If offset plus count is greater than length, return a string whose value is the code units from the offsetth code unit
|
||||||
// to the end of node’s data, and then return.
|
// to the end of node’s data, and then return.
|
||||||
if (offset + count > length)
|
if (offset + count > length)
|
||||||
return MUST(utf16_view.substring_view(offset).to_utf8());
|
return MUST(utf16_string.substring_view(offset).to_utf8());
|
||||||
|
|
||||||
// 4. Return a string whose value is the code units from the offsetth code unit to the offset+countth code unit in node’s data.
|
// 4. Return a string whose value is the code units from the offsetth code unit to the offset+countth code unit in node’s data.
|
||||||
return MUST(utf16_view.substring_view(offset, count).to_utf8());
|
return MUST(utf16_string.substring_view(offset, count).to_utf8());
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://dom.spec.whatwg.org/#concept-cd-replace
|
// https://dom.spec.whatwg.org/#concept-cd-replace
|
||||||
|
@ -68,9 +67,8 @@ WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t coun
|
||||||
{
|
{
|
||||||
// 1. Let length be node’s length.
|
// 1. Let length be node’s length.
|
||||||
// FIXME: This is very inefficient!
|
// FIXME: This is very inefficient!
|
||||||
auto utf16_data = MUST(AK::utf8_to_utf16(m_data));
|
auto utf16_string = Utf16String::from_utf8(m_data);
|
||||||
Utf16View utf16_view { utf16_data };
|
auto length = utf16_string.length_in_code_units();
|
||||||
auto length = utf16_view.length_in_code_units();
|
|
||||||
|
|
||||||
// 2. If offset is greater than length, then throw an "IndexSizeError" DOMException.
|
// 2. If offset is greater than length, then throw an "IndexSizeError" DOMException.
|
||||||
if (offset > length)
|
if (offset > length)
|
||||||
|
@ -83,17 +81,17 @@ WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t coun
|
||||||
// 5. Insert data into node’s data after offset code units.
|
// 5. Insert data into node’s data after offset code units.
|
||||||
// 6. Let delete offset be offset + data’s length.
|
// 6. Let delete offset be offset + data’s length.
|
||||||
// 7. Starting from delete offset code units, remove count code units from node’s data.
|
// 7. Starting from delete offset code units, remove count code units from node’s data.
|
||||||
auto before_data = utf16_view.substring_view(0, offset);
|
auto before_data = utf16_string.substring_view(0, offset);
|
||||||
auto inserted_data_result = MUST(AK::utf8_to_utf16(data));
|
auto inserted_data = Utf16String::from_utf8(data);
|
||||||
auto after_data = utf16_view.substring_view(offset + count);
|
auto after_data = utf16_string.substring_view(offset + count);
|
||||||
|
|
||||||
StringBuilder full_data(StringBuilder::Mode::UTF16, before_data.length_in_code_units() + inserted_data_result.data.size() + after_data.length_in_code_units());
|
StringBuilder full_data(StringBuilder::Mode::UTF16, before_data.length_in_code_units() + inserted_data.length_in_code_units() + after_data.length_in_code_units());
|
||||||
full_data.append(before_data);
|
full_data.append(before_data);
|
||||||
full_data.append(inserted_data_result.data);
|
full_data.append(inserted_data);
|
||||||
full_data.append(after_data);
|
full_data.append(after_data);
|
||||||
auto full_view = full_data.utf16_string_view();
|
|
||||||
|
|
||||||
bool characters_are_the_same = utf16_view == full_view;
|
auto full_view = full_data.utf16_string_view();
|
||||||
|
bool characters_are_the_same = utf16_string == full_view;
|
||||||
auto old_data = m_data;
|
auto old_data = m_data;
|
||||||
|
|
||||||
// OPTIMIZATION: Skip UTF-8 encoding if the characters are the same.
|
// OPTIMIZATION: Skip UTF-8 encoding if the characters are the same.
|
||||||
|
@ -123,14 +121,14 @@ WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t coun
|
||||||
// start offset by data’s length and decrease it by count.
|
// start offset by data’s length and decrease it by count.
|
||||||
for (auto* range : Range::live_ranges()) {
|
for (auto* range : Range::live_ranges()) {
|
||||||
if (range->start_container() == this && range->start_offset() > (offset + count))
|
if (range->start_container() == this && range->start_offset() > (offset + count))
|
||||||
range->set_start_offset(range->start_offset() + inserted_data_result.data.size() - count);
|
range->set_start_offset(range->start_offset() + inserted_data.length_in_code_units() - count);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 11. For each live range whose end node is node and end offset is greater than offset plus count, increase its end
|
// 11. For each live range whose end node is node and end offset is greater than offset plus count, increase its end
|
||||||
// offset by data’s length and decrease it by count.
|
// offset by data’s length and decrease it by count.
|
||||||
for (auto* range : Range::live_ranges()) {
|
for (auto* range : Range::live_ranges()) {
|
||||||
if (range->end_container() == this && range->end_offset() > (offset + count))
|
if (range->end_container() == this && range->end_offset() > (offset + count))
|
||||||
range->set_end_offset(range->end_offset() + inserted_data_result.data.size() - count);
|
range->set_end_offset(range->end_offset() + inserted_data.length_in_code_units() - count);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 12. If node’s parent is non-null, then run the children changed steps for node’s parent.
|
// 12. If node’s parent is non-null, then run the children changed steps for node’s parent.
|
||||||
|
|
|
@ -6158,8 +6158,7 @@ Vector<GC::Root<Range>> Document::find_matching_text(String const& query, CaseSe
|
||||||
if (text_blocks.is_empty())
|
if (text_blocks.is_empty())
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
auto utf16_query = MUST(AK::utf8_to_utf16(query));
|
auto utf16_query = Utf16String::from_utf8(query);
|
||||||
Utf16View query_view { utf16_query };
|
|
||||||
|
|
||||||
Vector<GC::Root<Range>> matches;
|
Vector<GC::Root<Range>> matches;
|
||||||
for (auto const& text_block : text_blocks) {
|
for (auto const& text_block : text_blocks) {
|
||||||
|
@ -6169,8 +6168,8 @@ Vector<GC::Root<Range>> Document::find_matching_text(String const& query, CaseSe
|
||||||
auto* match_start_position = text_block.positions.data();
|
auto* match_start_position = text_block.positions.data();
|
||||||
while (true) {
|
while (true) {
|
||||||
auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive
|
auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive
|
||||||
? text_view.find_code_unit_offset_ignoring_case(query_view, offset)
|
? text_view.find_code_unit_offset_ignoring_case(utf16_query, offset)
|
||||||
: text_view.find_code_unit_offset(query_view, offset);
|
: text_view.find_code_unit_offset(utf16_query, offset);
|
||||||
if (!match_index.has_value())
|
if (!match_index.has_value())
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -6181,15 +6180,15 @@ Vector<GC::Root<Range>> Document::find_matching_text(String const& query, CaseSe
|
||||||
auto& start_dom_node = match_start_position->dom_node;
|
auto& start_dom_node = match_start_position->dom_node;
|
||||||
|
|
||||||
auto* match_end_position = match_start_position;
|
auto* match_end_position = match_start_position;
|
||||||
for (; i < text_block.positions.size() - 1 && (match_index.value() + query_view.length_in_code_units() > text_block.positions[i + 1].start_offset); ++i)
|
for (; i < text_block.positions.size() - 1 && (match_index.value() + utf16_query.length_in_code_units() > text_block.positions[i + 1].start_offset); ++i)
|
||||||
match_end_position = &text_block.positions[i + 1];
|
match_end_position = &text_block.positions[i + 1];
|
||||||
|
|
||||||
auto& end_dom_node = match_end_position->dom_node;
|
auto& end_dom_node = match_end_position->dom_node;
|
||||||
auto end_position = match_index.value() + query_view.length_in_code_units() - match_end_position->start_offset;
|
auto end_position = match_index.value() + utf16_query.length_in_code_units() - match_end_position->start_offset;
|
||||||
|
|
||||||
matches.append(Range::create(start_dom_node, start_position, end_dom_node, end_position));
|
matches.append(Range::create(start_dom_node, start_position, end_dom_node, end_position));
|
||||||
match_start_position = match_end_position;
|
match_start_position = match_end_position;
|
||||||
offset = match_index.value() + query_view.length_in_code_units() + 1;
|
offset = match_index.value() + utf16_query.length_in_code_units() + 1;
|
||||||
if (offset >= text_view.length_in_code_units())
|
if (offset >= text_view.length_in_code_units())
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -384,9 +384,9 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa
|
||||||
auto parent_white_space_collapse = resolved_keyword(*start_node->parent(), CSS::PropertyID::WhiteSpaceCollapse);
|
auto parent_white_space_collapse = resolved_keyword(*start_node->parent(), CSS::PropertyID::WhiteSpaceCollapse);
|
||||||
|
|
||||||
// FIXME: Find a way to get code points directly from the UTF-8 string
|
// FIXME: Find a way to get code points directly from the UTF-8 string
|
||||||
auto start_node_data = *start_node->text_content();
|
auto start_node_data = Utf16String::from_utf8(*start_node->text_content());
|
||||||
auto utf16_code_units = MUST(AK::utf8_to_utf16(start_node_data));
|
auto offset_minus_one_code_point = start_node_data.code_point_at(start_offset - 1);
|
||||||
auto offset_minus_one_code_point = Utf16View { utf16_code_units }.code_point_at(start_offset - 1);
|
|
||||||
if (parent_white_space_collapse != CSS::Keyword::Preserve && (offset_minus_one_code_point == 0x20 || offset_minus_one_code_point == 0xA0)) {
|
if (parent_white_space_collapse != CSS::Keyword::Preserve && (offset_minus_one_code_point == 0x20 || offset_minus_one_code_point == 0xA0)) {
|
||||||
--start_offset;
|
--start_offset;
|
||||||
continue;
|
continue;
|
||||||
|
@ -437,9 +437,9 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa
|
||||||
auto parent_white_space_collapse = resolved_keyword(*end_node->parent(), CSS::PropertyID::WhiteSpaceCollapse);
|
auto parent_white_space_collapse = resolved_keyword(*end_node->parent(), CSS::PropertyID::WhiteSpaceCollapse);
|
||||||
|
|
||||||
// FIXME: Find a way to get code points directly from the UTF-8 string
|
// FIXME: Find a way to get code points directly from the UTF-8 string
|
||||||
auto end_node_data = *end_node->text_content();
|
auto end_node_data = Utf16String::from_utf8(*end_node->text_content());
|
||||||
auto utf16_code_units = MUST(AK::utf8_to_utf16(end_node_data));
|
auto offset_code_point = end_node_data.code_point_at(end_offset);
|
||||||
auto offset_code_point = Utf16View { utf16_code_units }.code_point_at(end_offset);
|
|
||||||
if (parent_white_space_collapse != CSS::Keyword::Preserve && (offset_code_point == 0x20 || offset_code_point == 0xA0)) {
|
if (parent_white_space_collapse != CSS::Keyword::Preserve && (offset_code_point == 0x20 || offset_code_point == 0xA0)) {
|
||||||
// 1. If fix collapsed space is true, and collapse spaces is true, and the end offsetth
|
// 1. If fix collapsed space is true, and collapse spaces is true, and the end offsetth
|
||||||
// code unit of end node's data is a space (0x0020): call deleteData(end offset, 1)
|
// code unit of end node's data is a space (0x0020): call deleteData(end offset, 1)
|
||||||
|
@ -556,16 +556,14 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa
|
||||||
// 1. Remove the first code unit from replacement whitespace, and let element be that
|
// 1. Remove the first code unit from replacement whitespace, and let element be that
|
||||||
// code unit.
|
// code unit.
|
||||||
// FIXME: Find a way to get code points directly from the UTF-8 string
|
// FIXME: Find a way to get code points directly from the UTF-8 string
|
||||||
auto replacement_whitespace_utf16 = MUST(AK::utf8_to_utf16(replacement_whitespace));
|
auto replacement_whitespace_utf16 = Utf16String::from_utf8(replacement_whitespace);
|
||||||
auto replacement_whitespace_utf16_view = Utf16View { replacement_whitespace_utf16 };
|
replacement_whitespace = MUST(replacement_whitespace_utf16.substring_view(1).to_utf8());
|
||||||
replacement_whitespace = MUST(String::from_utf16({ replacement_whitespace_utf16_view.substring_view(1) }));
|
auto element = replacement_whitespace_utf16.code_point_at(0);
|
||||||
auto element = replacement_whitespace_utf16_view.code_point_at(0);
|
|
||||||
|
|
||||||
// 2. If element is not the same as the start offsetth code unit of start node's data:
|
// 2. If element is not the same as the start offsetth code unit of start node's data:
|
||||||
auto start_node_data = *start_node->text_content();
|
auto start_node_data = Utf16String::from_utf8(*start_node->text_content());
|
||||||
auto start_node_utf16 = MUST(AK::utf8_to_utf16(start_node_data));
|
auto start_node_code_point = start_node_data.code_point_at(start_offset);
|
||||||
auto start_node_utf16_view = Utf16View { start_node_utf16 };
|
|
||||||
auto start_node_code_point = start_node_utf16_view.code_point_at(start_offset);
|
|
||||||
if (element != start_node_code_point) {
|
if (element != start_node_code_point) {
|
||||||
// 1. Call insertData(start offset, element) on start node.
|
// 1. Call insertData(start offset, element) on start node.
|
||||||
auto& start_node_character_data = static_cast<DOM::CharacterData&>(*start_node);
|
auto& start_node_character_data = static_cast<DOM::CharacterData&>(*start_node);
|
||||||
|
|
|
@ -106,11 +106,10 @@ WebIDL::ExceptionOr<FileReader::Result> FileReader::blob_package_data(JS::Realm&
|
||||||
return JS::ArrayBuffer::create(realm, move(bytes));
|
return JS::ArrayBuffer::create(realm, move(bytes));
|
||||||
case Type::BinaryString:
|
case Type::BinaryString:
|
||||||
// Return bytes as a binary string, in which every byte is represented by a code unit of equal value [0..255].
|
// Return bytes as a binary string, in which every byte is represented by a code unit of equal value [0..255].
|
||||||
Utf16Data builder;
|
StringBuilder builder(StringBuilder::Mode::UTF16, bytes.size());
|
||||||
builder.ensure_capacity(bytes.size());
|
|
||||||
for (auto byte : bytes.bytes())
|
for (auto byte : bytes.bytes())
|
||||||
builder.unchecked_append(byte);
|
builder.append_code_unit(byte);
|
||||||
return MUST(Utf16View { builder }.to_utf8());
|
return MUST(builder.utf16_string_view().to_utf8());
|
||||||
}
|
}
|
||||||
VERIFY_NOT_REACHED();
|
VERIFY_NOT_REACHED();
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include <AK/FlyString.h>
|
#include <AK/FlyString.h>
|
||||||
#include <AK/GenericLexer.h>
|
#include <AK/GenericLexer.h>
|
||||||
#include <AK/String.h>
|
#include <AK/String.h>
|
||||||
|
#include <AK/Utf16String.h>
|
||||||
#include <AK/Utf16View.h>
|
#include <AK/Utf16View.h>
|
||||||
#include <AK/Utf8View.h>
|
#include <AK/Utf8View.h>
|
||||||
#include <LibWeb/Infra/CharacterTypes.h>
|
#include <LibWeb/Infra/CharacterTypes.h>
|
||||||
|
@ -63,10 +64,8 @@ ErrorOr<String> strip_and_collapse_whitespace(StringView string)
|
||||||
// https://infra.spec.whatwg.org/#code-unit-prefix
|
// https://infra.spec.whatwg.org/#code-unit-prefix
|
||||||
bool is_code_unit_prefix(StringView potential_prefix_utf8, StringView input_utf8)
|
bool is_code_unit_prefix(StringView potential_prefix_utf8, StringView input_utf8)
|
||||||
{
|
{
|
||||||
auto potential_prefix_utf16_bytes = MUST(utf8_to_utf16(potential_prefix_utf8));
|
auto potential_prefix = Utf16String::from_utf8(potential_prefix_utf8);
|
||||||
auto input_utf16_bytes = MUST(utf8_to_utf16(input_utf8));
|
auto input = Utf16String::from_utf8(input_utf8);
|
||||||
Utf16View potential_prefix { potential_prefix_utf16_bytes };
|
|
||||||
Utf16View input { input_utf16_bytes };
|
|
||||||
|
|
||||||
// 1. Let i be 0.
|
// 1. Let i be 0.
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
|
@ -148,9 +147,10 @@ bool code_unit_less_than(StringView a, StringView b)
|
||||||
if (a.is_ascii() && b.is_ascii())
|
if (a.is_ascii() && b.is_ascii())
|
||||||
return a < b;
|
return a < b;
|
||||||
|
|
||||||
auto a_utf16 = MUST(utf8_to_utf16(a));
|
auto a_utf16 = Utf16String::from_utf8(a);
|
||||||
auto b_utf16 = MUST(utf8_to_utf16(b));
|
auto b_utf16 = Utf16String::from_utf8(b);
|
||||||
return Utf16View { a_utf16 }.is_code_unit_less_than(Utf16View { b_utf16 });
|
|
||||||
|
return a_utf16.utf16_view().is_code_unit_less_than(b_utf16);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,17 +50,18 @@ Vector<Viewport::TextBlock> const& Viewport::text_blocks()
|
||||||
|
|
||||||
void Viewport::update_text_blocks()
|
void Viewport::update_text_blocks()
|
||||||
{
|
{
|
||||||
StringBuilder builder;
|
StringBuilder builder(StringBuilder::Mode::UTF16);
|
||||||
size_t current_start_position = 0;
|
size_t current_start_position = 0;
|
||||||
Vector<TextPosition> text_positions;
|
Vector<TextPosition> text_positions;
|
||||||
Vector<TextBlock> text_blocks;
|
Vector<TextBlock> text_blocks;
|
||||||
|
|
||||||
for_each_in_inclusive_subtree([&](auto const& layout_node) {
|
for_each_in_inclusive_subtree([&](auto const& layout_node) {
|
||||||
if (layout_node.display().is_none() || !layout_node.first_paintable() || !layout_node.first_paintable()->is_visible())
|
if (layout_node.display().is_none() || !layout_node.first_paintable() || !layout_node.first_paintable()->is_visible())
|
||||||
return TraversalDecision::Continue;
|
return TraversalDecision::Continue;
|
||||||
|
|
||||||
if (layout_node.is_box() || layout_node.is_generated()) {
|
if (layout_node.is_box() || layout_node.is_generated()) {
|
||||||
if (!builder.is_empty()) {
|
if (!builder.is_empty()) {
|
||||||
text_blocks.append({ MUST(AK::utf8_to_utf16(builder.string_view())), text_positions });
|
text_blocks.append({ builder.to_utf16_string(), text_positions });
|
||||||
current_start_position = 0;
|
current_start_position = 0;
|
||||||
text_positions.clear_with_capacity();
|
text_positions.clear_with_capacity();
|
||||||
builder.clear();
|
builder.clear();
|
||||||
|
@ -79,10 +80,9 @@ void Viewport::update_text_blocks()
|
||||||
text_positions.empend(dom_node, current_start_position);
|
text_positions.empend(dom_node, current_start_position);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto const& current_node_text = text_node->text_for_rendering();
|
auto const& current_node_text = Utf16String::from_utf8(text_node->text_for_rendering());
|
||||||
auto const current_node_text_utf16 = MUST(AK::utf8_to_utf16(current_node_text));
|
current_start_position += current_node_text.length_in_code_units();
|
||||||
current_start_position += current_node_text_utf16.data.size();
|
builder.append(current_node_text);
|
||||||
builder.append(move(current_node_text));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -90,7 +90,7 @@ void Viewport::update_text_blocks()
|
||||||
});
|
});
|
||||||
|
|
||||||
if (!builder.is_empty())
|
if (!builder.is_empty())
|
||||||
text_blocks.append({ MUST(AK::utf8_to_utf16(builder.string_view())), text_positions });
|
text_blocks.append({ builder.to_utf16_string(), text_positions });
|
||||||
|
|
||||||
m_text_blocks = move(text_blocks);
|
m_text_blocks = move(text_blocks);
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <AK/Utf16String.h>
|
||||||
#include <LibWeb/DOM/Document.h>
|
#include <LibWeb/DOM/Document.h>
|
||||||
#include <LibWeb/Layout/BlockContainer.h>
|
#include <LibWeb/Layout/BlockContainer.h>
|
||||||
|
|
||||||
|
@ -24,7 +25,7 @@ public:
|
||||||
size_t start_offset { 0 };
|
size_t start_offset { 0 };
|
||||||
};
|
};
|
||||||
struct TextBlock {
|
struct TextBlock {
|
||||||
AK::Utf16ConversionResult text;
|
Utf16String text;
|
||||||
Vector<TextPosition> positions;
|
Vector<TextPosition> positions;
|
||||||
};
|
};
|
||||||
Vector<TextBlock> const& text_blocks();
|
Vector<TextBlock> const& text_blocks();
|
||||||
|
|
|
@ -252,9 +252,8 @@ Utf16View PaintableFragment::utf16_view() const
|
||||||
return {};
|
return {};
|
||||||
|
|
||||||
if (!m_text_in_utf16.has_value())
|
if (!m_text_in_utf16.has_value())
|
||||||
m_text_in_utf16 = MUST(AK::utf8_to_utf16(utf8_view()));
|
m_text_in_utf16 = Utf16String::from_utf8(utf8_view().as_string());
|
||||||
|
return *m_text_in_utf16;
|
||||||
return Utf16View { m_text_in_utf16.value() };
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <AK/Utf16String.h>
|
||||||
#include <LibGfx/TextLayout.h>
|
#include <LibGfx/TextLayout.h>
|
||||||
#include <LibWeb/Layout/Node.h>
|
#include <LibWeb/Layout/Node.h>
|
||||||
#include <LibWeb/Painting/ShadowData.h>
|
#include <LibWeb/Painting/ShadowData.h>
|
||||||
|
@ -64,7 +65,7 @@ private:
|
||||||
CSS::WritingMode m_writing_mode;
|
CSS::WritingMode m_writing_mode;
|
||||||
Vector<ShadowData> m_shadows;
|
Vector<ShadowData> m_shadows;
|
||||||
CSSPixels m_text_decoration_thickness { 0 };
|
CSSPixels m_text_decoration_thickness { 0 };
|
||||||
mutable Optional<AK::Utf16ConversionResult> m_text_in_utf16;
|
mutable Optional<Utf16String> m_text_in_utf16;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -48,8 +48,8 @@ ByteString SVGTextContentElement::text_contents() const
|
||||||
// https://svgwg.org/svg2-draft/text.html#__svg__SVGTextContentElement__getNumberOfChars
|
// https://svgwg.org/svg2-draft/text.html#__svg__SVGTextContentElement__getNumberOfChars
|
||||||
WebIDL::ExceptionOr<WebIDL::Long> SVGTextContentElement::get_number_of_chars() const
|
WebIDL::ExceptionOr<WebIDL::Long> SVGTextContentElement::get_number_of_chars() const
|
||||||
{
|
{
|
||||||
auto chars = TRY_OR_THROW_OOM(vm(), utf8_to_utf16(text_contents())).data;
|
auto length_in_code_units = AK::utf16_code_unit_length_from_utf8(text_contents());
|
||||||
return static_cast<WebIDL::Long>(chars.size());
|
return static_cast<WebIDL::Long>(length_in_code_units);
|
||||||
}
|
}
|
||||||
|
|
||||||
GC::Ref<Geometry::DOMPoint> SVGTextContentElement::get_start_position_of_char(WebIDL::UnsignedLong charnum)
|
GC::Ref<Geometry::DOMPoint> SVGTextContentElement::get_start_position_of_char(WebIDL::UnsignedLong charnum)
|
||||||
|
|
|
@ -15,7 +15,7 @@
|
||||||
|
|
||||||
TEST_CASE(decode_ascii)
|
TEST_CASE(decode_ascii)
|
||||||
{
|
{
|
||||||
auto string = MUST(AK::utf8_to_utf16("Hello World!11"sv));
|
auto string = Utf16String::from_utf8("Hello World!11"sv);
|
||||||
Utf16View view { string };
|
Utf16View view { string };
|
||||||
|
|
||||||
size_t valid_code_units = 0;
|
size_t valid_code_units = 0;
|
||||||
|
@ -34,7 +34,7 @@ TEST_CASE(decode_ascii)
|
||||||
|
|
||||||
TEST_CASE(decode_utf8)
|
TEST_CASE(decode_utf8)
|
||||||
{
|
{
|
||||||
auto string = MUST(AK::utf8_to_utf16("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv));
|
auto string = Utf16String::from_utf8("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv);
|
||||||
Utf16View view { string };
|
Utf16View view { string };
|
||||||
|
|
||||||
size_t valid_code_units = 0;
|
size_t valid_code_units = 0;
|
||||||
|
@ -55,7 +55,7 @@ TEST_CASE(encode_utf8)
|
||||||
{
|
{
|
||||||
{
|
{
|
||||||
auto utf8_string = "Привет, мир! 😀 γειά σου κόσμος こんにちは世界"_string;
|
auto utf8_string = "Привет, мир! 😀 γειά σου κόσμος こんにちは世界"_string;
|
||||||
auto string = MUST(AK::utf8_to_utf16(utf8_string));
|
auto string = Utf16String::from_utf8(utf8_string);
|
||||||
Utf16View view { string };
|
Utf16View view { string };
|
||||||
EXPECT_EQ(MUST(view.to_utf8(AllowLonelySurrogates::Yes)), utf8_string);
|
EXPECT_EQ(MUST(view.to_utf8(AllowLonelySurrogates::Yes)), utf8_string);
|
||||||
EXPECT_EQ(MUST(view.to_utf8(AllowLonelySurrogates::No)), utf8_string);
|
EXPECT_EQ(MUST(view.to_utf8(AllowLonelySurrogates::No)), utf8_string);
|
||||||
|
@ -139,7 +139,7 @@ TEST_CASE(utf16_literal)
|
||||||
|
|
||||||
TEST_CASE(iterate_utf16)
|
TEST_CASE(iterate_utf16)
|
||||||
{
|
{
|
||||||
auto string = MUST(AK::utf8_to_utf16("Привет 😀"sv));
|
auto string = Utf16String::from_utf8("Привет 😀"sv);
|
||||||
Utf16View view { string };
|
Utf16View view { string };
|
||||||
auto iterator = view.begin();
|
auto iterator = view.begin();
|
||||||
|
|
||||||
|
@ -371,16 +371,16 @@ TEST_CASE(to_ascii_titlecase)
|
||||||
|
|
||||||
TEST_CASE(equals_ignoring_case)
|
TEST_CASE(equals_ignoring_case)
|
||||||
{
|
{
|
||||||
auto string1 = MUST(AK::utf8_to_utf16("foobar"sv));
|
auto string1 = Utf16String::from_utf8("foobar"sv);
|
||||||
auto string2 = MUST(AK::utf8_to_utf16("FooBar"sv));
|
auto string2 = Utf16String::from_utf8("FooBar"sv);
|
||||||
EXPECT(Utf16View { string1 }.equals_ignoring_case(Utf16View { string2 }));
|
EXPECT(Utf16View { string1 }.equals_ignoring_case(Utf16View { string2 }));
|
||||||
|
|
||||||
string1 = MUST(AK::utf8_to_utf16(""sv));
|
string1 = Utf16String::from_utf8(""sv);
|
||||||
string2 = MUST(AK::utf8_to_utf16(""sv));
|
string2 = Utf16String::from_utf8(""sv);
|
||||||
EXPECT(Utf16View { string1 }.equals_ignoring_case(Utf16View { string2 }));
|
EXPECT(Utf16View { string1 }.equals_ignoring_case(Utf16View { string2 }));
|
||||||
|
|
||||||
string1 = MUST(AK::utf8_to_utf16(""sv));
|
string1 = Utf16String::from_utf8(""sv);
|
||||||
string2 = MUST(AK::utf8_to_utf16("FooBar"sv));
|
string2 = Utf16String::from_utf8("FooBar"sv);
|
||||||
EXPECT(!Utf16View { string1 }.equals_ignoring_case(Utf16View { string2 }));
|
EXPECT(!Utf16View { string1 }.equals_ignoring_case(Utf16View { string2 }));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -425,7 +425,7 @@ TEST_CASE(replace)
|
||||||
|
|
||||||
TEST_CASE(substring_view)
|
TEST_CASE(substring_view)
|
||||||
{
|
{
|
||||||
auto string = MUST(AK::utf8_to_utf16("Привет 😀"sv));
|
auto string = Utf16String::from_utf8("Привет 😀"sv);
|
||||||
{
|
{
|
||||||
Utf16View view { string };
|
Utf16View view { string };
|
||||||
view = view.substring_view(7, 2);
|
view = view.substring_view(7, 2);
|
||||||
|
@ -532,7 +532,7 @@ TEST_CASE(starts_with)
|
||||||
|
|
||||||
TEST_CASE(find_code_unit_offset)
|
TEST_CASE(find_code_unit_offset)
|
||||||
{
|
{
|
||||||
auto conversion_result = MUST(AK::utf8_to_utf16("😀foo😀bar"sv));
|
auto conversion_result = Utf16String::from_utf8("😀foo😀bar"sv);
|
||||||
Utf16View const view { conversion_result };
|
Utf16View const view { conversion_result };
|
||||||
|
|
||||||
EXPECT_EQ(0u, view.find_code_unit_offset(u""sv).value());
|
EXPECT_EQ(0u, view.find_code_unit_offset(u""sv).value());
|
||||||
|
@ -549,7 +549,7 @@ TEST_CASE(find_code_unit_offset)
|
||||||
|
|
||||||
TEST_CASE(find_code_unit_offset_ignoring_case)
|
TEST_CASE(find_code_unit_offset_ignoring_case)
|
||||||
{
|
{
|
||||||
auto conversion_result = MUST(AK::utf8_to_utf16("😀Foo😀Bar"sv));
|
auto conversion_result = Utf16String::from_utf8("😀Foo😀Bar"sv);
|
||||||
Utf16View const view { conversion_result };
|
Utf16View const view { conversion_result };
|
||||||
|
|
||||||
EXPECT_EQ(0u, view.find_code_unit_offset_ignoring_case(u""sv).value());
|
EXPECT_EQ(0u, view.find_code_unit_offset_ignoring_case(u""sv).value());
|
||||||
|
|
|
@ -823,7 +823,7 @@ TEST_CASE(ECMA262_unicode_match)
|
||||||
for (auto& test : tests) {
|
for (auto& test : tests) {
|
||||||
Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | test.options);
|
Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | test.options);
|
||||||
|
|
||||||
auto subject = MUST(AK::utf8_to_utf16(test.subject));
|
auto subject = Utf16String::from_utf8(test.subject);
|
||||||
Utf16View view { subject };
|
Utf16View view { subject };
|
||||||
|
|
||||||
if constexpr (REGEX_DEBUG) {
|
if constexpr (REGEX_DEBUG) {
|
||||||
|
@ -956,7 +956,7 @@ TEST_CASE(ECMA262_property_match)
|
||||||
for (auto& test : tests) {
|
for (auto& test : tests) {
|
||||||
Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | regex::ECMAScriptFlags::BrowserExtended | test.options);
|
Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | regex::ECMAScriptFlags::BrowserExtended | test.options);
|
||||||
|
|
||||||
auto subject = MUST(AK::utf8_to_utf16(test.subject));
|
auto subject = Utf16String::from_utf8(test.subject);
|
||||||
Utf16View view { subject };
|
Utf16View view { subject };
|
||||||
|
|
||||||
if constexpr (REGEX_DEBUG) {
|
if constexpr (REGEX_DEBUG) {
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include <AK/Array.h>
|
#include <AK/Array.h>
|
||||||
#include <AK/String.h>
|
#include <AK/String.h>
|
||||||
#include <AK/StringView.h>
|
#include <AK/StringView.h>
|
||||||
|
#include <AK/Utf16String.h>
|
||||||
#include <AK/Utf16View.h>
|
#include <AK/Utf16View.h>
|
||||||
#include <AK/Vector.h>
|
#include <AK/Vector.h>
|
||||||
#include <LibUnicode/Segmenter.h>
|
#include <LibUnicode/Segmenter.h>
|
||||||
|
@ -155,21 +156,21 @@ TEST_CASE(out_of_bounds)
|
||||||
EXPECT(!result.has_value());
|
EXPECT(!result.has_value());
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
auto text = MUST(AK::utf8_to_utf16("foo"sv));
|
auto text = u"foo"_utf16;
|
||||||
|
|
||||||
auto segmenter = Unicode::Segmenter::create(Unicode::SegmenterGranularity::Word);
|
auto segmenter = Unicode::Segmenter::create(Unicode::SegmenterGranularity::Word);
|
||||||
segmenter->set_segmented_text(Utf16View { text });
|
segmenter->set_segmented_text(text);
|
||||||
|
|
||||||
auto result = segmenter->previous_boundary(text.data.size() + 1);
|
auto result = segmenter->previous_boundary(text.length_in_code_units() + 1);
|
||||||
EXPECT(result.has_value());
|
EXPECT(result.has_value());
|
||||||
|
|
||||||
result = segmenter->next_boundary(text.data.size() + 1);
|
result = segmenter->next_boundary(text.length_in_code_units() + 1);
|
||||||
EXPECT(!result.has_value());
|
EXPECT(!result.has_value());
|
||||||
|
|
||||||
result = segmenter->previous_boundary(text.data.size());
|
result = segmenter->previous_boundary(text.length_in_code_units());
|
||||||
EXPECT(result.has_value());
|
EXPECT(result.has_value());
|
||||||
|
|
||||||
result = segmenter->next_boundary(text.data.size());
|
result = segmenter->next_boundary(text.length_in_code_units());
|
||||||
EXPECT(!result.has_value());
|
EXPECT(!result.has_value());
|
||||||
|
|
||||||
result = segmenter->next_boundary(0);
|
result = segmenter->next_boundary(0);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue