mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-28 21:26:22 +00:00
AK+LibJS+LibWeb: Use simdutf to create well-formed strings
This commit is contained in:
parent
017a6cc687
commit
1375e6bf39
Notes:
github-actions[bot]
2025-07-25 22:41:55 +00:00
Author: https://github.com/trflynn89
Commit: 1375e6bf39
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5608
Reviewed-by: https://github.com/gmta ✅
9 changed files with 37 additions and 42 deletions
|
@ -114,6 +114,20 @@ Utf16String Utf16String::repeated(u32 code_point, size_t count)
|
|||
return builder.to_utf16_string();
|
||||
}
|
||||
|
||||
// Produces a well-formed version of this string. When the code units already validate with lonely
// surrogates disallowed, this string itself is returned; otherwise a new string is created from the
// data produced by Detail::Utf16StringData::to_well_formed().
Utf16String Utf16String::to_well_formed() const
{
    auto const is_already_well_formed = utf16_view().validate(AllowLonelySurrogates::No);

    if (!is_already_well_formed)
        return Utf16String { Detail::Utf16StringData::to_well_formed(*this) };

    return *this;
}
|
||||
|
||||
// Makes this string well-formed and returns the result encoded as UTF-8.
//
// to_well_formed() already hands back this string unchanged when it validates, so delegating to it
// unconditionally covers both the valid and the ill-formed case. Checking validity here as well would
// scan an ill-formed string twice (once here, once more inside to_well_formed()).
String Utf16String::to_well_formed_utf8() const
{
    return to_well_formed().to_utf8(AllowLonelySurrogates::No);
}
|
||||
|
||||
ErrorOr<void> Formatter<Utf16String>::format(FormatBuilder& builder, Utf16String const& utf16_string)
|
||||
{
|
||||
if (utf16_string.has_long_utf16_storage())
|
||||
|
|
|
@ -138,6 +138,9 @@ public:
|
|||
return from_string_builder_without_validation(builder);
|
||||
}
|
||||
|
||||
// Returns this string unchanged if it validates with lonely surrogates disallowed; otherwise returns a newly created well-formed string.
Utf16String to_well_formed() const;
|
||||
// Returns the well-formed version of this string converted to UTF-8.
String to_well_formed_utf8() const;
|
||||
|
||||
// These methods require linking LibUnicode.
|
||||
Utf16String to_lowercase(Optional<StringView> const& locale = {}) const;
|
||||
Utf16String to_uppercase(Optional<StringView> const& locale = {}) const;
|
||||
|
|
|
@ -158,6 +158,16 @@ NonnullRefPtr<Utf16StringData> Utf16StringData::from_string_builder(StringBuilde
|
|||
return adopt_ref(*new (buffer->buffer.data()) Utf16StringData { storage_type, code_unit_length });
|
||||
}
|
||||
|
||||
// Creates new UTF-16 string data holding the result of running simdutf::to_well_formed_utf16 over the
// given view. The output buffer is allocated with the same number of code units as the input.
NonnullRefPtr<Utf16StringData> Utf16StringData::to_well_formed(Utf16View const& utf16_string)
{
    // Callers must pass a view backed by UTF-16 code units; the UTF-16 span is read below.
    VERIFY(!utf16_string.has_ascii_storage());

    auto const code_unit_length = utf16_string.length_in_code_units();
    auto const* source_code_units = utf16_string.utf16_span().data();

    auto well_formed = create_uninitialized(StorageType::UTF16, code_unit_length);
    simdutf::to_well_formed_utf16(source_code_units, code_unit_length, well_formed->m_utf16_data);

    return well_formed;
}
|
||||
|
||||
size_t Utf16StringData::calculate_code_point_length() const
|
||||
{
|
||||
ASSERT(!has_ascii_storage());
|
||||
|
|
|
@ -35,6 +35,8 @@ public:
|
|||
static NonnullRefPtr<Utf16StringData> from_utf32(Utf32View const&);
|
||||
static NonnullRefPtr<Utf16StringData> from_string_builder(StringBuilder&);
|
||||
|
||||
// Creates new UTF-16 string data from the view via simdutf::to_well_formed_utf16. The view must not have ASCII storage.
static NonnullRefPtr<Utf16StringData> to_well_formed(Utf16View const&);
|
||||
|
||||
~Utf16StringData()
|
||||
{
|
||||
if (is_fly_string())
|
||||
|
|
|
@ -1362,44 +1362,17 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::to_well_formed)
|
|||
// 2. Let S be ? ToString(O).
|
||||
auto string = TRY(primitive_string_from(vm));
|
||||
|
||||
// NOTE: Rest of steps in to_well_formed below
|
||||
return PrimitiveString::create(vm, to_well_formed_string(string->utf16_string()));
|
||||
}
|
||||
|
||||
// https://tc39.es/ecma262/#sec-string.prototype.towellformed
|
||||
String to_well_formed_string(Utf16String const& string)
|
||||
{
|
||||
// 3. Let strLen be the length of S.
|
||||
auto length = string.length_in_code_units();
|
||||
|
||||
// 4. Let k be 0.
|
||||
size_t k = 0;
|
||||
|
||||
// 5. Let result be the empty String.
|
||||
StringBuilder result;
|
||||
|
||||
// 6. Repeat, while k < strLen,
|
||||
while (k < length) {
|
||||
// a. Let cp be CodePointAt(S, k).
|
||||
auto code_point = JS::code_point_at(string, k);
|
||||
|
||||
// b. If cp.[[IsUnpairedSurrogate]] is true, then
|
||||
if (code_point.is_unpaired_surrogate) {
|
||||
// i. Set result to the string-concatenation of result and 0xFFFD (REPLACEMENT CHARACTER).
|
||||
result.append_code_point(0xfffd);
|
||||
}
|
||||
// c. Else,
|
||||
else {
|
||||
// i. Set result to the string-concatenation of result and UTF16EncodeCodePoint(cp.[[CodePoint]]).
|
||||
result.append_code_point(code_point.code_point);
|
||||
}
|
||||
|
||||
// d. Set k to k + cp.[[CodeUnitCount]].
|
||||
k += code_point.code_unit_count;
|
||||
}
|
||||
|
||||
// 7. Return result.
|
||||
return MUST(result.to_string());
|
||||
// a. Let cp be CodePointAt(S, k).
|
||||
// b. If cp.[[IsUnpairedSurrogate]] is true, then
|
||||
// i. Set result to the string-concatenation of result and 0xFFFD (REPLACEMENT CHARACTER).
|
||||
// c. Else,
|
||||
// i. Set result to the string-concatenation of result and UTF16EncodeCodePoint(cp.[[CodePoint]]).
|
||||
// d. Set k to k + cp.[[CodeUnitCount]].
|
||||
return PrimitiveString::create(vm, string->utf16_string().to_well_formed());
|
||||
}
|
||||
|
||||
// 22.1.3.32.1 TrimString ( string, where ), https://tc39.es/ecma262/#sec-trimstring
|
||||
|
|
|
@ -19,7 +19,6 @@ struct CodePoint {
|
|||
|
||||
Optional<size_t> string_index_of(Utf16View const& string, Utf16View const& search_value, size_t from_index);
|
||||
CodePoint code_point_at(Utf16View const& string, size_t position);
|
||||
String to_well_formed_string(Utf16String const&);
|
||||
static constexpr Utf8View whitespace_characters = Utf8View("\x09\x0A\x0B\x0C\x0D\x20\xC2\xA0\xE1\x9A\x80\xE2\x80\x80\xE2\x80\x81\xE2\x80\x82\xE2\x80\x83\xE2\x80\x84\xE2\x80\x85\xE2\x80\x86\xE2\x80\x87\xE2\x80\x88\xE2\x80\x89\xE2\x80\x8A\xE2\x80\xAF\xE2\x81\x9F\xE3\x80\x80\xE2\x80\xA8\xE2\x80\xA9\xEF\xBB\xBF"sv);
|
||||
ThrowCompletionOr<String> trim_string(VM&, Value string, TrimMode where);
|
||||
|
||||
|
|
|
@ -458,11 +458,6 @@ ThrowCompletionOr<Utf16String> Value::to_utf16_string(VM& vm) const
|
|||
return Utf16String::from_utf8(utf8_string);
|
||||
}
|
||||
|
||||
ThrowCompletionOr<String> Value::to_well_formed_string(VM& vm) const
|
||||
{
|
||||
return ::JS::to_well_formed_string(TRY(to_utf16_string(vm)));
|
||||
}
|
||||
|
||||
// 7.1.2 ToBoolean ( argument ), https://tc39.es/ecma262/#sec-toboolean
|
||||
bool Value::to_boolean_slow_case() const
|
||||
{
|
||||
|
|
|
@ -352,7 +352,6 @@ public:
|
|||
ThrowCompletionOr<String> to_string(VM&) const;
|
||||
ThrowCompletionOr<ByteString> to_byte_string(VM&) const;
|
||||
ThrowCompletionOr<Utf16String> to_utf16_string(VM&) const;
|
||||
ThrowCompletionOr<String> to_well_formed_string(VM&) const;
|
||||
ThrowCompletionOr<GC::Ref<PrimitiveString>> to_primitive_string(VM&);
|
||||
ThrowCompletionOr<Value> to_primitive(VM&, PreferredType preferred_type = PreferredType::Default) const;
|
||||
ThrowCompletionOr<GC::Ref<Object>> to_object(VM&) const;
|
||||
|
|
|
@ -241,7 +241,7 @@ JS::ThrowCompletionOr<Utf16String> to_utf16_string(JS::VM& vm, JS::Value value)
|
|||
|
||||
// Converts `value` to a well-formed UTF-8 String: the value is first converted to a UTF-16 string
// (any thrown completion is propagated by TRY), and the result is then made well-formed and encoded
// as UTF-8 by Utf16String::to_well_formed_utf8().
//
// Only one return statement is kept. The earlier `value.to_well_formed_string(vm)` return made this
// one unreachable and relied on a Value helper that this change removes.
JS::ThrowCompletionOr<String> to_usv_string(JS::VM& vm, JS::Value value)
{
    return TRY(value.to_utf16_string(vm)).to_well_formed_utf8();
}
|
||||
|
||||
// https://webidl.spec.whatwg.org/#invoke-a-callback-function
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue