AK+Everywhere: Recognise that surrogates in utf16 aren't all that common

For the slight cost of counting code points when converting between
encodings and a teeny bit of memory, this commit adds a fast path for
substring and code point operations on UTF-16 strings that contain no
surrogates.

These operations seem to account for a significant chunk of the time
spent in many regex benchmarks.
This commit is contained in:
Ali Mohammad Pur 2025-04-02 17:56:49 +02:00 committed by Andrew Kaster
commit eea81738cd
Notes: github-actions[bot] 2025-04-23 13:57:06 +00:00
11 changed files with 74 additions and 37 deletions

View file

@ -572,7 +572,8 @@ JS_DEFINE_NATIVE_FUNCTION(GlobalObject::escape)
// 2. Let length be the length of string.
// 5. Let k be 0.
// 6. Repeat, while k < length,
for (auto code_point : TRY_OR_THROW_OOM(vm, utf8_to_utf16(string))) {
auto utf16_conversion = TRY_OR_THROW_OOM(vm, utf8_to_utf16(string));
for (auto code_point : utf16_conversion.data) {
// a. Let char be the code unit at index k within string.
// b. If unescapedSet contains char, then

View file

@ -97,8 +97,8 @@ ErrorOr<String, ParseRegexPatternError> parse_regex_pattern(StringView pattern,
if (utf16_pattern_result.is_error())
return ParseRegexPatternError { "Out of memory"_string };
auto utf16_pattern = utf16_pattern_result.release_value();
Utf16View utf16_pattern_view { utf16_pattern };
auto utf16_result = utf16_pattern_result.release_value();
Utf16View utf16_pattern_view { utf16_result };
StringBuilder builder;
// If the Unicode flag is set, append each code point to the pattern. Otherwise, append each

View file

@ -34,7 +34,10 @@ NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16Data string)
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(StringView string)
{
return create(MUST(utf8_to_utf16(string)));
auto result = MUST(utf8_to_utf16(string));
auto impl = create(move(result.data));
impl->m_cached_view.unsafe_set_code_point_length(result.code_point_count);
return impl;
}
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view)
@ -42,7 +45,9 @@ NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view)
Utf16Data string;
string.ensure_capacity(view.length_in_code_units());
string.unchecked_append(view.data(), view.length_in_code_units());
return create(move(string));
auto impl = create(move(string));
// Seed the cached view with the source view's *code point* count. A code point outside the BMP
// occupies two code units (a surrogate pair), so the code unit length is only a valid code point
// count for surrogate-free strings; caching it unconditionally would make later code point
// operations trust a wrong length.
impl->m_cached_view.unsafe_set_code_point_length(view.length_in_code_points());
return impl;
}
Utf16Data const& Utf16StringImpl::string() const
@ -52,7 +57,7 @@ Utf16Data const& Utf16StringImpl::string() const
Utf16View Utf16StringImpl::view() const
{
return Utf16View { m_string };
return m_cached_view;
}
u32 Utf16StringImpl::compute_hash() const

View file

@ -48,6 +48,7 @@ private:
mutable bool m_has_hash { false };
mutable u32 m_hash { 0 };
Utf16Data m_string;
Utf16View m_cached_view { m_string.span() };
};
}