AK: Add fast paths for Utf8View::*_offset_of() methods
Some checks are pending
CI / macOS, arm64, Sanitizer_CI, Clang (push) Waiting to run
CI / Linux, x86_64, Fuzzers_CI, Clang (push) Waiting to run
CI / Linux, x86_64, Sanitizer_CI, GNU (push) Waiting to run
CI / Linux, x86_64, Sanitizer_CI, Clang (push) Waiting to run
Package the js repl as a binary artifact / macOS, arm64 (push) Waiting to run
Package the js repl as a binary artifact / Linux, x86_64 (push) Waiting to run
Run test262 and test-wasm / run_and_update_results (push) Waiting to run
Lint Code / lint (push) Waiting to run
Label PRs with merge conflicts / auto-labeler (push) Waiting to run
Push notes / build (push) Waiting to run

If all code points in the string are represented by a single byte (i.e.
the string is ASCII-only), byte offsets and code point offsets are
identical, so these methods can take a fast path and simply return the
input offset unchanged.
This commit is contained in:
Jelle Raaijmakers 2025-06-12 20:26:25 +02:00 committed by Jelle Raaijmakers
commit 01ede6cc58
Notes: github-actions[bot] 2025-06-13 13:09:30 +00:00

View file

@ -33,6 +33,11 @@ Utf8CodePointIterator Utf8View::iterator_at_byte_offset_without_validation(size_
size_t Utf8View::code_point_offset_of(size_t byte_offset) const size_t Utf8View::code_point_offset_of(size_t byte_offset) const
{ {
VERIFY(byte_offset < byte_length()); VERIFY(byte_offset < byte_length());
// Fast path: each code point is represented by a single byte.
if (length() == byte_length())
return byte_offset;
size_t code_point_offset = 0; size_t code_point_offset = 0;
for (auto it = begin(); !it.done(); ++it) { for (auto it = begin(); !it.done(); ++it) {
if (it.m_ptr > begin_ptr() + byte_offset) if (it.m_ptr > begin_ptr() + byte_offset)
@ -44,8 +49,13 @@ size_t Utf8View::code_point_offset_of(size_t byte_offset) const
size_t Utf8View::byte_offset_of(size_t code_point_offset) const size_t Utf8View::byte_offset_of(size_t code_point_offset) const
{ {
size_t byte_offset = 0; VERIFY(code_point_offset < length());
// Fast path: each code point is represented by a single byte.
if (length() == byte_length())
return code_point_offset;
size_t byte_offset = 0;
for (auto it = begin(); !it.done(); ++it) { for (auto it = begin(); !it.done(); ++it) {
if (code_point_offset == 0) if (code_point_offset == 0)
return byte_offset; return byte_offset;
@ -53,7 +63,6 @@ size_t Utf8View::byte_offset_of(size_t code_point_offset) const
byte_offset += it.underlying_code_point_length_in_bytes(); byte_offset += it.underlying_code_point_length_in_bytes();
--code_point_offset; --code_point_offset;
} }
return byte_offset; return byte_offset;
} }