From 01ede6cc580bb79fd13c97b52e616fb4a742c549 Mon Sep 17 00:00:00 2001
From: Jelle Raaijmakers
Date: Thu, 12 Jun 2025 20:26:25 +0200
Subject: [PATCH] AK: Add fast paths for `Utf8View::*_offset_of()` methods

If all code points in the string are represented by a single byte, we
can simply take the fast path of returning the input for these methods.
---
 AK/Utf8View.cpp | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp
index a40b6817c9b..648516c281f 100644
--- a/AK/Utf8View.cpp
+++ b/AK/Utf8View.cpp
@@ -33,6 +33,11 @@ Utf8CodePointIterator Utf8View::iterator_at_byte_offset_without_validation(size_
 size_t Utf8View::code_point_offset_of(size_t byte_offset) const
 {
     VERIFY(byte_offset < byte_length());
+
+    // Fast path: each code point is represented by a single byte.
+    if (length() == byte_length())
+        return byte_offset;
+
     size_t code_point_offset = 0;
     for (auto it = begin(); !it.done(); ++it) {
         if (it.m_ptr > begin_ptr() + byte_offset)
@@ -44,8 +49,13 @@ size_t Utf8View::code_point_offset_of(size_t byte_offset) const
 
 size_t Utf8View::byte_offset_of(size_t code_point_offset) const
 {
-    size_t byte_offset = 0;
+    VERIFY(code_point_offset < length());
 
+    // Fast path: each code point is represented by a single byte.
+    if (length() == byte_length())
+        return code_point_offset;
+
+    size_t byte_offset = 0;
     for (auto it = begin(); !it.done(); ++it) {
         if (code_point_offset == 0)
             return byte_offset;
@@ -53,7 +63,6 @@ size_t Utf8View::byte_offset_of(size_t code_point_offset) const
         byte_offset += it.underlying_code_point_length_in_bytes();
         --code_point_offset;
     }
-
     return byte_offset;
 }
 