AK: Improve performance of ASCII case conversions

Don't use a Vector to form the transformed string. We can construct the
string immediately and store the result in its buffer, and thus avoid a
double allocation.

In a synthetic benchmark, lowercasing a 500-character ASCII string
1 million times went from 550ms to 65ms on my machine.
This commit is contained in:
Timothy Flynn 2025-04-07 08:51:36 -04:00
commit fdf259a8f5

View file

@ -8,6 +8,7 @@
#include <AK/Array.h> #include <AK/Array.h>
#include <AK/Checked.h> #include <AK/Checked.h>
#include <AK/Endian.h> #include <AK/Endian.h>
#include <AK/Enumerate.h>
#include <AK/FlyString.h> #include <AK/FlyString.h>
#include <AK/Format.h> #include <AK/Format.h>
#include <AK/MemMem.h> #include <AK/MemMem.h>
@ -377,13 +378,15 @@ String String::to_ascii_lowercase() const
if (!any_of(bytes(), is_ascii_upper_alpha)) if (!any_of(bytes(), is_ascii_upper_alpha))
return *this; return *this;
Vector<u8> lowercase_bytes; String result;
lowercase_bytes.ensure_capacity(bytes().size());
for (auto character : bytes_as_string_view()) MUST(result.replace_with_new_string(byte_count(), [&](Bytes buffer) -> ErrorOr<void> {
lowercase_bytes.unchecked_append(AK::to_ascii_lowercase(character)); for (auto [i, byte] : enumerate(bytes()))
buffer[i] = static_cast<u8>(AK::to_ascii_lowercase(byte));
return {};
}));
return String::from_utf8_without_validation(lowercase_bytes); return result;
} }
String String::to_ascii_uppercase() const String String::to_ascii_uppercase() const
@ -391,13 +394,15 @@ String String::to_ascii_uppercase() const
if (!any_of(bytes(), is_ascii_lower_alpha)) if (!any_of(bytes(), is_ascii_lower_alpha))
return *this; return *this;
Vector<u8> uppercase_bytes; String result;
uppercase_bytes.ensure_capacity(bytes().size());
for (auto character : bytes_as_string_view()) MUST(result.replace_with_new_string(byte_count(), [&](Bytes buffer) -> ErrorOr<void> {
uppercase_bytes.unchecked_append(AK::to_ascii_uppercase(character)); for (auto [i, byte] : enumerate(bytes()))
buffer[i] = static_cast<u8>(AK::to_ascii_uppercase(byte));
return {};
}));
return String::from_utf8_without_validation(uppercase_bytes); return result;
} }
bool String::equals_ignoring_ascii_case(String const& other) const bool String::equals_ignoring_ascii_case(String const& other) const