mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-09-02 15:46:33 +00:00
AK+Everywhere: Recognise that surrogates in utf16 aren't all that common
For the slight cost of counting code points when converting between encodings, plus a teeny bit of memory, this commit adds a fast path for substring and code point operations on "all-happy" UTF-16 (i.e. text that contains no surrogates). These operations seem to account for a significant chunk of the time spent in many regex benchmarks.
This commit is contained in:
parent
86c756a589
commit
eea81738cd
Notes:
github-actions[bot]
2025-04-23 13:57:06 +00:00
Author: https://github.com/alimpfard
Commit: eea81738cd
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/4196
Reviewed-by: https://github.com/ADKaster ✅
11 changed files with 74 additions and 37 deletions
|
@ -21,9 +21,13 @@ namespace AK {
|
|||
|
||||
using Utf16Data = Vector<u16, 1>;
|
||||
|
||||
ErrorOr<Utf16Data> utf8_to_utf16(StringView, Endianness = Endianness::Host);
|
||||
ErrorOr<Utf16Data> utf8_to_utf16(Utf8View const&, Endianness = Endianness::Host);
|
||||
ErrorOr<Utf16Data> utf32_to_utf16(Utf32View const&, Endianness = Endianness::Host);
|
||||
// Bundles the output of a conversion to UTF-16: the code units together with a
// code point count. Returned (wrapped in ErrorOr) by utf8_to_utf16() and utf32_to_utf16().
struct Utf16ConversionResult {
|
||||
// The UTF-16 code units (Utf16Data is Vector<u16, 1>).
Utf16Data data;
|
||||
// Code point count; Utf16View's constructor copies this into m_length_in_code_points.
size_t code_point_count;
|
||||
};
|
||||
// Conversions to UTF-16. Each returns an ErrorOr wrapping a Utf16ConversionResult
// (code units plus a code point count); the Endianness argument defaults to Endianness::Host.
ErrorOr<Utf16ConversionResult> utf8_to_utf16(StringView, Endianness = Endianness::Host);
|
||||
ErrorOr<Utf16ConversionResult> utf8_to_utf16(Utf8View const&, Endianness = Endianness::Host);
|
||||
ErrorOr<Utf16ConversionResult> utf32_to_utf16(Utf32View const&, Endianness = Endianness::Host);
|
||||
ErrorOr<void> code_point_to_utf16(Utf16Data&, u32, Endianness = Endianness::Host);
|
||||
|
||||
[[nodiscard]] bool validate_utf16_le(ReadonlyBytes);
|
||||
|
@ -77,6 +81,13 @@ public:
|
|||
{
|
||||
}
|
||||
|
||||
// Construction from an rvalue conversion result is explicitly deleted.
// NOTE(review): presumably so a view is never built over a temporary's data — confirm.
Utf16View(Utf16ConversionResult&&) = delete;
|
||||
// Builds a view from an lvalue conversion result: m_code_units is initialised from
// `data` and m_length_in_code_points from `code_point_count`.
// NOTE(review): the view presumably does not own the code units, so the
// conversion result must outlive it — confirm against m_code_units' type.
explicit Utf16View(Utf16ConversionResult const& conversion_result)
|
||||
: m_code_units(conversion_result.data)
|
||||
, m_length_in_code_points(conversion_result.code_point_count)
|
||||
{
|
||||
}
|
||||
|
||||
template<size_t Size>
|
||||
Utf16View(char16_t const (&code_units)[Size])
|
||||
: m_code_units(
|
||||
|
@ -95,6 +106,8 @@ public:
|
|||
ErrorOr<ByteString> to_byte_string(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
|
||||
ErrorOr<String> to_utf8(AllowInvalidCodeUnits = AllowInvalidCodeUnits::No) const;
|
||||
|
||||
// Overwrites the stored code point length with `length`. Nothing here checks the
// value against the actual code units, so the caller is responsible for passing
// a correct count. Declared const yet assigns a member, so m_length_in_code_points
// must be writable from const methods (its declaration is not visible here).
void unsafe_set_code_point_length(size_t length) const { m_length_in_code_points = length; }
|
||||
|
||||
// Returns whether the underlying code unit storage reports itself as null.
bool is_null() const { return m_code_units.is_null(); }
|
||||
// Returns whether the underlying code unit storage is empty.
bool is_empty() const { return m_code_units.is_empty(); }
|
||||
// Returns the number of UTF-16 code units in the view (the size of m_code_units),
// as opposed to the number of code points.
size_t length_in_code_units() const { return m_code_units.size(); }
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue