From efa9737cf7cebcf64a036f32019b055a6a67426d Mon Sep 17 00:00:00 2001
From: Timothy Flynn
Date: Wed, 25 Jun 2025 15:52:01 -0400
Subject: [PATCH] AK+LibJS: Do not set UTF-16 code point length to its code unit length

---
 AK/Utf16View.h                                     |  7 +++++++
 Libraries/LibJS/Runtime/Utf16String.cpp            |  5 ++++-
 .../Tests/builtins/RegExp/RegExp.prototype.exec.js | 14 ++++++++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/AK/Utf16View.h b/AK/Utf16View.h
index ed2dee466c2..aaba8b20aef 100644
--- a/AK/Utf16View.h
+++ b/AK/Utf16View.h
@@ -113,6 +113,13 @@ public:
     size_t length_in_code_units() const { return m_code_units.size(); }
     size_t length_in_code_points() const;

+    Optional<size_t> length_in_code_points_if_known() const
+    {
+        if (m_length_in_code_points == NumericLimits<size_t>::max())
+            return {};
+        return m_length_in_code_points;
+    }
+
     Utf16CodePointIterator begin() const { return { begin_ptr(), m_code_units.size() }; }
     Utf16CodePointIterator end() const { return { end_ptr(), 0 }; }

diff --git a/Libraries/LibJS/Runtime/Utf16String.cpp b/Libraries/LibJS/Runtime/Utf16String.cpp
index 5b7acc96de6..ba94c32a82c 100644
--- a/Libraries/LibJS/Runtime/Utf16String.cpp
+++ b/Libraries/LibJS/Runtime/Utf16String.cpp
@@ -45,8 +45,11 @@ NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view)
     Utf16Data string;
     string.ensure_capacity(view.length_in_code_units());
     string.unchecked_append(view.data(), view.length_in_code_units());
+
     auto impl = create(move(string));
-    impl->m_cached_view.unsafe_set_code_point_length(view.length_in_code_units());
+    if (auto length_in_code_points = view.length_in_code_points_if_known(); length_in_code_points.has_value())
+        impl->m_cached_view.unsafe_set_code_point_length(*length_in_code_points);
+
     return impl;
 }

diff --git a/Libraries/LibJS/Tests/builtins/RegExp/RegExp.prototype.exec.js b/Libraries/LibJS/Tests/builtins/RegExp/RegExp.prototype.exec.js
index 40dfa46ffc0..e861ace858d 100644
--- a/Libraries/LibJS/Tests/builtins/RegExp/RegExp.prototype.exec.js
+++ b/Libraries/LibJS/Tests/builtins/RegExp/RegExp.prototype.exec.js
@@ -212,3 +212,17 @@ test("string coercion", () => {
     expect(result[0]).toBe("1");
     expect(result.index).toBe(0);
 });
+
+test("cached UTF-16 code point length", () => {
+    // This exercises a regression where we incorrectly cached the code point length of the `match` string,
+    // causing subsequent code point lookups on that string to be incorrect.
+    const regex = /\p{Emoji_Presentation}/u;
+
+    let result = regex.exec("😀");
+    let match = result[0];
+
+    result = regex.exec(match);
+    match = result[0];
+
+    expect(match.codePointAt(0)).toBe(0x1f600);
+});