mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-07-24 18:02:20 +00:00
AK+LibJS: Do not set UTF-16 code point length to its code unit length
This commit is contained in:
parent
594194eb60
commit
efa9737cf7
Notes:
github-actions[bot]
2025-06-25 20:22:15 +00:00
Author: https://github.com/trflynn89
Commit: efa9737cf7
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5214
Reviewed-by: https://github.com/gmta ✅
3 changed files with 25 additions and 1 deletion
|
@@ -113,6 +113,13 @@ public:
|
||||||
size_t length_in_code_units() const { return m_code_units.size(); }
|
size_t length_in_code_units() const { return m_code_units.size(); }
|
||||||
size_t length_in_code_points() const;
|
size_t length_in_code_points() const;
|
||||||
|
|
||||||
|
Optional<size_t> length_in_code_points_if_known() const
|
||||||
|
{
|
||||||
|
if (m_length_in_code_points == NumericLimits<size_t>::max())
|
||||||
|
return {};
|
||||||
|
return m_length_in_code_points;
|
||||||
|
}
|
||||||
|
|
||||||
Utf16CodePointIterator begin() const { return { begin_ptr(), m_code_units.size() }; }
|
Utf16CodePointIterator begin() const { return { begin_ptr(), m_code_units.size() }; }
|
||||||
Utf16CodePointIterator end() const { return { end_ptr(), 0 }; }
|
Utf16CodePointIterator end() const { return { end_ptr(), 0 }; }
|
||||||
|
|
||||||
|
|
|
@@ -45,8 +45,11 @@ NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view)
|
||||||
Utf16Data string;
|
Utf16Data string;
|
||||||
string.ensure_capacity(view.length_in_code_units());
|
string.ensure_capacity(view.length_in_code_units());
|
||||||
string.unchecked_append(view.data(), view.length_in_code_units());
|
string.unchecked_append(view.data(), view.length_in_code_units());
|
||||||
|
|
||||||
auto impl = create(move(string));
|
auto impl = create(move(string));
|
||||||
impl->m_cached_view.unsafe_set_code_point_length(view.length_in_code_units());
|
if (auto length_in_code_points = view.length_in_code_points_if_known(); length_in_code_points.has_value())
|
||||||
|
impl->m_cached_view.unsafe_set_code_point_length(*length_in_code_points);
|
||||||
|
|
||||||
return impl;
|
return impl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@@ -212,3 +212,17 @@ test("string coercion", () => {
|
||||||
expect(result[0]).toBe("1");
|
expect(result[0]).toBe("1");
|
||||||
expect(result.index).toBe(0);
|
expect(result.index).toBe(0);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test("cached UTF-16 code point length", () => {
|
||||||
|
// This exercises a regression where we incorrectly cached the code point length of the `match` string,
|
||||||
|
// causing subsequent code point lookups on that string to be incorrect.
|
||||||
|
const regex = /\p{Emoji_Presentation}/u;
|
||||||
|
|
||||||
|
let result = regex.exec("😀");
|
||||||
|
let match = result[0];
|
||||||
|
|
||||||
|
result = regex.exec(match);
|
||||||
|
match = result[0];
|
||||||
|
|
||||||
|
expect(match.codePointAt(0)).toBe(0x1f600);
|
||||||
|
});
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue