LibWeb: Use code unit offsets in Document::find_matching_text()

We were passing in byte offsets instead of UTF-16 code unit offsets,
which could lead to crashes if the offsets found exceeded the number of
code units in text fragments on the page.

Fixes #4908.

Co-authored-by: Tim Ledbetter <tim.ledbetter@ladybird.org>
This commit is contained in:
Jelle Raaijmakers 2025-06-10 16:14:05 +02:00 committed by Jelle Raaijmakers
commit b42c2c5e8f
Notes: github-actions[bot] 2025-06-13 13:10:45 +00:00
5 changed files with 24 additions and 15 deletions

View file

@ -6160,7 +6160,7 @@ void Document::set_needs_to_refresh_scroll_state(bool b)
paintable->set_needs_to_refresh_scroll_state(b); paintable->set_needs_to_refresh_scroll_state(b);
} }
Vector<GC::Root<DOM::Range>> Document::find_matching_text(String const& query, CaseSensitivity case_sensitivity) Vector<GC::Root<Range>> Document::find_matching_text(String const& query, CaseSensitivity case_sensitivity)
{ {
// Ensure the layout tree exists before searching for text matches. // Ensure the layout tree exists before searching for text matches.
update_layout(UpdateLayoutReason::DocumentFindMatchingText); update_layout(UpdateLayoutReason::DocumentFindMatchingText);
@ -6172,16 +6172,19 @@ Vector<GC::Root<DOM::Range>> Document::find_matching_text(String const& query, C
if (text_blocks.is_empty()) if (text_blocks.is_empty())
return {}; return {};
Vector<GC::Root<DOM::Range>> matches; auto utf16_query = MUST(AK::utf8_to_utf16(query));
Utf16View query_view { utf16_query };
Vector<GC::Root<Range>> matches;
for (auto const& text_block : text_blocks) { for (auto const& text_block : text_blocks) {
size_t offset = 0; size_t offset = 0;
size_t i = 0; size_t i = 0;
auto const& text = text_block.text; Utf16View text_view { text_block.text };
auto* match_start_position = &text_block.positions[0]; auto* match_start_position = text_block.positions.data();
while (true) { while (true) {
auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive
? text.find_byte_offset_ignoring_case(query, offset) ? text_view.find_code_unit_offset_ignoring_case(query_view, offset)
: text.find_byte_offset(query, offset); : text_view.find_code_unit_offset(query_view, offset);
if (!match_index.has_value()) if (!match_index.has_value())
break; break;
@ -6192,16 +6195,16 @@ Vector<GC::Root<DOM::Range>> Document::find_matching_text(String const& query, C
auto& start_dom_node = match_start_position->dom_node; auto& start_dom_node = match_start_position->dom_node;
auto* match_end_position = match_start_position; auto* match_end_position = match_start_position;
for (; i < text_block.positions.size() - 1 && (match_index.value() + query.bytes_as_string_view().length() > text_block.positions[i + 1].start_offset); ++i) for (; i < text_block.positions.size() - 1 && (match_index.value() + query_view.length_in_code_units() > text_block.positions[i + 1].start_offset); ++i)
match_end_position = &text_block.positions[i + 1]; match_end_position = &text_block.positions[i + 1];
auto& end_dom_node = match_end_position->dom_node; auto& end_dom_node = match_end_position->dom_node;
auto end_position = match_index.value() + query.bytes_as_string_view().length() - match_end_position->start_offset; auto end_position = match_index.value() + query_view.length_in_code_units() - match_end_position->start_offset;
matches.append(Range::create(start_dom_node, start_position, end_dom_node, end_position)); matches.append(Range::create(start_dom_node, start_position, end_dom_node, end_position));
match_start_position = match_end_position; match_start_position = match_end_position;
offset = match_index.value() + query.bytes_as_string_view().length() + 1; offset = match_index.value() + query_view.length_in_code_units() + 1;
if (offset >= text.bytes_as_string_view().length()) if (offset >= text_view.length_in_code_units())
break; break;
} }
} }

View file

@ -814,7 +814,7 @@ public:
// Does document represent an embedded svg img // Does document represent an embedded svg img
[[nodiscard]] bool is_decoded_svg() const; [[nodiscard]] bool is_decoded_svg() const;
Vector<GC::Root<DOM::Range>> find_matching_text(String const&, CaseSensitivity); Vector<GC::Root<Range>> find_matching_text(String const&, CaseSensitivity);
void parse_html_from_a_string(StringView); void parse_html_from_a_string(StringView);
static GC::Ref<Document> parse_html_unsafe(JS::VM&, StringView); static GC::Ref<Document> parse_html_unsafe(JS::VM&, StringView);

View file

@ -60,7 +60,7 @@ void Viewport::update_text_blocks()
if (layout_node.is_box() || layout_node.is_generated()) { if (layout_node.is_box() || layout_node.is_generated()) {
if (!builder.is_empty()) { if (!builder.is_empty()) {
text_blocks.append({ builder.to_string_without_validation(), text_positions }); text_blocks.append({ MUST(AK::utf8_to_utf16(builder.string_view())), text_positions });
current_start_position = 0; current_start_position = 0;
text_positions.clear_with_capacity(); text_positions.clear_with_capacity();
builder.clear(); builder.clear();
@ -80,7 +80,8 @@ void Viewport::update_text_blocks()
} }
auto const& current_node_text = text_node->text_for_rendering(); auto const& current_node_text = text_node->text_for_rendering();
current_start_position += current_node_text.bytes_as_string_view().length(); auto const current_node_text_utf16 = MUST(AK::utf8_to_utf16(current_node_text));
current_start_position += current_node_text_utf16.data.size();
builder.append(move(current_node_text)); builder.append(move(current_node_text));
} }
} }
@ -89,7 +90,7 @@ void Viewport::update_text_blocks()
}); });
if (!builder.is_empty()) if (!builder.is_empty())
text_blocks.append({ builder.to_string_without_validation(), text_positions }); text_blocks.append({ MUST(AK::utf8_to_utf16(builder.string_view())), text_positions });
m_text_blocks = move(text_blocks); m_text_blocks = move(text_blocks);
} }

View file

@ -24,7 +24,7 @@ public:
size_t start_offset { 0 }; size_t start_offset { 0 };
}; };
struct TextBlock { struct TextBlock {
String text; AK::Utf16ConversionResult text;
Vector<TextPosition> positions; Vector<TextPosition> positions;
}; };
Vector<TextBlock> const& text_blocks(); Vector<TextBlock> const& text_blocks();

View file

@ -0,0 +1,5 @@
<!DOCTYPE html>
😭a
<script>
// Regression test for text search offsets: the emoji before the search target
// takes 4 bytes in UTF-8 but only 2 code units in UTF-16, so the byte offset
// of 'a' is larger than its code unit offset. Searching for 'a' therefore
// exercises the case where the two kinds of offset disagree.
// NOTE(review): presumably this only needs to finish without crashing —
// confirm against the expectations of the test harness.
window.find('a');
</script>