LibWeb: Use code unit offsets in Document::find_matching_text()

We were using UTF-8 byte offsets where UTF-16 code unit offsets were
expected, which could lead to crashes when an offset exceeded the number
of code units in a text fragment on the page.

Fixes #4908.

Co-authored-by: Tim Ledbetter <tim.ledbetter@ladybird.org>
This commit is contained in:
Jelle Raaijmakers 2025-06-10 16:14:05 +02:00 committed by Jelle Raaijmakers
commit b42c2c5e8f
Notes: github-actions[bot] 2025-06-13 13:10:45 +00:00
5 changed files with 24 additions and 15 deletions

View file

@ -6160,7 +6160,7 @@ void Document::set_needs_to_refresh_scroll_state(bool b)
paintable->set_needs_to_refresh_scroll_state(b);
}
Vector<GC::Root<DOM::Range>> Document::find_matching_text(String const& query, CaseSensitivity case_sensitivity)
Vector<GC::Root<Range>> Document::find_matching_text(String const& query, CaseSensitivity case_sensitivity)
{
// Ensure the layout tree exists before searching for text matches.
update_layout(UpdateLayoutReason::DocumentFindMatchingText);
@ -6172,16 +6172,19 @@ Vector<GC::Root<DOM::Range>> Document::find_matching_text(String const& query, C
if (text_blocks.is_empty())
return {};
Vector<GC::Root<DOM::Range>> matches;
auto utf16_query = MUST(AK::utf8_to_utf16(query));
Utf16View query_view { utf16_query };
Vector<GC::Root<Range>> matches;
for (auto const& text_block : text_blocks) {
size_t offset = 0;
size_t i = 0;
auto const& text = text_block.text;
auto* match_start_position = &text_block.positions[0];
Utf16View text_view { text_block.text };
auto* match_start_position = text_block.positions.data();
while (true) {
auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive
? text.find_byte_offset_ignoring_case(query, offset)
: text.find_byte_offset(query, offset);
? text_view.find_code_unit_offset_ignoring_case(query_view, offset)
: text_view.find_code_unit_offset(query_view, offset);
if (!match_index.has_value())
break;
@ -6192,16 +6195,16 @@ Vector<GC::Root<DOM::Range>> Document::find_matching_text(String const& query, C
auto& start_dom_node = match_start_position->dom_node;
auto* match_end_position = match_start_position;
for (; i < text_block.positions.size() - 1 && (match_index.value() + query.bytes_as_string_view().length() > text_block.positions[i + 1].start_offset); ++i)
for (; i < text_block.positions.size() - 1 && (match_index.value() + query_view.length_in_code_units() > text_block.positions[i + 1].start_offset); ++i)
match_end_position = &text_block.positions[i + 1];
auto& end_dom_node = match_end_position->dom_node;
auto end_position = match_index.value() + query.bytes_as_string_view().length() - match_end_position->start_offset;
auto end_position = match_index.value() + query_view.length_in_code_units() - match_end_position->start_offset;
matches.append(Range::create(start_dom_node, start_position, end_dom_node, end_position));
match_start_position = match_end_position;
offset = match_index.value() + query.bytes_as_string_view().length() + 1;
if (offset >= text.bytes_as_string_view().length())
offset = match_index.value() + query_view.length_in_code_units() + 1;
if (offset >= text_view.length_in_code_units())
break;
}
}