mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-07 00:29:15 +00:00
LibWeb: Use code unit offsets in Document::find_matching_text()
We were passing in byte offsets instead of UTF-16 code unit offsets, which could lead to crashes when a found offset exceeded the number of code units in a text fragment on the page.

Fixes #4908.

Co-authored-by: Tim Ledbetter <tim.ledbetter@ladybird.org>
This commit is contained in:
parent
cc0a28ee7d
commit
b42c2c5e8f
Notes:
github-actions[bot]
2025-06-13 13:10:45 +00:00
Author: https://github.com/gmta
Commit: b42c2c5e8f
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5067
Reviewed-by: https://github.com/tcl3
Reviewed-by: https://github.com/trflynn89
5 changed files with 24 additions and 15 deletions
|
@ -6160,7 +6160,7 @@ void Document::set_needs_to_refresh_scroll_state(bool b)
|
|||
paintable->set_needs_to_refresh_scroll_state(b);
|
||||
}
|
||||
|
||||
Vector<GC::Root<DOM::Range>> Document::find_matching_text(String const& query, CaseSensitivity case_sensitivity)
|
||||
Vector<GC::Root<Range>> Document::find_matching_text(String const& query, CaseSensitivity case_sensitivity)
|
||||
{
|
||||
// Ensure the layout tree exists before searching for text matches.
|
||||
update_layout(UpdateLayoutReason::DocumentFindMatchingText);
|
||||
|
@ -6172,16 +6172,19 @@ Vector<GC::Root<DOM::Range>> Document::find_matching_text(String const& query, C
|
|||
if (text_blocks.is_empty())
|
||||
return {};
|
||||
|
||||
Vector<GC::Root<DOM::Range>> matches;
|
||||
auto utf16_query = MUST(AK::utf8_to_utf16(query));
|
||||
Utf16View query_view { utf16_query };
|
||||
|
||||
Vector<GC::Root<Range>> matches;
|
||||
for (auto const& text_block : text_blocks) {
|
||||
size_t offset = 0;
|
||||
size_t i = 0;
|
||||
auto const& text = text_block.text;
|
||||
auto* match_start_position = &text_block.positions[0];
|
||||
Utf16View text_view { text_block.text };
|
||||
auto* match_start_position = text_block.positions.data();
|
||||
while (true) {
|
||||
auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive
|
||||
? text.find_byte_offset_ignoring_case(query, offset)
|
||||
: text.find_byte_offset(query, offset);
|
||||
? text_view.find_code_unit_offset_ignoring_case(query_view, offset)
|
||||
: text_view.find_code_unit_offset(query_view, offset);
|
||||
if (!match_index.has_value())
|
||||
break;
|
||||
|
||||
|
@ -6192,16 +6195,16 @@ Vector<GC::Root<DOM::Range>> Document::find_matching_text(String const& query, C
|
|||
auto& start_dom_node = match_start_position->dom_node;
|
||||
|
||||
auto* match_end_position = match_start_position;
|
||||
for (; i < text_block.positions.size() - 1 && (match_index.value() + query.bytes_as_string_view().length() > text_block.positions[i + 1].start_offset); ++i)
|
||||
for (; i < text_block.positions.size() - 1 && (match_index.value() + query_view.length_in_code_units() > text_block.positions[i + 1].start_offset); ++i)
|
||||
match_end_position = &text_block.positions[i + 1];
|
||||
|
||||
auto& end_dom_node = match_end_position->dom_node;
|
||||
auto end_position = match_index.value() + query.bytes_as_string_view().length() - match_end_position->start_offset;
|
||||
auto end_position = match_index.value() + query_view.length_in_code_units() - match_end_position->start_offset;
|
||||
|
||||
matches.append(Range::create(start_dom_node, start_position, end_dom_node, end_position));
|
||||
match_start_position = match_end_position;
|
||||
offset = match_index.value() + query.bytes_as_string_view().length() + 1;
|
||||
if (offset >= text.bytes_as_string_view().length())
|
||||
offset = match_index.value() + query_view.length_in_code_units() + 1;
|
||||
if (offset >= text_view.length_in_code_units())
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -814,7 +814,7 @@ public:
|
|||
// Does document represent an embedded svg img
|
||||
[[nodiscard]] bool is_decoded_svg() const;
|
||||
|
||||
Vector<GC::Root<DOM::Range>> find_matching_text(String const&, CaseSensitivity);
|
||||
Vector<GC::Root<Range>> find_matching_text(String const&, CaseSensitivity);
|
||||
|
||||
void parse_html_from_a_string(StringView);
|
||||
static GC::Ref<Document> parse_html_unsafe(JS::VM&, StringView);
|
||||
|
|
|
@ -60,7 +60,7 @@ void Viewport::update_text_blocks()
|
|||
|
||||
if (layout_node.is_box() || layout_node.is_generated()) {
|
||||
if (!builder.is_empty()) {
|
||||
text_blocks.append({ builder.to_string_without_validation(), text_positions });
|
||||
text_blocks.append({ MUST(AK::utf8_to_utf16(builder.string_view())), text_positions });
|
||||
current_start_position = 0;
|
||||
text_positions.clear_with_capacity();
|
||||
builder.clear();
|
||||
|
@ -80,7 +80,8 @@ void Viewport::update_text_blocks()
|
|||
}
|
||||
|
||||
auto const& current_node_text = text_node->text_for_rendering();
|
||||
current_start_position += current_node_text.bytes_as_string_view().length();
|
||||
auto const current_node_text_utf16 = MUST(AK::utf8_to_utf16(current_node_text));
|
||||
current_start_position += current_node_text_utf16.data.size();
|
||||
builder.append(move(current_node_text));
|
||||
}
|
||||
}
|
||||
|
@ -89,7 +90,7 @@ void Viewport::update_text_blocks()
|
|||
});
|
||||
|
||||
if (!builder.is_empty())
|
||||
text_blocks.append({ builder.to_string_without_validation(), text_positions });
|
||||
text_blocks.append({ MUST(AK::utf8_to_utf16(builder.string_view())), text_positions });
|
||||
|
||||
m_text_blocks = move(text_blocks);
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@ public:
|
|||
size_t start_offset { 0 };
|
||||
};
|
||||
struct TextBlock {
|
||||
String text;
|
||||
AK::Utf16ConversionResult text;
|
||||
Vector<TextPosition> positions;
|
||||
};
|
||||
Vector<TextBlock> const& text_blocks();
|
||||
|
|
5
Tests/LibWeb/Crash/DOM/Window-find-code-units.html
Normal file
5
Tests/LibWeb/Crash/DOM/Window-find-code-units.html
Normal file
|
@ -0,0 +1,5 @@
|
|||
<!DOCTYPE html>
|
||||
😭a
|
||||
<script>
|
||||
window.find('a');
|
||||
</script>
|
Loading…
Add table
Add a link
Reference in a new issue