diff --git a/Libraries/LibWeb/DOM/Document.cpp b/Libraries/LibWeb/DOM/Document.cpp index 7abccaa1fb6..8f5e387a71b 100644 --- a/Libraries/LibWeb/DOM/Document.cpp +++ b/Libraries/LibWeb/DOM/Document.cpp @@ -6160,7 +6160,7 @@ void Document::set_needs_to_refresh_scroll_state(bool b) paintable->set_needs_to_refresh_scroll_state(b); } -Vector> Document::find_matching_text(String const& query, CaseSensitivity case_sensitivity) +Vector> Document::find_matching_text(String const& query, CaseSensitivity case_sensitivity) { // Ensure the layout tree exists before searching for text matches. update_layout(UpdateLayoutReason::DocumentFindMatchingText); @@ -6172,16 +6172,19 @@ Vector> Document::find_matching_text(String const& query, C if (text_blocks.is_empty()) return {}; - Vector> matches; + auto utf16_query = MUST(AK::utf8_to_utf16(query)); + Utf16View query_view { utf16_query }; + + Vector> matches; for (auto const& text_block : text_blocks) { size_t offset = 0; size_t i = 0; - auto const& text = text_block.text; - auto* match_start_position = &text_block.positions[0]; + Utf16View text_view { text_block.text }; + auto* match_start_position = text_block.positions.data(); while (true) { auto match_index = case_sensitivity == CaseSensitivity::CaseInsensitive - ? text.find_byte_offset_ignoring_case(query, offset) - : text.find_byte_offset(query, offset); + ? text_view.find_code_unit_offset_ignoring_case(query_view, offset) + : text_view.find_code_unit_offset(query_view, offset); if (!match_index.has_value()) break; @@ -6192,16 +6195,16 @@ Vector> Document::find_matching_text(String const& query, C auto& start_dom_node = match_start_position->dom_node; auto* match_end_position = match_start_position; - for (; i < text_block.positions.size() - 1 && (match_index.value() + query.bytes_as_string_view().length() > text_block.positions[i + 1].start_offset); ++i) + for (; i < text_block.positions.size() - 1 && (match_index.value() + query_view.length_in_code_units() > text_block.positions[i + 1].start_offset); ++i) match_end_position = &text_block.positions[i + 1]; auto& end_dom_node = match_end_position->dom_node; - auto end_position = match_index.value() + query.bytes_as_string_view().length() - match_end_position->start_offset; + auto end_position = match_index.value() + query_view.length_in_code_units() - match_end_position->start_offset; matches.append(Range::create(start_dom_node, start_position, end_dom_node, end_position)); match_start_position = match_end_position; - offset = match_index.value() + query.bytes_as_string_view().length() + 1; - if (offset >= text.bytes_as_string_view().length()) + offset = match_index.value() + query_view.length_in_code_units() + 1; + if (offset >= text_view.length_in_code_units()) break; } } diff --git a/Libraries/LibWeb/DOM/Document.h b/Libraries/LibWeb/DOM/Document.h index 0d5f6d747bb..15c3e7d747d 100644 --- a/Libraries/LibWeb/DOM/Document.h +++ b/Libraries/LibWeb/DOM/Document.h @@ -814,7 +814,7 @@ public: // Does document represent an embedded svg img [[nodiscard]] bool is_decoded_svg() const; - Vector> find_matching_text(String const&, CaseSensitivity); + Vector> find_matching_text(String const&, CaseSensitivity); void parse_html_from_a_string(StringView); static GC::Ref parse_html_unsafe(JS::VM&, StringView); diff --git a/Libraries/LibWeb/Layout/Viewport.cpp b/Libraries/LibWeb/Layout/Viewport.cpp index 29d45cb020e..6fcc55ced36 100644 --- a/Libraries/LibWeb/Layout/Viewport.cpp +++ b/Libraries/LibWeb/Layout/Viewport.cpp @@ -60,7 +60,7 @@ void Viewport::update_text_blocks() if (layout_node.is_box() || layout_node.is_generated()) { if (!builder.is_empty()) { - text_blocks.append({ builder.to_string_without_validation(), text_positions }); + text_blocks.append({ MUST(AK::utf8_to_utf16(builder.string_view())), text_positions }); current_start_position = 0; text_positions.clear_with_capacity(); builder.clear(); @@ -80,7 +80,8 @@ void Viewport::update_text_blocks() } auto const& current_node_text = text_node->text_for_rendering(); - current_start_position += current_node_text.bytes_as_string_view().length(); + auto const current_node_text_utf16 = MUST(AK::utf8_to_utf16(current_node_text)); + current_start_position += current_node_text_utf16.data.size(); builder.append(move(current_node_text)); } } @@ -89,7 +90,7 @@ void Viewport::update_text_blocks() }); if (!builder.is_empty()) - text_blocks.append({ builder.to_string_without_validation(), text_positions }); + text_blocks.append({ MUST(AK::utf8_to_utf16(builder.string_view())), text_positions }); m_text_blocks = move(text_blocks); } diff --git a/Libraries/LibWeb/Layout/Viewport.h b/Libraries/LibWeb/Layout/Viewport.h index f6b6d32e0eb..a39a2b1fd6b 100644 --- a/Libraries/LibWeb/Layout/Viewport.h +++ b/Libraries/LibWeb/Layout/Viewport.h @@ -24,7 +24,7 @@ public: size_t start_offset { 0 }; }; struct TextBlock { - String text; + AK::Utf16ConversionResult text; Vector positions; }; Vector const& text_blocks(); diff --git a/Tests/LibWeb/Crash/DOM/Window-find-code-units.html b/Tests/LibWeb/Crash/DOM/Window-find-code-units.html new file mode 100644 index 00000000000..b073bea2858 --- /dev/null +++ b/Tests/LibWeb/Crash/DOM/Window-find-code-units.html @@ -0,0 +1,5 @@ + +😭a +