LibWeb: Treat DOM::Range offsets as UTF-16 code unit offsets

We generated `PaintableFragment`s with a start and length represented in
UTF-8 byte offsets, but failed to consider that the offsets in a
`DOM::Range` are actually expressed in UTF-16 code units.

This is a bit of a mess: almost all web specs use UTF-16 code units as
the unit for indexing into text nodes, but we almost exclusively use
UTF-8 in our code base. Arguably the best thing would be for us to use
UTF-16 everywhere as well: it prevents these mismatches in our
implementations for the price of a bit more memory usage - and even that
could potentially be optimized for.

But for now, try to do the correct thing and lazily allocate UTF-16 data
in a `PaintableFragment` whenever we need to index into it or if we're
asked to determine the code unit offset of a pixel position.
This commit is contained in:
Jelle Raaijmakers 2025-06-12 13:19:52 +02:00 committed by Jelle Raaijmakers
commit 3df83dade8
Notes: github-actions[bot] 2025-06-13 13:09:49 +00:00
6 changed files with 110 additions and 36 deletions

View file

@ -212,7 +212,7 @@ static CSSPixelPoint compute_mouse_event_offset(CSSPixelPoint position, Painting
}
// https://drafts.csswg.org/css-ui/#propdef-user-select
static void set_user_selection(GC::Ptr<DOM::Node> anchor_node, unsigned anchor_offset, GC::Ptr<DOM::Node> focus_node, unsigned focus_offset, Selection::Selection* selection, CSS::UserSelect user_select)
static void set_user_selection(GC::Ptr<DOM::Node> anchor_node, size_t anchor_offset, GC::Ptr<DOM::Node> focus_node, size_t focus_offset, Selection::Selection* selection, CSS::UserSelect user_select)
{
// https://drafts.csswg.org/css-ui/#valdef-user-select-contain
// NOTE: This is clamping the focus node to any node with user-select: contain that stands between it and the anchor node.

View file

@ -670,7 +670,14 @@ void paint_cursor_if_needed(PaintContext& context, TextPaintable const& paintabl
return;
// NOTE: This checks if the cursor is before the start or after the end of the fragment. If it is at the end, after all text, it should still be painted.
if (cursor_position->offset() < (unsigned)fragment.start() || cursor_position->offset() > (unsigned)(fragment.start() + fragment.length()))
size_t cursor_position_byte_offset = 0;
if (cursor_position->offset() == fragment.utf16_view().length_in_code_units()) {
cursor_position_byte_offset = fragment.utf8_view().byte_length();
} else {
auto cursor_position_code_point_offset = fragment.utf16_view().code_point_offset_of(cursor_position->offset());
cursor_position_byte_offset = fragment.utf8_view().byte_offset_of(cursor_position_code_point_offset);
}
if (cursor_position_byte_offset < fragment.start() || cursor_position_byte_offset > (fragment.start() + fragment.length()))
return;
auto active_element = document.active_element();
@ -687,10 +694,12 @@ void paint_cursor_if_needed(PaintContext& context, TextPaintable const& paintabl
auto fragment_rect = fragment.absolute_rect();
auto text = fragment.string_view();
auto const& font = fragment.glyph_run() ? fragment.glyph_run()->font() : fragment.layout_node().first_available_font();
auto utf8_text = fragment.utf8_view();
auto cursor_offset = font.width(utf8_text.substring_view(fragment.start(), cursor_position_byte_offset - fragment.start()));
CSSPixelRect cursor_rect {
fragment_rect.x() + CSSPixels::nearest_value_for(font.width(text.substring_view(0, document.cursor_position()->offset() - fragment.start()))),
fragment_rect.x() + CSSPixels::nearest_value_for(cursor_offset),
fragment_rect.top(),
1,
fragment_rect.height()
@ -1182,7 +1191,7 @@ TraversalDecision PaintableWithLines::hit_test(CSSPixelPoint position, HitTestTy
if (fragment_absolute_rect.contains(transformed_position_adjusted_by_scroll_offset)) {
if (fragment.paintable().hit_test(transformed_position_adjusted_by_scroll_offset, type, callback) == TraversalDecision::Break)
return TraversalDecision::Break;
HitTestResult hit_test_result { const_cast<Paintable&>(fragment.paintable()), fragment.text_index_at(transformed_position_adjusted_by_scroll_offset), 0, 0 };
HitTestResult hit_test_result { const_cast<Paintable&>(fragment.paintable()), fragment.index_in_node_for_point(transformed_position_adjusted_by_scroll_offset), 0, 0 };
if (callback(hit_test_result) == TraversalDecision::Break)
return TraversalDecision::Break;
} else if (type == HitTestType::TextCursor) {
@ -1208,7 +1217,7 @@ TraversalDecision PaintableWithLines::hit_test(CSSPixelPoint position, HitTestTy
if (fragment_absolute_rect.bottom() - 1 <= transformed_position_adjusted_by_scroll_offset.y()) { // fully below the fragment
HitTestResult hit_test_result {
.paintable = const_cast<Paintable&>(fragment.paintable()),
.index_in_node = fragment.start() + fragment.length(),
.index_in_node = fragment.index_in_node_for_byte_offset(fragment.start() + fragment.length()),
.vertical_distance = transformed_position_adjusted_by_scroll_offset.y() - fragment_absolute_rect.bottom(),
};
if (callback(hit_test_result) == TraversalDecision::Break)
@ -1217,7 +1226,7 @@ TraversalDecision PaintableWithLines::hit_test(CSSPixelPoint position, HitTestTy
if (transformed_position_adjusted_by_scroll_offset.x() < fragment_absolute_rect.left()) {
HitTestResult hit_test_result {
.paintable = const_cast<Paintable&>(fragment.paintable()),
.index_in_node = fragment.start(),
.index_in_node = fragment.index_in_node_for_byte_offset(fragment.start()),
.vertical_distance = 0,
.horizontal_distance = fragment_absolute_rect.left() - transformed_position_adjusted_by_scroll_offset.x(),
};
@ -1226,7 +1235,7 @@ TraversalDecision PaintableWithLines::hit_test(CSSPixelPoint position, HitTestTy
} else if (transformed_position_adjusted_by_scroll_offset.x() > fragment_absolute_rect.right()) {
HitTestResult hit_test_result {
.paintable = const_cast<Paintable&>(fragment.paintable()),
.index_in_node = fragment.start() + fragment.length(),
.index_in_node = fragment.index_in_node_for_byte_offset(fragment.start() + fragment.length()),
.vertical_distance = 0,
.horizontal_distance = transformed_position_adjusted_by_scroll_offset.x() - fragment_absolute_rect.right(),
};

View file

@ -29,14 +29,23 @@ PaintableFragment::PaintableFragment(Layout::LineBoxFragment const& fragment)
CSSPixelRect const PaintableFragment::absolute_rect() const
{
CSSPixelRect rect { {}, size() };
auto const* containing_block = paintable().containing_block();
if (containing_block)
if (auto const* containing_block = paintable().containing_block())
rect.set_location(containing_block->absolute_position());
rect.translate_by(offset());
return rect;
}
size_t PaintableFragment::text_index_at(CSSPixelPoint position) const
// Converts a UTF-8 byte offset into a UTF-16 code unit offset by first mapping the byte offset to a
// code point offset in utf8_view() and then mapping that code point offset to a code unit offset in
// utf16_view().
//
// Returns 0 for a zero-length fragment. For a byte offset at or beyond the end of this fragment
// (m_start + m_length), returns the total number of code units in utf16_view().
size_t PaintableFragment::index_in_node_for_byte_offset(size_t byte_offset) const
{
if (m_length == 0)
return 0;
// NOTE(review): utf16_view() appears to cover the paintable's entire rendered text rather than only
// this fragment, so for a fragment that does not end at the end of the text this yields the end of
// the whole text instead of the end of the fragment - confirm this is intended.
// NOTE(review): presumably this guard also keeps code_point_offset_of() from being called with a
// byte offset equal to the text's byte length - verify against Utf8View.
if (byte_offset >= m_start + m_length)
return utf16_view().length_in_code_units();
auto code_point_offset = utf8_view().code_point_offset_of(byte_offset);
return utf16_view().code_unit_offset_of(code_point_offset);
}
size_t PaintableFragment::index_in_node_for_point(CSSPixelPoint position) const
{
if (!is<TextPaintable>(paintable()))
return 0;
@ -53,6 +62,8 @@ size_t PaintableFragment::text_index_at(CSSPixelPoint position) const
if (relative_inline_offset < 0)
return 0;
// Find the code point offset of the glyph matching the position.
auto code_point_offset = utf8_view().code_point_offset_of(m_start);
auto const& glyphs = m_glyph_run->glyphs();
auto smallest_distance = AK::NumericLimits<float>::max();
for (size_t i = 0; i < glyphs.size(); ++i) {
@ -60,14 +71,17 @@ size_t PaintableFragment::text_index_at(CSSPixelPoint position) const
// The last distance was smaller than this new distance, so we've found the closest glyph.
if (distance_to_position > smallest_distance)
return m_start + i - 1;
break;
smallest_distance = distance_to_position;
++code_point_offset;
}
return m_start + m_length - 1;
// Return the code unit offset in the UTF-16 string.
return utf16_view().code_unit_offset_of(code_point_offset - 1);
}
CSSPixelRect PaintableFragment::range_rect(size_t start_offset, size_t end_offset) const
CSSPixelRect PaintableFragment::range_rect(size_t start_offset_in_code_units, size_t end_offset_in_code_units) const
{
if (paintable().selection_state() == Paintable::SelectionState::None)
return {};
@ -75,24 +89,39 @@ CSSPixelRect PaintableFragment::range_rect(size_t start_offset, size_t end_offse
if (paintable().selection_state() == Paintable::SelectionState::Full)
return absolute_rect();
auto const start_index = m_start;
auto const end_index = m_start + m_length;
auto const& font = glyph_run() ? glyph_run()->font() : layout_node().first_available_font();
auto text = string_view();
// We are invoked with offsets coming from a Range, which means they are expressed in UTF-16 code units. We need to
// convert them to the byte offsets in the UTF-8 string. This is inefficient, but we only need to do it for
// fragments with a partial selection.
auto code_unit_to_byte_offset = [&](size_t offset_in_code_units) -> size_t {
auto text_in_utf16 = utf16_view();
if (offset_in_code_units >= text_in_utf16.length_in_code_units())
return m_length;
auto offset_code_point = text_in_utf16.code_point_offset_of(offset_in_code_units);
auto byte_offset = utf8_view().byte_offset_of(offset_code_point);
if (byte_offset <= m_start)
return 0;
if (byte_offset > m_start + m_length)
return m_length;
return byte_offset - m_start;
};
// We operate on the UTF-8 string that is part of this fragment.
auto text = utf8_view().substring_view(m_start, m_length);
if (paintable().selection_state() == Paintable::SelectionState::StartAndEnd) {
auto selection_start_in_this_fragment = code_unit_to_byte_offset(start_offset_in_code_units);
auto selection_end_in_this_fragment = code_unit_to_byte_offset(end_offset_in_code_units);
// we are in the start/end node (both the same)
if (start_index > end_offset)
if (selection_start_in_this_fragment >= m_length)
return {};
if (end_index < start_offset)
if (selection_end_in_this_fragment == 0)
return {};
if (selection_start_in_this_fragment == selection_end_in_this_fragment)
return {};
if (start_offset == end_offset)
return {};
auto selection_start_in_this_fragment = max(0, start_offset - m_start);
auto selection_end_in_this_fragment = min(m_length, end_offset - m_start);
auto pixel_distance_to_first_selected_character = CSSPixels::nearest_value_for(font.width(text.substring_view(0, selection_start_in_this_fragment)));
auto pixel_width_of_selection = CSSPixels::nearest_value_for(font.width(text.substring_view(selection_start_in_this_fragment, selection_end_in_this_fragment - selection_start_in_this_fragment))) + 1;
@ -113,12 +142,13 @@ CSSPixelRect PaintableFragment::range_rect(size_t start_offset, size_t end_offse
return rect;
}
if (paintable().selection_state() == Paintable::SelectionState::Start) {
auto selection_start_in_this_fragment = code_unit_to_byte_offset(start_offset_in_code_units);
auto selection_end_in_this_fragment = m_length;
// we are in the start node
if (end_index < start_offset)
if (selection_start_in_this_fragment >= m_length)
return {};
auto selection_start_in_this_fragment = max(0, start_offset - m_start);
auto selection_end_in_this_fragment = m_length;
auto pixel_distance_to_first_selected_character = CSSPixels::nearest_value_for(font.width(text.substring_view(0, selection_start_in_this_fragment)));
auto pixel_width_of_selection = CSSPixels::nearest_value_for(font.width(text.substring_view(selection_start_in_this_fragment, selection_end_in_this_fragment - selection_start_in_this_fragment))) + 1;
@ -139,12 +169,13 @@ CSSPixelRect PaintableFragment::range_rect(size_t start_offset, size_t end_offse
return rect;
}
if (paintable().selection_state() == Paintable::SelectionState::End) {
auto selection_start_in_this_fragment = 0u;
auto selection_end_in_this_fragment = code_unit_to_byte_offset(end_offset_in_code_units);
// we are in the end node
if (start_index > end_offset)
if (selection_end_in_this_fragment == 0)
return {};
auto selection_start_in_this_fragment = 0;
auto selection_end_in_this_fragment = min<int>(end_offset - m_start, m_length);
auto pixel_distance_to_first_selected_character = CSSPixels::nearest_value_for(font.width(text.substring_view(0, selection_start_in_this_fragment)));
auto pixel_width_of_selection = CSSPixels::nearest_value_for(font.width(text.substring_view(selection_start_in_this_fragment, selection_end_in_this_fragment - selection_start_in_this_fragment))) + 1;
@ -197,6 +228,7 @@ CSSPixelRect PaintableFragment::selection_rect() const
auto selection_end = text_control_element->selection_end();
return range_rect(selection_start, selection_end);
}
auto selection = paintable().document().get_selection();
if (!selection)
return {};
@ -207,11 +239,22 @@ CSSPixelRect PaintableFragment::selection_rect() const
return range_rect(range->start_offset(), range->end_offset());
}
StringView PaintableFragment::string_view() const
Utf8View PaintableFragment::utf8_view() const
{
if (!is<TextPaintable>(paintable()))
return {};
return static_cast<TextPaintable const&>(paintable()).text_for_rendering().bytes_as_string_view().substring_view(m_start, m_length);
return Utf8View { static_cast<TextPaintable const&>(paintable()).text_for_rendering() };
}
// Returns a UTF-16 view of the text returned by utf8_view(), or an empty view if the paintable is
// not a TextPaintable.
//
// The UTF-16 data is created lazily: the conversion only runs on the first call and its result is
// stored in m_text_in_utf16 for reuse by later calls.
Utf16View PaintableFragment::utf16_view() const
{
if (!is<TextPaintable>(paintable()))
return {};
// Convert on first use only. Assigning here from a const member function relies on
// m_text_in_utf16 being declared mutable.
// NOTE(review): MUST() presumably treats a failed conversion as fatal - confirm.
if (!m_text_in_utf16.has_value())
m_text_in_utf16 = MUST(AK::utf8_to_utf16(utf8_view()));
return Utf16View { m_text_in_utf16.value() };
}
}

View file

@ -39,14 +39,16 @@ public:
Gfx::Orientation orientation() const;
CSSPixelRect selection_rect() const;
CSSPixelRect range_rect(size_t start_offset, size_t end_offset) const;
CSSPixelRect range_rect(size_t start_offset_in_code_units, size_t end_offset_in_code_units) const;
CSSPixels width() const { return m_size.width(); }
CSSPixels height() const { return m_size.height(); }
size_t text_index_at(CSSPixelPoint) const;
size_t index_in_node_for_byte_offset(size_t) const;
size_t index_in_node_for_point(CSSPixelPoint) const;
StringView string_view() const;
Utf8View utf8_view() const;
Utf16View utf16_view() const;
CSSPixels text_decoration_thickness() const { return m_text_decoration_thickness; }
void set_text_decoration_thickness(CSSPixels thickness) { m_text_decoration_thickness = thickness; }
@ -62,6 +64,7 @@ private:
CSS::WritingMode m_writing_mode;
Vector<ShadowData> m_shadows;
CSSPixels m_text_decoration_thickness { 0 };
mutable Optional<AK::Utf16ConversionResult> m_text_in_utf16;
};
}

View file

@ -0,0 +1,4 @@
<#text>
5
<#text>
11

View file

@ -0,0 +1,15 @@
<!DOCTYPE html>
<script src="include.js"></script>
😭foobar😭
<script>
test(() => {
// Press the mouse at (55, 20) and move the pointer to (110, 20).
// NOTE(review): presumably this drag-selects part of the emoji-bracketed text above - confirm the
// coordinates against the rendered layout.
internals.mouseDown(55, 20);
internals.movePointerTo(110, 20);
// Print the container node and offset for both ends of the first range of the current selection.
// The emoji are outside the Basic Multilingual Plane, so each one occupies two UTF-16 code units
// but only one code point; offsets counted in any other unit would differ.
const activeRange = window.getSelection().getRangeAt(0);
printElement(activeRange.startContainer);
println(activeRange.startOffset);
printElement(activeRange.endContainer);
println(activeRange.endOffset);
});
</script>