LibWeb: Treat DOM::Range offsets as UTF-16 code unit offsets

We generated `PaintableFragment`s with a start and length represented in
UTF-8 byte offsets, but failed to consider that the offsets in a
`DOM::Range` are actually expressed in UTF-16 code units.

This is a bit of a mess: almost all web specs use UTF-16 code units as
the unit for indexing into text nodes, but we almost exclusively use
UTF-8 in our code base. Arguably the best thing would be for us to use
UTF-16 everywhere as well: it prevents these mismatches in our
implementations for the price of a bit more memory usage - and even that
could potentially be optimized for.

But for now, try to do the correct thing and lazily allocate UTF-16 data
in a `PaintableFragment` whenever we need to index into it or if we're
asked to determine the code unit offset of a pixel position.
This commit is contained in:
Jelle Raaijmakers 2025-06-12 13:19:52 +02:00 committed by Jelle Raaijmakers
commit 3df83dade8
Notes: github-actions[bot] 2025-06-13 13:09:49 +00:00
6 changed files with 110 additions and 36 deletions

View file

@ -29,14 +29,23 @@ PaintableFragment::PaintableFragment(Layout::LineBoxFragment const& fragment)
CSSPixelRect const PaintableFragment::absolute_rect() const
{
CSSPixelRect rect { {}, size() };
auto const* containing_block = paintable().containing_block();
if (containing_block)
if (auto const* containing_block = paintable().containing_block())
rect.set_location(containing_block->absolute_position());
rect.translate_by(offset());
return rect;
}
size_t PaintableFragment::text_index_at(CSSPixelPoint position) const
// Converts a byte offset in the UTF-8 text into the corresponding offset in UTF-16 code units.
// NOTE(review): byte_offset is compared against m_start + m_length and passed straight to utf8_view(), so it is
// presumably relative to the start of the text for rendering rather than to this fragment - confirm against callers.
size_t PaintableFragment::index_in_node_for_byte_offset(size_t byte_offset) const
{
// A zero-length fragment always maps to index 0.
if (m_length == 0)
return 0;
// Offsets at or past the end of this fragment map to the length of utf16_view() in code units.
if (byte_offset >= m_start + m_length)
return utf16_view().length_in_code_units();
// Go through code points: UTF-8 byte offset -> code point offset -> UTF-16 code unit offset.
auto code_point_offset = utf8_view().code_point_offset_of(byte_offset);
return utf16_view().code_unit_offset_of(code_point_offset);
}
size_t PaintableFragment::index_in_node_for_point(CSSPixelPoint position) const
{
if (!is<TextPaintable>(paintable()))
return 0;
@ -53,6 +62,8 @@ size_t PaintableFragment::text_index_at(CSSPixelPoint position) const
if (relative_inline_offset < 0)
return 0;
// Find the code point offset of the glyph matching the position.
auto code_point_offset = utf8_view().code_point_offset_of(m_start);
auto const& glyphs = m_glyph_run->glyphs();
auto smallest_distance = AK::NumericLimits<float>::max();
for (size_t i = 0; i < glyphs.size(); ++i) {
@ -60,14 +71,17 @@ size_t PaintableFragment::text_index_at(CSSPixelPoint position) const
// The last distance was smaller than this new distance, so we've found the closest glyph.
if (distance_to_position > smallest_distance)
return m_start + i - 1;
break;
smallest_distance = distance_to_position;
++code_point_offset;
}
return m_start + m_length - 1;
// Return the code unit offset in the UTF-16 string.
return utf16_view().code_unit_offset_of(code_point_offset - 1);
}
CSSPixelRect PaintableFragment::range_rect(size_t start_offset, size_t end_offset) const
CSSPixelRect PaintableFragment::range_rect(size_t start_offset_in_code_units, size_t end_offset_in_code_units) const
{
if (paintable().selection_state() == Paintable::SelectionState::None)
return {};
@ -75,24 +89,39 @@ CSSPixelRect PaintableFragment::range_rect(size_t start_offset, size_t end_offse
if (paintable().selection_state() == Paintable::SelectionState::Full)
return absolute_rect();
auto const start_index = m_start;
auto const end_index = m_start + m_length;
auto const& font = glyph_run() ? glyph_run()->font() : layout_node().first_available_font();
auto text = string_view();
// We are invoked with offsets coming from a Range, which means they are expressed in UTF-16 code units. We need to
// convert them to the byte offsets in the UTF-8 string. This is inefficient, but we only need to do it for
// fragments with a partial selection.
auto code_unit_to_byte_offset = [&](size_t offset_in_code_units) -> size_t {
auto text_in_utf16 = utf16_view();
if (offset_in_code_units >= text_in_utf16.length_in_code_units())
return m_length;
auto offset_code_point = text_in_utf16.code_point_offset_of(offset_in_code_units);
auto byte_offset = utf8_view().byte_offset_of(offset_code_point);
if (byte_offset <= m_start)
return 0;
if (byte_offset > m_start + m_length)
return m_length;
return byte_offset - m_start;
};
// We operate on the UTF-8 string that is part of this fragment.
auto text = utf8_view().substring_view(m_start, m_length);
if (paintable().selection_state() == Paintable::SelectionState::StartAndEnd) {
auto selection_start_in_this_fragment = code_unit_to_byte_offset(start_offset_in_code_units);
auto selection_end_in_this_fragment = code_unit_to_byte_offset(end_offset_in_code_units);
// we are in the start/end node (both the same)
if (start_index > end_offset)
if (selection_start_in_this_fragment >= m_length)
return {};
if (end_index < start_offset)
if (selection_end_in_this_fragment == 0)
return {};
if (selection_start_in_this_fragment == selection_end_in_this_fragment)
return {};
if (start_offset == end_offset)
return {};
auto selection_start_in_this_fragment = max(0, start_offset - m_start);
auto selection_end_in_this_fragment = min(m_length, end_offset - m_start);
auto pixel_distance_to_first_selected_character = CSSPixels::nearest_value_for(font.width(text.substring_view(0, selection_start_in_this_fragment)));
auto pixel_width_of_selection = CSSPixels::nearest_value_for(font.width(text.substring_view(selection_start_in_this_fragment, selection_end_in_this_fragment - selection_start_in_this_fragment))) + 1;
@ -113,12 +142,13 @@ CSSPixelRect PaintableFragment::range_rect(size_t start_offset, size_t end_offse
return rect;
}
if (paintable().selection_state() == Paintable::SelectionState::Start) {
auto selection_start_in_this_fragment = code_unit_to_byte_offset(start_offset_in_code_units);
auto selection_end_in_this_fragment = m_length;
// we are in the start node
if (end_index < start_offset)
if (selection_start_in_this_fragment >= m_length)
return {};
auto selection_start_in_this_fragment = max(0, start_offset - m_start);
auto selection_end_in_this_fragment = m_length;
auto pixel_distance_to_first_selected_character = CSSPixels::nearest_value_for(font.width(text.substring_view(0, selection_start_in_this_fragment)));
auto pixel_width_of_selection = CSSPixels::nearest_value_for(font.width(text.substring_view(selection_start_in_this_fragment, selection_end_in_this_fragment - selection_start_in_this_fragment))) + 1;
@ -139,12 +169,13 @@ CSSPixelRect PaintableFragment::range_rect(size_t start_offset, size_t end_offse
return rect;
}
if (paintable().selection_state() == Paintable::SelectionState::End) {
auto selection_start_in_this_fragment = 0u;
auto selection_end_in_this_fragment = code_unit_to_byte_offset(end_offset_in_code_units);
// we are in the end node
if (start_index > end_offset)
if (selection_end_in_this_fragment == 0)
return {};
auto selection_start_in_this_fragment = 0;
auto selection_end_in_this_fragment = min<int>(end_offset - m_start, m_length);
auto pixel_distance_to_first_selected_character = CSSPixels::nearest_value_for(font.width(text.substring_view(0, selection_start_in_this_fragment)));
auto pixel_width_of_selection = CSSPixels::nearest_value_for(font.width(text.substring_view(selection_start_in_this_fragment, selection_end_in_this_fragment - selection_start_in_this_fragment))) + 1;
@ -197,6 +228,7 @@ CSSPixelRect PaintableFragment::selection_rect() const
auto selection_end = text_control_element->selection_end();
return range_rect(selection_start, selection_end);
}
auto selection = paintable().document().get_selection();
if (!selection)
return {};
@ -207,11 +239,22 @@ CSSPixelRect PaintableFragment::selection_rect() const
return range_rect(range->start_offset(), range->end_offset());
}
StringView PaintableFragment::string_view() const
Utf8View PaintableFragment::utf8_view() const
{
if (!is<TextPaintable>(paintable()))
return {};
return static_cast<TextPaintable const&>(paintable()).text_for_rendering().bytes_as_string_view().substring_view(m_start, m_length);
return Utf8View { static_cast<TextPaintable const&>(paintable()).text_for_rendering() };
}
// Returns a UTF-16 view of the text exposed by utf8_view(). The UTF-16 data is created on first use and stored in
// m_text_in_utf16, so later calls reuse it instead of converting again.
Utf16View PaintableFragment::utf16_view() const
{
// Paintables that are not a TextPaintable yield an empty view.
if (!is<TextPaintable>(paintable()))
return {};
// Convert lazily; the conversion result is wrapped in MUST(), so a failure is not handled here.
if (!m_text_in_utf16.has_value())
m_text_in_utf16 = MUST(AK::utf8_to_utf16(utf8_view()));
return Utf16View { m_text_in_utf16.value() };
}
}