mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-09 01:29:17 +00:00
LibWeb: Treat DOM::Range
offsets as UTF-16 code unit offsets
We generated `PaintableFragment`s with a start and length represented in UTF-8 byte offsets, but failed to consider that the offsets in a `DOM::Range` are actually expressed in UTF-16 code units. This is a bit of a mess: almost all web specs use UTF-16 code units as the unit for indexing into text nodes, but we almost exclusively use UTF-8 in our code base. Arguably the best thing would be for us to use UTF-16 everywhere as well: it prevents these mismatches in our implementations for the price of a bit more memory usage - and even that could potentially be optimized for. But for now, try to do the correct thing and lazily allocate UTF-16 data in a `PaintableFragment` whenever we need to index into it or if we're asked to determine the code unit offset of a pixel position.
This commit is contained in:
parent
dbbdf2cebc
commit
3df83dade8
Notes:
github-actions[bot]
2025-06-13 13:09:49 +00:00
Author: https://github.com/gmta
Commit: 3df83dade8
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/5067
Reviewed-by: https://github.com/tcl3
Reviewed-by: https://github.com/trflynn89
6 changed files with 110 additions and 36 deletions
|
@ -29,14 +29,23 @@ PaintableFragment::PaintableFragment(Layout::LineBoxFragment const& fragment)
|
|||
CSSPixelRect const PaintableFragment::absolute_rect() const
|
||||
{
|
||||
CSSPixelRect rect { {}, size() };
|
||||
auto const* containing_block = paintable().containing_block();
|
||||
if (containing_block)
|
||||
if (auto const* containing_block = paintable().containing_block())
|
||||
rect.set_location(containing_block->absolute_position());
|
||||
rect.translate_by(offset());
|
||||
return rect;
|
||||
}
|
||||
|
||||
size_t PaintableFragment::text_index_at(CSSPixelPoint position) const
|
||||
// Converts a UTF-8 byte offset into the corresponding UTF-16 code unit offset within utf16_view().
// - An empty fragment (m_length == 0) always yields 0.
// - A byte offset at or past the end of this fragment (m_start + m_length) yields the total number of
//   UTF-16 code units in utf16_view().
// - Otherwise the byte offset is first mapped to a code point offset via utf8_view(), and that code point
//   offset is then mapped to a code unit offset via utf16_view().
// NOTE(review): byte_offset is compared against m_start + m_length, so it is presumably relative to the
// start of the text exposed by utf8_view() rather than to the start of this fragment - confirm with callers.
size_t PaintableFragment::index_in_node_for_byte_offset(size_t byte_offset) const
|
||||
{
|
||||
if (m_length == 0)
|
||||
return 0;
|
||||
if (byte_offset >= m_start + m_length)
|
||||
return utf16_view().length_in_code_units();
|
||||
auto code_point_offset = utf8_view().code_point_offset_of(byte_offset);
|
||||
return utf16_view().code_unit_offset_of(code_point_offset);
|
||||
}
|
||||
|
||||
size_t PaintableFragment::index_in_node_for_point(CSSPixelPoint position) const
|
||||
{
|
||||
if (!is<TextPaintable>(paintable()))
|
||||
return 0;
|
||||
|
@ -53,6 +62,8 @@ size_t PaintableFragment::text_index_at(CSSPixelPoint position) const
|
|||
if (relative_inline_offset < 0)
|
||||
return 0;
|
||||
|
||||
// Find the code point offset of the glyph matching the position.
|
||||
auto code_point_offset = utf8_view().code_point_offset_of(m_start);
|
||||
auto const& glyphs = m_glyph_run->glyphs();
|
||||
auto smallest_distance = AK::NumericLimits<float>::max();
|
||||
for (size_t i = 0; i < glyphs.size(); ++i) {
|
||||
|
@ -60,14 +71,17 @@ size_t PaintableFragment::text_index_at(CSSPixelPoint position) const
|
|||
|
||||
// The last distance was smaller than this new distance, so we've found the closest glyph.
|
||||
if (distance_to_position > smallest_distance)
|
||||
return m_start + i - 1;
|
||||
break;
|
||||
smallest_distance = distance_to_position;
|
||||
|
||||
++code_point_offset;
|
||||
}
|
||||
|
||||
return m_start + m_length - 1;
|
||||
// Return the code unit offset in the UTF-16 string.
|
||||
return utf16_view().code_unit_offset_of(code_point_offset - 1);
|
||||
}
|
||||
|
||||
CSSPixelRect PaintableFragment::range_rect(size_t start_offset, size_t end_offset) const
|
||||
CSSPixelRect PaintableFragment::range_rect(size_t start_offset_in_code_units, size_t end_offset_in_code_units) const
|
||||
{
|
||||
if (paintable().selection_state() == Paintable::SelectionState::None)
|
||||
return {};
|
||||
|
@ -75,24 +89,39 @@ CSSPixelRect PaintableFragment::range_rect(size_t start_offset, size_t end_offse
|
|||
if (paintable().selection_state() == Paintable::SelectionState::Full)
|
||||
return absolute_rect();
|
||||
|
||||
auto const start_index = m_start;
|
||||
auto const end_index = m_start + m_length;
|
||||
|
||||
auto const& font = glyph_run() ? glyph_run()->font() : layout_node().first_available_font();
|
||||
auto text = string_view();
|
||||
|
||||
// We are invoked with offsets coming from a Range, which means they are expressed in UTF-16 code units. We need to
|
||||
// convert them to the byte offsets in the UTF-8 string. This is inefficient, but we only need to do it for
|
||||
// fragments with a partial selection.
|
||||
auto code_unit_to_byte_offset = [&](size_t offset_in_code_units) -> size_t {
|
||||
auto text_in_utf16 = utf16_view();
|
||||
if (offset_in_code_units >= text_in_utf16.length_in_code_units())
|
||||
return m_length;
|
||||
auto offset_code_point = text_in_utf16.code_point_offset_of(offset_in_code_units);
|
||||
auto byte_offset = utf8_view().byte_offset_of(offset_code_point);
|
||||
if (byte_offset <= m_start)
|
||||
return 0;
|
||||
if (byte_offset > m_start + m_length)
|
||||
return m_length;
|
||||
return byte_offset - m_start;
|
||||
};
|
||||
|
||||
// We operate on the UTF-8 string that is part of this fragment.
|
||||
auto text = utf8_view().substring_view(m_start, m_length);
|
||||
|
||||
if (paintable().selection_state() == Paintable::SelectionState::StartAndEnd) {
|
||||
auto selection_start_in_this_fragment = code_unit_to_byte_offset(start_offset_in_code_units);
|
||||
auto selection_end_in_this_fragment = code_unit_to_byte_offset(end_offset_in_code_units);
|
||||
|
||||
// we are in the start/end node (both the same)
|
||||
if (start_index > end_offset)
|
||||
if (selection_start_in_this_fragment >= m_length)
|
||||
return {};
|
||||
if (end_index < start_offset)
|
||||
if (selection_end_in_this_fragment == 0)
|
||||
return {};
|
||||
if (selection_start_in_this_fragment == selection_end_in_this_fragment)
|
||||
return {};
|
||||
|
||||
if (start_offset == end_offset)
|
||||
return {};
|
||||
|
||||
auto selection_start_in_this_fragment = max(0, start_offset - m_start);
|
||||
auto selection_end_in_this_fragment = min(m_length, end_offset - m_start);
|
||||
auto pixel_distance_to_first_selected_character = CSSPixels::nearest_value_for(font.width(text.substring_view(0, selection_start_in_this_fragment)));
|
||||
auto pixel_width_of_selection = CSSPixels::nearest_value_for(font.width(text.substring_view(selection_start_in_this_fragment, selection_end_in_this_fragment - selection_start_in_this_fragment))) + 1;
|
||||
|
||||
|
@ -113,12 +142,13 @@ CSSPixelRect PaintableFragment::range_rect(size_t start_offset, size_t end_offse
|
|||
return rect;
|
||||
}
|
||||
if (paintable().selection_state() == Paintable::SelectionState::Start) {
|
||||
auto selection_start_in_this_fragment = code_unit_to_byte_offset(start_offset_in_code_units);
|
||||
auto selection_end_in_this_fragment = m_length;
|
||||
|
||||
// we are in the start node
|
||||
if (end_index < start_offset)
|
||||
if (selection_start_in_this_fragment >= m_length)
|
||||
return {};
|
||||
|
||||
auto selection_start_in_this_fragment = max(0, start_offset - m_start);
|
||||
auto selection_end_in_this_fragment = m_length;
|
||||
auto pixel_distance_to_first_selected_character = CSSPixels::nearest_value_for(font.width(text.substring_view(0, selection_start_in_this_fragment)));
|
||||
auto pixel_width_of_selection = CSSPixels::nearest_value_for(font.width(text.substring_view(selection_start_in_this_fragment, selection_end_in_this_fragment - selection_start_in_this_fragment))) + 1;
|
||||
|
||||
|
@ -139,12 +169,13 @@ CSSPixelRect PaintableFragment::range_rect(size_t start_offset, size_t end_offse
|
|||
return rect;
|
||||
}
|
||||
if (paintable().selection_state() == Paintable::SelectionState::End) {
|
||||
auto selection_start_in_this_fragment = 0u;
|
||||
auto selection_end_in_this_fragment = code_unit_to_byte_offset(end_offset_in_code_units);
|
||||
|
||||
// we are in the end node
|
||||
if (start_index > end_offset)
|
||||
if (selection_end_in_this_fragment == 0)
|
||||
return {};
|
||||
|
||||
auto selection_start_in_this_fragment = 0;
|
||||
auto selection_end_in_this_fragment = min<int>(end_offset - m_start, m_length);
|
||||
auto pixel_distance_to_first_selected_character = CSSPixels::nearest_value_for(font.width(text.substring_view(0, selection_start_in_this_fragment)));
|
||||
auto pixel_width_of_selection = CSSPixels::nearest_value_for(font.width(text.substring_view(selection_start_in_this_fragment, selection_end_in_this_fragment - selection_start_in_this_fragment))) + 1;
|
||||
|
||||
|
@ -197,6 +228,7 @@ CSSPixelRect PaintableFragment::selection_rect() const
|
|||
auto selection_end = text_control_element->selection_end();
|
||||
return range_rect(selection_start, selection_end);
|
||||
}
|
||||
|
||||
auto selection = paintable().document().get_selection();
|
||||
if (!selection)
|
||||
return {};
|
||||
|
@ -207,11 +239,22 @@ CSSPixelRect PaintableFragment::selection_rect() const
|
|||
return range_rect(range->start_offset(), range->end_offset());
|
||||
}
|
||||
|
||||
StringView PaintableFragment::string_view() const
|
||||
Utf8View PaintableFragment::utf8_view() const
|
||||
{
|
||||
if (!is<TextPaintable>(paintable()))
|
||||
return {};
|
||||
return static_cast<TextPaintable const&>(paintable()).text_for_rendering().bytes_as_string_view().substring_view(m_start, m_length);
|
||||
return Utf8View { static_cast<TextPaintable const&>(paintable()).text_for_rendering() };
|
||||
}
|
||||
|
||||
// Returns a UTF-16 view of the text exposed by utf8_view().
// For paintables that are not a TextPaintable, a default-constructed (empty) view is returned.
// The UTF-16 data is produced on first use by converting utf8_view() with AK::utf8_to_utf16() and is stored
// in m_text_in_utf16, so subsequent calls reuse the stored conversion instead of converting again.
// The conversion is wrapped in MUST(), i.e. a conversion failure is not propagated to the caller.
Utf16View PaintableFragment::utf16_view() const
|
||||
{
|
||||
if (!is<TextPaintable>(paintable()))
|
||||
return {};
|
||||
|
||||
if (!m_text_in_utf16.has_value())
|
||||
m_text_in_utf16 = MUST(AK::utf8_to_utf16(utf8_view()));
|
||||
|
||||
return Utf16View { m_text_in_utf16.value() };
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue