LibWeb: Ensure up/down arrow navigation is grapheme-aware

Previously, it was possible for an up/down arrow press to place the
cursor in the middle of a multi-code point grapheme cluster. We want to
prevent this in a way that matches the behavior of other browsers.

Both Chrome and Firefox will map the starting position to a visually
equivalent position in the target line with harfbuzz and ICU segmenters.
The need for this is explained in a code comment. The result is a much
more natural feeling of text navigation.
This commit is contained in:
Timothy Flynn 2025-08-15 15:13:23 -04:00 committed by Jelle Raaijmakers
commit 0e4fb9ae73
Notes: github-actions[bot] 2025-08-18 11:18:42 +00:00
5 changed files with 182 additions and 36 deletions

View file

@ -26,6 +26,7 @@
#include <LibWeb/HTML/Parser/HTMLParser.h> #include <LibWeb/HTML/Parser/HTMLParser.h>
#include <LibWeb/HTML/ValidityState.h> #include <LibWeb/HTML/ValidityState.h>
#include <LibWeb/Infra/Strings.h> #include <LibWeb/Infra/Strings.h>
#include <LibWeb/Layout/TextNode.h>
#include <LibWeb/Painting/Paintable.h> #include <LibWeb/Painting/Paintable.h>
#include <LibWeb/Selection/Selection.h> #include <LibWeb/Selection/Selection.h>
@ -1034,70 +1035,136 @@ static constexpr size_t find_line_end(Utf16View const& view, size_t offset)
return offset; return offset;
} }
// Returns the total width of `text` when rendered with the fonts of `text_node`. The text is split into chunks by a
// ChunkIterator (with line wrapping and line break handling both disabled), and each chunk is measured with the font
// selected for that chunk. An empty `text` has a width of 0.
static float measure_text_width(Layout::TextNode const& text_node, Utf16View const& text)
{
    float total_width = 0;

    if (!text.is_empty()) {
        // Work on a private copy of the node's grapheme segmenter so that pointing it at `text` does not disturb the
        // segmenter held by the text node.
        auto grapheme_segmenter = text_node.grapheme_segmenter().clone();
        grapheme_segmenter->set_segmented_text(text);

        Layout::TextNode::ChunkIterator chunks { text_node, text, *grapheme_segmenter, false, false };

        while (true) {
            auto chunk = chunks.next();
            if (!chunk.has_value())
                break;

            total_width += chunk->font->width(chunk->view);
        }
    }

    return total_width;
}
// Given the text between the start of the current line and the cursor (`source_line`), returns the code unit offset
// within `target_line` whose rendered width is closest to the rendered width of `source_line`. The returned offset
// always lies on a grapheme boundary of `target_line`.
static size_t translate_position_across_lines(Layout::TextNode const& text_node, Utf16View const& source_line, Utf16View const& target_line)
{
    // Moving the cursor from a position within one line to the visually-equivalent position in an adjacent line takes
    // more care than it may seem. Consider the following HTML:
    //
    //     <textarea>
    //     hello 👩🏼‍❤️‍👨🏻 there
    //     my 👩🏼‍❤️‍👨🏻 friends!
    //     </textarea>
    //
    // With these terms:
    //     * logical index = the raw code unit offset of the cursor
    //     * visual index = the grapheme-aware offset of the cursor (i.e. the offset the user actually perceives)
    //     * text affinity = the side (left or right) of a grapheme that the cursor is visually closest to
    //
    // Moving down from just after "hello" (logical index=5, visual index=5), the user expects the cursor to land just
    // after the "👩🏼‍❤️‍👨🏻" (logical index=15, visual index=4). The two positions do not share a visual index, so mapping
    // the visual index of 5 back to a logical index on the next line is not enough. The mismatch grows further when
    // several fonts are used within a single line.
    //
    // So we measure the text between the start of the line and the starting index instead, and look for the position
    // on the target line whose width comes as close to that starting width as possible. An exact match may not exist,
    // so we also take the text affinity into account: we choose the target index whose affinity brings us closest to
    // the starting width.
    auto const desired_width = measure_text_width(text_node, source_line);

    // The two neighboring grapheme boundaries that bracket the desired width, with the accumulated width up to each.
    size_t boundary_before = 0;
    float width_before = 0.0f;

    size_t boundary_after = 0;
    float width_after = 0.0f;

    auto segmenter = text_node.grapheme_segmenter().clone();

    segmenter->for_each_boundary(target_line, [&](auto boundary) {
        // Only the text since the previously accepted boundary is measured; its width extends the running total.
        auto segment = target_line.substring_view(boundary_before, boundary - boundary_before);
        auto segment_width = measure_text_width(text_node, segment);

        boundary_after = boundary;
        width_after = width_before + segment_width;

        // Stop at the first boundary that reaches or passes the desired width.
        if (width_after >= desired_width)
            return IterationDecision::Break;

        boundary_before = boundary;
        width_before += segment_width;

        return IterationDecision::Continue;
    });

    // Choose whichever bracketing boundary is strictly closer to the desired width; ties go to the right-hand one.
    auto const distance_to_before = desired_width - width_before;
    auto const distance_to_after = width_after - desired_width;

    return distance_to_before < distance_to_after ? boundary_before : boundary_after;
}
// NOTE(review): The lines below come from a side-by-side diff rendering: where a line holds two statements, the first
// is the pre-change text and the second is the post-change text of this function.
//
// Post-change behavior: moves the end of the selection from its current line to the next line.
//  * Returns without doing anything if there is no text node, or if the text node has no Layout::TextNode.
//  * The new offset defaults to the length of the text, which is what is used when the current line is the last one.
//  * Otherwise, the text between the start of the current line and m_selection_end is handed, together with the text
//    of the next line, to translate_position_across_lines() to pick the offset within the next line.
//  * With CollapseSelection::Yes the selection is collapsed to the new offset; otherwise only m_selection_end is
//    updated. selection_was_changed() is invoked in both cases.
void FormAssociatedTextControlElement::increment_cursor_position_to_next_line(CollapseSelection collapse) void FormAssociatedTextControlElement::increment_cursor_position_to_next_line(CollapseSelection collapse)
{ {
auto const text_node = form_associated_element_to_text_node(); auto dom_node = form_associated_element_to_text_node();
if (!text_node) if (!dom_node)
return; return;
auto code_points = text_node->data().utf16_view(); auto const* layout_node = as_if<Layout::TextNode>(dom_node->layout_node());
auto length = code_points.length_in_code_units(); if (!layout_node)
auto current_line_end = find_line_end(code_points, m_selection_end); return;
// initialize to handle the case of last line auto text = dom_node->data().utf16_view();
size_t new_offset = current_line_end; auto new_offset = text.length_in_code_units();
if (auto current_line_end = find_line_end(text, m_selection_end); current_line_end < text.length_in_code_units()) {
auto current_line_start = find_line_start(text, m_selection_end);
auto current_line_text = text.substring_view(current_line_start, m_selection_end - current_line_start);
if (current_line_end < length) {
auto next_line_start = current_line_end + 1; auto next_line_start = current_line_end + 1;
auto position_within_line = m_selection_end - find_line_start(code_points, m_selection_end); auto next_line_length = find_line_end(text, next_line_start) - next_line_start;
auto next_line_end = find_line_end(code_points, next_line_start); auto next_line_text = text.substring_view(next_line_start, next_line_length);
auto next_line_length = next_line_end - next_line_start;
new_offset = next_line_start + min(position_within_line, next_line_length); new_offset = next_line_start + translate_position_across_lines(*layout_node, current_line_text, next_line_text);
if (new_offset > 0 && new_offset < length && AK::UnicodeUtils::is_utf16_low_surrogate(code_points.code_unit_at(new_offset))) {
if (AK::UnicodeUtils::is_utf16_high_surrogate(code_points.code_unit_at(new_offset - 1)))
--new_offset;
}
} }
if (collapse == CollapseSelection::Yes) { if (collapse == CollapseSelection::Yes)
collapse_selection_to_offset(new_offset); collapse_selection_to_offset(new_offset);
} else { else
m_selection_end = new_offset; m_selection_end = new_offset;
}
selection_was_changed(); selection_was_changed();
} }
// NOTE(review): The lines below come from a side-by-side diff rendering: where a line holds two statements, the first
// is the pre-change text and the second is the post-change text of this function.
//
// Post-change behavior: moves the end of the selection from its current line to the previous line.
//  * Returns without doing anything if there is no text node, or if the text node has no Layout::TextNode.
//  * The new offset defaults to 0, which is what is used when the current line starts at offset 0.
//  * Otherwise, the text between the start of the current line and m_selection_end is handed, together with the text
//    of the previous line, to translate_position_across_lines() to pick the offset within the previous line. The
//    previous line's length leaves out the one code unit just before the current line (presumably the line
//    separator; confirm against find_line_start()).
//  * With CollapseSelection::Yes the selection is collapsed to the new offset; otherwise only m_selection_end is
//    updated. selection_was_changed() is invoked in both cases.
void FormAssociatedTextControlElement::decrement_cursor_position_to_previous_line(CollapseSelection collapse) void FormAssociatedTextControlElement::decrement_cursor_position_to_previous_line(CollapseSelection collapse)
{ {
auto const text_node = form_associated_element_to_text_node(); auto dom_node = form_associated_element_to_text_node();
if (!text_node) if (!dom_node)
return; return;
auto code_points = text_node->data().utf16_view(); auto const* layout_node = as_if<Layout::TextNode>(dom_node->layout_node());
size_t new_offset = 0; if (!layout_node)
return;
if (auto current_line_start = find_line_start(code_points, m_selection_end); current_line_start != 0) { auto text = dom_node->data().utf16_view();
auto position_within_line = m_selection_end - current_line_start; auto new_offset = 0uz;
auto previous_line_start = find_line_start(code_points, current_line_start - 1); if (auto current_line_start = find_line_start(text, m_selection_end); current_line_start != 0) {
auto current_line_text = text.substring_view(current_line_start, m_selection_end - current_line_start);
auto previous_line_start = find_line_start(text, current_line_start - 1);
auto previous_line_length = current_line_start - previous_line_start - 1; auto previous_line_length = current_line_start - previous_line_start - 1;
auto previous_line_text = text.substring_view(previous_line_start, previous_line_length);
new_offset = previous_line_start + min(position_within_line, previous_line_length); new_offset = previous_line_start + translate_position_across_lines(*layout_node, current_line_text, previous_line_text);
if (new_offset > 0 && AK::UnicodeUtils::is_utf16_low_surrogate(code_points.code_unit_at(new_offset))) {
if (AK::UnicodeUtils::is_utf16_high_surrogate(code_points.code_unit_at(new_offset - 1)))
--new_offset;
}
} }
if (collapse == CollapseSelection::Yes) { if (collapse == CollapseSelection::Yes)
collapse_selection_to_offset(new_offset); collapse_selection_to_offset(new_offset);
} else { else
m_selection_end = new_offset; m_selection_end = new_offset;
}
selection_was_changed(); selection_was_changed();
} }

View file

@ -391,6 +391,15 @@ TextNode::ChunkIterator::ChunkIterator(TextNode const& text_node, bool wrap_line
{ {
} }
// Constructs a ChunkIterator over caller-provided `text`, segmented with the caller-provided `grapheme_segmenter`.
// From `text_node`, only the font list of its computed values is used here.
// NOTE(review): `grapheme_segmenter` is taken by reference; presumably m_grapheme_segmenter refers to it rather than
// owning a copy, in which case the caller must keep it alive for as long as the iterator is used. Confirm against the
// member declaration.
TextNode::ChunkIterator::ChunkIterator(TextNode const& text_node, Utf16View const& text, Unicode::Segmenter& grapheme_segmenter, bool wrap_lines, bool respect_linebreaks)
: m_wrap_lines(wrap_lines)
, m_respect_linebreaks(respect_linebreaks)
, m_view(text)
, m_font_cascade_list(text_node.computed_values().font_list())
, m_grapheme_segmenter(grapheme_segmenter)
{
}
static Gfx::GlyphRun::TextType text_type_for_code_point(u32 code_point) static Gfx::GlyphRun::TextType text_type_for_code_point(u32 code_point)
{ {
switch (Unicode::bidirectional_class(code_point)) { switch (Unicode::bidirectional_class(code_point)) {

View file

@ -43,6 +43,7 @@ public:
class ChunkIterator { class ChunkIterator {
public: public:
ChunkIterator(TextNode const&, bool wrap_lines, bool respect_linebreaks); ChunkIterator(TextNode const&, bool wrap_lines, bool respect_linebreaks);
ChunkIterator(TextNode const&, Utf16View const&, Unicode::Segmenter&, bool wrap_lines, bool respect_linebreaks);
Optional<Chunk> next(); Optional<Chunk> next();
Optional<Chunk> peek(size_t); Optional<Chunk> peek(size_t);

View file

@ -0,0 +1,18 @@
Right: position=1 character="e"
Right: position=2 character="l"
Right: position=3 character="l"
Right: position=4 character="o"
Right: position=5 character=" "
Down: position=28 character="👩🏼‍❤️‍👨🏻"
Left: position=27 character=" "
Up: position=2 character="l"
Right: position=3 character="l"
Right: position=4 character="o"
Right: position=5 character=" "
Right: position=6 character="👩🏼‍❤️‍👨🏻"
Down: position=40 character=" "
Up: position=6 character="👩🏼‍❤️‍👨🏻"
Down: position=40 character=" "
Left: position=28 character="👩🏼‍❤️‍👨🏻"
Left: position=27 character=" "
Up: position=2 character="l"

View file

@ -0,0 +1,51 @@
<!DOCTYPE html>
<script src="include.js"></script>
<textarea id="text">
hello 👩🏼‍❤️‍👨🏻 there
my 👩🏼‍❤️‍👨🏻 friends!
</textarea>
<script>
test(() => {
    // Arrow navigation needs a layout node to work with, so force layout to happen before any key is sent.
    document.body.offsetWidth;

    const graphemeSegmenter = new Intl.Segmenter("en", { granularity: "grapheme" });
    const content = text.textContent.trim();

    // Sends a single key press to the textarea, then prints the resulting cursor position along with the grapheme
    // that begins at that position.
    function moveCursor(direction) {
        internals.sendKey(text, direction);

        const remainingText = content.substring(text.selectionStart);
        const character = [...graphemeSegmenter.segment(remainingText)][0].segment;

        println(`${direction}: position=${text.selectionStart} character="${character}"`);
    }

    // The key presses to perform, in order.
    const directions = [
        "Right", "Right", "Right", "Right", "Right",
        "Down",
        "Left",
        "Up",
        "Right", "Right", "Right", "Right",
        "Down",
        "Up",
        "Down",
        "Left", "Left",
        "Up",
    ];

    for (const direction of directions)
        moveCursor(direction);
});
</script>