From 6176b05ca575cd1e534e6d9210d18fa4285f46d9 Mon Sep 17 00:00:00 2001 From: Jelle Raaijmakers Date: Tue, 29 Apr 2025 13:20:15 +0200 Subject: [PATCH] LibWeb: Align editing whitespace canonicalization with other browsers The spec calls for a couple of very specific whitespace padding techniques whenever we canonicalize whitespace during the execution of editing commands, but it seems that other browsers have a simpler strategy - let's adopt theirs! --- .../LibWeb/Editing/Internal/Algorithms.cpp | 74 +++++++++---------- .../Editing/execCommand-forwardDelete.txt | 19 +++++ .../Editing/execCommand-forwardDelete.html | 36 ++++++--- 3 files changed, 81 insertions(+), 48 deletions(-) diff --git a/Libraries/LibWeb/Editing/Internal/Algorithms.cpp b/Libraries/LibWeb/Editing/Internal/Algorithms.cpp index 0e039317991..278b6c0a91b 100644 --- a/Libraries/LibWeb/Editing/Internal/Algorithms.cpp +++ b/Libraries/LibWeb/Editing/Internal/Algorithms.cpp @@ -292,55 +292,51 @@ String canonical_space_sequence(u32 length, bool non_breaking_start, bool non_br auto repeated_pair = non_breaking_start ? "\u00A0 "sv : " \u00A0"sv; // 6. While n is greater than three, append repeated pair to buffer and subtract two from n. - while (n > 3) { + // AD-HOC: Other browsers seem to fit in as many repeated pairs until the remaining length is <= 2. + while (n > 2) { buffer.append(repeated_pair); n -= 2; } // 7. If n is three, append a three-code unit string to buffer depending on non-breaking start // and non-breaking end: - if (n == 3) { - // non-breaking start and non-breaking end false - // U+0020 U+00A0 U+0020 - if (!non_breaking_start && !non_breaking_end) - buffer.append(" \u00A0 "sv); - - // non-breaking start true, non-breaking end false - // U+00A0 U+00A0 U+0020 - else if (non_breaking_start && !non_breaking_end) - buffer.append("\u00A0\u00A0 "sv); - - // non-breaking start false, non-breaking end true - // U+0020 U+00A0 U+00A0 - else if (!non_breaking_start) - buffer.append(" \u00A0\u00A0"sv); - - // non-breaking start and non-breaking end both true - // U+00A0 U+0020 U+00A0 - else - buffer.append("\u00A0 \u00A0"sv); - } + // + // non-breaking start and non-breaking end false + // U+0020 U+00A0 U+0020 + // + // non-breaking start true, non-breaking end false + // U+00A0 U+00A0 U+0020 + // + // non-breaking start false, non-breaking end true + // U+0020 U+00A0 U+00A0 + // + // non-breaking start and non-breaking end both true + // U+00A0 U+0020 U+00A0 // 8. Otherwise, append a two-code unit string to buffer depending on non-breaking start and // non-breaking end: - else { - // non-breaking start and non-breaking end false - // non-breaking start true, non-breaking end false - // U+00A0 U+0020 - if (!non_breaking_start && !non_breaking_end) - buffer.append("\u00A0 "sv); + // + // non-breaking start and non-breaking end false + // non-breaking start true, non-breaking end false + // U+00A0 U+0020 + // + // non-breaking start false, non-breaking end true + // U+0020 U+00A0 + // + // non-breaking start and non-breaking end both true + // U+00A0 U+00A0 - // non-breaking start false, non-breaking end true - // U+0020 U+00A0 - else if (!non_breaking_start) - buffer.append(" \u00A0"sv); - - // non-breaking start and non-breaking end both true - // U+00A0 U+00A0 - else - buffer.append("\u00A0\u00A0"sv); + // AD-HOC: Other browsers seem to ignore the above and deal differently with padding the remainder; the first + // remaining position is filled with the first character from repeated pair. + if (n > 0) { + buffer.append(repeated_pair.substring_view(0, 1) == " "sv ? " "sv : "\u00A0"sv); + --n; } + // AD-HOC: Then, the final position is set depending on the value of non-breaking end. + if (n > 0) + buffer.append(non_breaking_end ? "\u00A0"sv : " "sv); + // 9. Return buffer. return MUST(buffer.to_string()); } @@ -528,9 +524,11 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa // start is true if start offset is zero and start node follows a line break, and false // otherwise. non-breaking end is true if end offset is end node's length and end node // precedes a line break, and false otherwise. + // AD-HOC: Other browsers' behavior here is to set non_breaking_start to true if length > 1, so we add that + // condition as well. auto replacement_whitespace = canonical_space_sequence( length, - start_offset == 0 && follows_a_line_break(start_node), + (start_offset == 0 && follows_a_line_break(start_node)) || length > 1, end_offset == end_node->length() && precedes_a_line_break(end_node)); // 10. While (start node, start offset) is before (end node, end offset): diff --git a/Tests/LibWeb/Text/expected/Editing/execCommand-forwardDelete.txt b/Tests/LibWeb/Text/expected/Editing/execCommand-forwardDelete.txt index 0db943aa338..6a2750fb0d7 100644 --- a/Tests/LibWeb/Text/expected/Editing/execCommand-forwardDelete.txt +++ b/Tests/LibWeb/Text/expected/Editing/execCommand-forwardDelete.txt @@ -1,2 +1,21 @@ +--- a --- Before: foobar After: fooar +--- b --- +Before: a    +After: a   +--- c --- +Before: a  b +After: a b +--- d --- +Before: a   b +After: a  b +--- e --- +Before: a    b +After: a   b +--- f --- +Before: a     b +After: a    b +--- g --- +Before:   b +After:  b diff --git a/Tests/LibWeb/Text/input/Editing/execCommand-forwardDelete.html b/Tests/LibWeb/Text/input/Editing/execCommand-forwardDelete.html index 0fecb6bfa5f..5a1a98adcdc 100644 --- a/Tests/LibWeb/Text/input/Editing/execCommand-forwardDelete.html +++ b/Tests/LibWeb/Text/input/Editing/execCommand-forwardDelete.html @@ -1,19 +1,35 @@ -
foobar
+
foobar
+
a   
+
a  b
+
a   b
+
a    b
+
a     b
+
  b