LibWeb: Align editing whitespace canonicalization with other browsers

The spec calls for a couple of very specific whitespace padding
techniques whenever we canonicalize whitespace during the execution of
editing commands, but it seems that other browsers have a simpler
strategy - let's adopt theirs!
This commit is contained in:
Jelle Raaijmakers 2025-04-29 13:20:15 +02:00 committed by Andreas Kling
commit 6176b05ca5
Notes: github-actions[bot] 2025-04-29 13:33:03 +00:00
3 changed files with 81 additions and 48 deletions

View file

@ -292,55 +292,51 @@ String canonical_space_sequence(u32 length, bool non_breaking_start, bool non_br
auto repeated_pair = non_breaking_start ? "\u00A0 "sv : " \u00A0"sv;
// 6. While n is greater than three, append repeated pair to buffer and subtract two from n.
while (n > 3) {
// AD-HOC: Other browsers seem to fit in as many repeated pairs as possible, until the remaining length is <= 2.
while (n > 2) {
buffer.append(repeated_pair);
n -= 2;
}
// 7. If n is three, append a three-code unit string to buffer depending on non-breaking start
// and non-breaking end:
if (n == 3) {
// non-breaking start and non-breaking end false
// U+0020 U+00A0 U+0020
if (!non_breaking_start && !non_breaking_end)
buffer.append(" \u00A0 "sv);
// non-breaking start true, non-breaking end false
// U+00A0 U+00A0 U+0020
else if (non_breaking_start && !non_breaking_end)
buffer.append("\u00A0\u00A0 "sv);
// non-breaking start false, non-breaking end true
// U+0020 U+00A0 U+00A0
else if (!non_breaking_start)
buffer.append(" \u00A0\u00A0"sv);
// non-breaking start and non-breaking end both true
// U+00A0 U+0020 U+00A0
else
buffer.append("\u00A0 \u00A0"sv);
}
//
// non-breaking start and non-breaking end false
// U+0020 U+00A0 U+0020
//
// non-breaking start true, non-breaking end false
// U+00A0 U+00A0 U+0020
//
// non-breaking start false, non-breaking end true
// U+0020 U+00A0 U+00A0
//
// non-breaking start and non-breaking end both true
// U+00A0 U+0020 U+00A0
// 8. Otherwise, append a two-code unit string to buffer depending on non-breaking start and
// non-breaking end:
else {
// non-breaking start and non-breaking end false
// non-breaking start true, non-breaking end false
// U+00A0 U+0020
if (!non_breaking_start && !non_breaking_end)
buffer.append("\u00A0 "sv);
//
// non-breaking start and non-breaking end false
// non-breaking start true, non-breaking end false
// U+00A0 U+0020
//
// non-breaking start false, non-breaking end true
// U+0020 U+00A0
//
// non-breaking start and non-breaking end both true
// U+00A0 U+00A0
// non-breaking start false, non-breaking end true
// U+0020 U+00A0
else if (!non_breaking_start)
buffer.append(" \u00A0"sv);
// non-breaking start and non-breaking end both true
// U+00A0 U+00A0
else
buffer.append("\u00A0\u00A0"sv);
// AD-HOC: Other browsers seem to ignore the above and deal differently with padding the remainder; the first
// remaining position is filled with the first character from repeated pair.
if (n > 0) {
buffer.append(repeated_pair.substring_view(0, 1) == " "sv ? " "sv : "\u00A0"sv);
--n;
}
// AD-HOC: Then, the final position is set depending on the value of non-breaking end.
if (n > 0)
buffer.append(non_breaking_end ? "\u00A0"sv : " "sv);
// 9. Return buffer.
return MUST(buffer.to_string());
}
@ -528,9 +524,11 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa
// start is true if start offset is zero and start node follows a line break, and false
// otherwise. non-breaking end is true if end offset is end node's length and end node
// precedes a line break, and false otherwise.
// AD-HOC: Other browsers' behavior here is to set non_breaking_start to true if length > 1, so we add that
// condition as well.
auto replacement_whitespace = canonical_space_sequence(
length,
start_offset == 0 && follows_a_line_break(start_node),
(start_offset == 0 && follows_a_line_break(start_node)) || length > 1,
end_offset == end_node->length() && precedes_a_line_break(end_node));
// 10. While (start node, start offset) is before (end node, end offset):

View file

@ -1,2 +1,21 @@
--- a ---
Before: foobar
After: fooar
--- b ---
Before: a&nbsp;&nbsp;&nbsp;
After: a&nbsp;&nbsp;
--- c ---
Before: a&nbsp;&nbsp;b
After: a b
--- d ---
Before: a&nbsp;&nbsp;&nbsp;b
After: a&nbsp; b
--- e ---
Before: a&nbsp;&nbsp;&nbsp;&nbsp;b
After: a&nbsp; &nbsp;b
--- f ---
Before: a&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;b
After: a&nbsp; &nbsp; b
--- g ---
Before: &nbsp;&nbsp;b
After: &nbsp;b

View file

@ -1,19 +1,35 @@
<!DOCTYPE html>
<script src="../include.js"></script>
<div contenteditable="true">foobar</div>
<div id="a" contenteditable="true">foobar</div>
<div id="b" contenteditable="true">a&nbsp;&nbsp;&nbsp;</div>
<div id="c" contenteditable="true">a&nbsp;&nbsp;b</div>
<div id="d" contenteditable="true">a&nbsp;&nbsp;&nbsp;b</div>
<div id="e" contenteditable="true">a&nbsp;&nbsp;&nbsp;&nbsp;b</div>
<div id="f" contenteditable="true">a&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;b</div>
<div id="g" contenteditable="true">&nbsp;&nbsp;b</div>
<script>
test(() => {
var divElm = document.querySelector('div');
println(`Before: ${divElm.textContent}`);
const testForwardDelete = function(divId, position) {
println(`--- ${divId} ---`);
const divElm = document.querySelector(`div#${divId}`);
println(`Before: ${divElm.innerHTML}`);
// Put cursor after 'foo'
var range = document.createRange();
range.setStart(divElm.childNodes[0], 3);
getSelection().addRange(range);
// Place cursor
const node = divElm.childNodes[0];
getSelection().setBaseAndExtent(node, position, node, position);
// Press delete
document.execCommand('forwardDelete');
// Press delete
document.execCommand('forwardDelete');
println(`After: ${divElm.textContent}`);
println(`After: ${divElm.innerHTML}`);
};
testForwardDelete('a', 3);
testForwardDelete('b', 1);
testForwardDelete('c', 1);
testForwardDelete('d', 1);
testForwardDelete('e', 1);
testForwardDelete('f', 1);
testForwardDelete('g', 0);
});
</script>