LibWeb: Align editing whitespace canonicalization with other browsers

The spec calls for a couple of very specific whitespace padding
techniques whenever we canonicalize whitespace during the execution of
editing commands, but it seems that other browsers have a simpler
strategy - let's adopt theirs!
This commit is contained in:
Jelle Raaijmakers 2025-04-29 13:20:15 +02:00 committed by Andreas Kling
commit 6176b05ca5
Notes: github-actions[bot] 2025-04-29 13:33:03 +00:00
3 changed files with 81 additions and 48 deletions

View file

@ -292,55 +292,51 @@ String canonical_space_sequence(u32 length, bool non_breaking_start, bool non_br
auto repeated_pair = non_breaking_start ? "\u00A0 "sv : " \u00A0"sv; auto repeated_pair = non_breaking_start ? "\u00A0 "sv : " \u00A0"sv;
// 6. While n is greater than three, append repeated pair to buffer and subtract two from n. // 6. While n is greater than three, append repeated pair to buffer and subtract two from n.
while (n > 3) { // AD-HOC: Other browsers seem to fit in as many repeated pairs as possible, until the remaining length is <= 2.
while (n > 2) {
buffer.append(repeated_pair); buffer.append(repeated_pair);
n -= 2; n -= 2;
} }
// 7. If n is three, append a three-code unit string to buffer depending on non-breaking start // 7. If n is three, append a three-code unit string to buffer depending on non-breaking start
// and non-breaking end: // and non-breaking end:
if (n == 3) { //
// non-breaking start and non-breaking end false // non-breaking start and non-breaking end false
// U+0020 U+00A0 U+0020 // U+0020 U+00A0 U+0020
if (!non_breaking_start && !non_breaking_end) //
buffer.append(" \u00A0 "sv);
// non-breaking start true, non-breaking end false // non-breaking start true, non-breaking end false
// U+00A0 U+00A0 U+0020 // U+00A0 U+00A0 U+0020
else if (non_breaking_start && !non_breaking_end) //
buffer.append("\u00A0\u00A0 "sv);
// non-breaking start false, non-breaking end true // non-breaking start false, non-breaking end true
// U+0020 U+00A0 U+00A0 // U+0020 U+00A0 U+00A0
else if (!non_breaking_start) //
buffer.append(" \u00A0\u00A0"sv);
// non-breaking start and non-breaking end both true // non-breaking start and non-breaking end both true
// U+00A0 U+0020 U+00A0 // U+00A0 U+0020 U+00A0
else
buffer.append("\u00A0 \u00A0"sv);
}
// 8. Otherwise, append a two-code unit string to buffer depending on non-breaking start and // 8. Otherwise, append a two-code unit string to buffer depending on non-breaking start and
// non-breaking end: // non-breaking end:
else { //
// non-breaking start and non-breaking end false // non-breaking start and non-breaking end false
// non-breaking start true, non-breaking end false // non-breaking start true, non-breaking end false
// U+00A0 U+0020 // U+00A0 U+0020
if (!non_breaking_start && !non_breaking_end) //
buffer.append("\u00A0 "sv);
// non-breaking start false, non-breaking end true // non-breaking start false, non-breaking end true
// U+0020 U+00A0 // U+0020 U+00A0
else if (!non_breaking_start) //
buffer.append(" \u00A0"sv);
// non-breaking start and non-breaking end both true // non-breaking start and non-breaking end both true
// U+00A0 U+00A0 // U+00A0 U+00A0
else
buffer.append("\u00A0\u00A0"sv); // AD-HOC: Other browsers seem to ignore the above and deal differently with padding the remainder; the first
// remaining position is filled with the first character from repeated pair.
if (n > 0) {
buffer.append(repeated_pair.substring_view(0, 1) == " "sv ? " "sv : "\u00A0"sv);
--n;
} }
// AD-HOC: Then, the final position is set depending on the value of non-breaking end.
if (n > 0)
buffer.append(non_breaking_end ? "\u00A0"sv : " "sv);
// 9. Return buffer. // 9. Return buffer.
return MUST(buffer.to_string()); return MUST(buffer.to_string());
} }
@ -528,9 +524,11 @@ void canonicalize_whitespace(DOM::BoundaryPoint boundary, bool fix_collapsed_spa
// start is true if start offset is zero and start node follows a line break, and false // start is true if start offset is zero and start node follows a line break, and false
// otherwise. non-breaking end is true if end offset is end node's length and end node // otherwise. non-breaking end is true if end offset is end node's length and end node
// precedes a line break, and false otherwise. // precedes a line break, and false otherwise.
// AD-HOC: Other browsers' behavior here is to set non_breaking_start to true if length > 1, so we add that
// condition as well.
auto replacement_whitespace = canonical_space_sequence( auto replacement_whitespace = canonical_space_sequence(
length, length,
start_offset == 0 && follows_a_line_break(start_node), (start_offset == 0 && follows_a_line_break(start_node)) || length > 1,
end_offset == end_node->length() && precedes_a_line_break(end_node)); end_offset == end_node->length() && precedes_a_line_break(end_node));
// 10. While (start node, start offset) is before (end node, end offset): // 10. While (start node, start offset) is before (end node, end offset):

View file

@ -1,2 +1,21 @@
--- a ---
Before: foobar Before: foobar
After: fooar After: fooar
--- b ---
Before: a&nbsp;&nbsp;&nbsp;
After: a&nbsp;&nbsp;
--- c ---
Before: a&nbsp;&nbsp;b
After: a b
--- d ---
Before: a&nbsp;&nbsp;&nbsp;b
After: a&nbsp; b
--- e ---
Before: a&nbsp;&nbsp;&nbsp;&nbsp;b
After: a&nbsp; &nbsp;b
--- f ---
Before: a&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;b
After: a&nbsp; &nbsp; b
--- g ---
Before: &nbsp;&nbsp;b
After: &nbsp;b

View file

@ -1,19 +1,35 @@
<!DOCTYPE html> <!DOCTYPE html>
<script src="../include.js"></script> <script src="../include.js"></script>
<div contenteditable="true">foobar</div> <div id="a" contenteditable="true">foobar</div>
<div id="b" contenteditable="true">a&nbsp;&nbsp;&nbsp;</div>
<div id="c" contenteditable="true">a&nbsp;&nbsp;b</div>
<div id="d" contenteditable="true">a&nbsp;&nbsp;&nbsp;b</div>
<div id="e" contenteditable="true">a&nbsp;&nbsp;&nbsp;&nbsp;b</div>
<div id="f" contenteditable="true">a&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;b</div>
<div id="g" contenteditable="true">&nbsp;&nbsp;b</div>
<script> <script>
test(() => { test(() => {
var divElm = document.querySelector('div'); const testForwardDelete = function(divId, position) {
println(`Before: ${divElm.textContent}`); println(`--- ${divId} ---`);
const divElm = document.querySelector(`div#${divId}`);
println(`Before: ${divElm.innerHTML}`);
// Put cursor after 'foo' // Place cursor
var range = document.createRange(); const node = divElm.childNodes[0];
range.setStart(divElm.childNodes[0], 3); getSelection().setBaseAndExtent(node, position, node, position);
getSelection().addRange(range);
// Press delete // Press delete
document.execCommand('forwardDelete'); document.execCommand('forwardDelete');
println(`After: ${divElm.textContent}`); println(`After: ${divElm.innerHTML}`);
};
testForwardDelete('a', 3);
testForwardDelete('b', 1);
testForwardDelete('c', 1);
testForwardDelete('d', 1);
testForwardDelete('e', 1);
testForwardDelete('f', 1);
testForwardDelete('g', 0);
}); });
</script> </script>