LibWeb/HTML: Bail from HTML parsing when EOF hit on document.close

This fixes a crash in the included test that regressed in 0adf261,
and is hit by the following HTML:

```html
<body></body>
<script>
  const frame = document.body.appendChild(document.createElement("iframe"));
  frame.contentDocument.open();
  const child = frame.contentDocument.createElement("html")
  const html = frame.contentDocument.appendChild(child);
  frame.contentDocument.close();
</script>
```

I am not 100% sure this is the fully correct fix, and there are other
cases which would still not work properly. But it's definitely an
improvement to make the confusingly named 'insert_an_eof' function of the
tokenizer actually do something.
This commit is contained in:
Shannon Booth 2025-02-09 12:47:50 +13:00 committed by Tim Ledbetter
commit 7441aa34e4
Notes: github-actions[bot] 2025-02-09 19:21:10 +00:00
4 changed files with 95 additions and 0 deletions

View file

@ -199,6 +199,9 @@ void HTMLParser::run(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point
dbgln_if(HTML_PARSER_DEBUG, "[{}] {}", insertion_mode_name(), token.to_string());
if (token.is_end_of_file() && m_tokenizer.is_eof_inserted())
break;
// https://html.spec.whatwg.org/multipage/parsing.html#tree-construction-dispatcher
// As each token is emitted from the tokenizer, the user agent must follow the appropriate steps from the following list, known as the tree construction dispatcher:
if (m_stack_of_open_elements.is_empty()

View file

@ -0,0 +1,10 @@
Harness status: OK
Found 4 tests
3 Pass
1 Fail
Fail document.open() sets document to no-quirks mode (write no doctype)
Pass document.open() sets document to no-quirks mode (write old doctype)
Pass document.open() sets document to no-quirks mode (write new doctype)
Pass document.open() sets document to no-quirks mode, not limited-quirks mode

View file

@ -0,0 +1,8 @@
<!doctype html>
<!-- Wrapper page: loads the testharness scripts, declares the #log element, then runs quirks.window.js. -->
<meta charset=utf-8>
<script src="../../../../resources/testharness.js"></script>
<script src="../../../../resources/testharnessreport.js"></script>
<div id=log></div>
<script src="../../../../html/webappapis/dynamic-markup-insertion/opening-the-input-stream/quirks.window.js"></script>

View file

@ -0,0 +1,74 @@
// No doctype is ever written: compatMode is expected to read "BackCompat"
// before open(), "CSS1Compat" while the document is open, and "BackCompat"
// again once it has been closed.
test(testCase => {
  const iframe = document.createElement("iframe");
  document.body.appendChild(iframe);
  testCase.add_cleanup(() => iframe.contentDocument.close());

  // Re-read contentDocument on every check instead of caching it.
  const currentMode = () => iframe.contentDocument.compatMode;

  assert_equals(currentMode(), "BackCompat");

  iframe.contentDocument.open();
  assert_equals(currentMode(), "CSS1Compat");

  iframe.contentDocument.close();
  assert_equals(currentMode(), "BackCompat");
}, "document.open() sets document to no-quirks mode (write no doctype)");
// An old public-identifier doctype is written in three pieces. compatMode is
// expected to stay "CSS1Compat" until the closing ">" has been written, and to
// read "BackCompat" from then on, including after close().
test(testCase => {
  const iframe = document.createElement("iframe");
  document.body.appendChild(iframe);
  testCase.add_cleanup(() => iframe.contentDocument.close());

  // Re-read contentDocument on every check instead of caching it.
  const currentMode = () => iframe.contentDocument.compatMode;

  assert_equals(currentMode(), "BackCompat");

  iframe.contentDocument.open();
  assert_equals(currentMode(), "CSS1Compat");

  iframe.contentDocument.write("<!doctype html public");
  assert_equals(currentMode(), "CSS1Compat");

  iframe.contentDocument.write(" \"-//IETF//DTD HTML 3//\"");
  assert_equals(currentMode(), "CSS1Compat");

  iframe.contentDocument.write(">");
  assert_equals(currentMode(), "BackCompat");

  iframe.contentDocument.close();
  assert_equals(currentMode(), "BackCompat");
}, "document.open() sets document to no-quirks mode (write old doctype)");
// A plain "<!doctype html>" is written in two pieces. compatMode is expected
// to read "CSS1Compat" at every step after open(), including after close().
test(testCase => {
  const iframe = document.createElement("iframe");
  document.body.appendChild(iframe);
  testCase.add_cleanup(() => iframe.contentDocument.close());

  // Re-read contentDocument on every check instead of caching it.
  const currentMode = () => iframe.contentDocument.compatMode;

  assert_equals(currentMode(), "BackCompat");

  iframe.contentDocument.open();
  assert_equals(currentMode(), "CSS1Compat");

  iframe.contentDocument.write("<!doctype html");
  assert_equals(currentMode(), "CSS1Compat");

  iframe.contentDocument.write(">");
  assert_equals(currentMode(), "CSS1Compat");

  iframe.contentDocument.close();
  assert_equals(currentMode(), "CSS1Compat");
}, "document.open() sets document to no-quirks mode (write new doctype)");
// Checks that document.open() really puts the document into no-quirks mode
// rather than limited-quirks mode. Since the two cannot be told apart
// directly, the check is adapted from WPT's
// quirks/blocks-ignore-line-height.html and relies on the user agent passing
// that test (true of all major web browsers at the time of writing).
test(testCase => {
  const iframe = document.createElement("iframe");
  document.body.appendChild(iframe);
  testCase.add_cleanup(() => iframe.contentDocument.close());

  assert_equals(iframe.contentDocument.compatMode, "BackCompat");
  iframe.contentDocument.open();
  assert_equals(iframe.contentDocument.compatMode, "CSS1Compat");

  // Build the tree through DOM calls instead of document.write(): going
  // through the parser would reset the document mode.
  const htmlElement = iframe.contentDocument.appendChild(
    iframe.contentDocument.createElement("html"));
  const bodyElement = htmlElement.appendChild(
    iframe.contentDocument.createElement("body"));
  assert_equals(iframe.contentDocument.body, bodyElement);

  // The markup lines stay unindented so the assigned string is unchanged.
  bodyElement.innerHTML = `
<style>#ref { display:block }</style>
<div id=test><font size=1>x</font></div>
<font id=ref size=1>x</font>
<div id=s_ref>x</div>
`;
  assert_equals(iframe.contentDocument.compatMode, "CSS1Compat");

  const testDiv = iframe.contentDocument.getElementById("test");
  const refFont = iframe.contentDocument.getElementById("ref");
  const sRefDiv = iframe.contentDocument.getElementById("s_ref");

  // Computed height of an element, as reported by the iframe's own window.
  const heightOf = element =>
    iframe.contentWindow.getComputedStyle(element).height;

  assert_equals(heightOf(testDiv), heightOf(sRefDiv));
  assert_not_equals(heightOf(testDiv), heightOf(refFont));
}, "document.open() sets document to no-quirks mode, not limited-quirks mode");