From c834c594ac88f410f8e01b9b72db4dd5408a5d4d Mon Sep 17 00:00:00 2001 From: Veeti Paananen Date: Wed, 9 Jul 2025 08:29:06 +0300 Subject: [PATCH] LibWeb: Set correct content and document types in DOMParser It appears this was removed by accident in an earlier commit, regressing the included WPT test. --- Libraries/LibWeb/HTML/DOMParser.cpp | 2 + .../DOMParser-parseFromString-html.txt | 15 ++++ .../DOMParser-parseFromString-html.html | 86 +++++++++++++++++++ 3 files changed, 103 insertions(+) create mode 100644 Tests/LibWeb/Text/expected/wpt-import/domparsing/DOMParser-parseFromString-html.txt create mode 100644 Tests/LibWeb/Text/input/wpt-import/domparsing/DOMParser-parseFromString-html.html diff --git a/Libraries/LibWeb/HTML/DOMParser.cpp b/Libraries/LibWeb/HTML/DOMParser.cpp index b1664fc4866..27bb8e6520b 100644 --- a/Libraries/LibWeb/HTML/DOMParser.cpp +++ b/Libraries/LibWeb/HTML/DOMParser.cpp @@ -49,6 +49,8 @@ GC::Ref DOMParser::parse_from_string(StringView string, Bindings: if (type == Bindings::DOMParserSupportedType::Text_Html) { // -> "text/html" document = HTML::HTMLDocument::create(realm(), associated_document.url()); + document->set_content_type(Bindings::idl_enum_to_string(type)); + document->set_document_type(DOM::Document::Type::HTML); // 1. Parse HTML from a string given document and compliantString. FIXME: Use compliantString. 
document->parse_html_from_a_string(string); diff --git a/Tests/LibWeb/Text/expected/wpt-import/domparsing/DOMParser-parseFromString-html.txt b/Tests/LibWeb/Text/expected/wpt-import/domparsing/DOMParser-parseFromString-html.txt new file mode 100644 index 00000000000..a3d5be7ecaa --- /dev/null +++ b/Tests/LibWeb/Text/expected/wpt-import/domparsing/DOMParser-parseFromString-html.txt @@ -0,0 +1,15 @@ +Harness status: OK + +Found 10 tests + +10 Pass +Pass Parsing of id attribute +Pass contentType +Pass compatMode +Pass compatMode for a proper DOCTYPE +Pass Location value +Pass DOMParser parses HTML tag soup with no problems +Pass DOMParser should handle the content of as raw text +Pass DOMParser throws on an invalid enum value +Pass script is found synchronously even when there is a css import +Pass must be parsed with scripting disabled, so noscript works \ No newline at end of file diff --git a/Tests/LibWeb/Text/input/wpt-import/domparsing/DOMParser-parseFromString-html.html b/Tests/LibWeb/Text/input/wpt-import/domparsing/DOMParser-parseFromString-html.html new file mode 100644 index 00000000000..1dd263c39f1 --- /dev/null +++ b/Tests/LibWeb/Text/input/wpt-import/domparsing/DOMParser-parseFromString-html.html @@ -0,0 +1,86 @@ +<!doctype html> +<title>DOMParser basic test of HTML parsing</title> +<script src="../resources/testharness.js"></script> +<script src="../resources/testharnessreport.js"></script> +<script> +// |expected| should be an object indicating the expected type of node. 
+function assert_node(actual, expected) {
+  // Checks the node's interface and, when expected.id is defined, its id too.
+  const { type, id, idMessage } = expected;
+  assert_true(actual instanceof type, `Node type mismatch: actual = ${actual.constructor.name}, expected = ${type.name}`);
+  if (id !== undefined) assert_equals(actual.id, id, idMessage);
+}
+
+var doc;
+setup(() => {
+  // Shared fixture: markup without a doctype, parsed once before the tests run.
+  const markup = '<html id="root"><head></head><body></body></html>';
+  doc = new DOMParser().parseFromString(markup, 'text/html');
+});
+
+test(() => {
+  // The root element must expose the id attribute written in the markup.
+  const expectation = { type: HTMLHtmlElement, id: 'root', idMessage: 'documentElement id attribute should be root.' };
+  assert_node(doc.documentElement, expectation);
+}, 'Parsing of id attribute');
+
+test(() => {
+  assert_equals(doc.contentType, 'text/html');
+}, 'contentType');
+
+test(() => {
+  assert_equals(doc.compatMode, 'BackCompat');
+}, 'compatMode');
+
+test(() => {
+  // Rebinds the shared |doc|, so every test that runs after this one sees the new document.
+  const markup = '<!DOCTYPE html><html id="root"><head></head><body></body></html>';
+  doc = new DOMParser().parseFromString(markup, 'text/html');
+  assert_equals(doc.compatMode, 'CSS1Compat');
+}, 'compatMode for a proper DOCTYPE');
+
+// URL- and encoding-related stuff tested separately.
+
+test(function() {
+  // |doc| is the shared document assigned earlier in this script.
+  assert_equals(doc.location, null, 'The document must have a location value of null.');
+}, 'Location value');
+
+test(function() {
+  // Malformed markup must still yield an <html> root element in the XHTML namespace.
+  const htmldoc = new DOMParser().parseFromString("<!DOCTYPE foo></><foo></multiple></>", "text/html");
+  assert_equals(htmldoc.documentElement.localName, "html");
+  assert_equals(htmldoc.documentElement.namespaceURI, "http://www.w3.org/1999/xhtml");
+}, "DOMParser parses HTML tag soup with no problems");
+
+test(function() {
+  const doc = new DOMParser().parseFromString('<noembed>&lt;a&gt;', 'text/html');
+  assert_equals(doc.querySelector('noembed').textContent, '&lt;a&gt;'); // raw text: character references stay undecoded
+}, 'DOMParser should handle the content of as raw text');
+
+test(function() {
+  assert_throws_js(TypeError, function() {
+    new DOMParser().parseFromString("", "text/foo-this-is-invalid");
+  });
+}, "DOMParser throws on an invalid enum value");
+
+test(() => {
+  const doc = new DOMParser().parseFromString(`
+<html><body>
+<style>
+  @import url(/dummy.css)
+</style>
+<script>document.x = 8<\/script>
+</body></html>`, 'text/html');
+  // The script element must be present in the tree, yet it must never have run.
+  assert_not_equals(doc.querySelector('script'), null, 'script must be found');
+  assert_equals(doc.x, undefined, 'script must not be executed on the inner document');
+  assert_equals(document.x, undefined, 'script must not be executed on the outer document');
+}, 'script is found synchronously even when there is a css import');
+
+test(() => {
+  const doc = new DOMParser().parseFromString(`<body><noscript><p id="test1">test1<p id="test2">test2</noscript>`, 'text/html');
+  assert_node(doc.body.firstChild.childNodes[0], { type: HTMLParagraphElement, id: 'test1' });
+  assert_node(doc.body.firstChild.childNodes[1], { type: HTMLParagraphElement, id: 'test2' });
+}, 'must be parsed with scripting disabled, so noscript works');
+</script>