From c834c594ac88f410f8e01b9b72db4dd5408a5d4d Mon Sep 17 00:00:00 2001
From: Veeti Paananen <veeti.paananen@rojekti.fi>
Date: Wed, 9 Jul 2025 08:29:06 +0300
Subject: [PATCH] LibWeb: Set correct content and document types in DOMParser

The set_content_type() and set_document_type() calls appear to have been
removed by accident in an earlier commit, regressing the included WPT
test.
---
 Libraries/LibWeb/HTML/DOMParser.cpp           |  2 +
 .../DOMParser-parseFromString-html.txt        | 15 ++++
 .../DOMParser-parseFromString-html.html       | 86 +++++++++++++++++++
 3 files changed, 103 insertions(+)
 create mode 100644 Tests/LibWeb/Text/expected/wpt-import/domparsing/DOMParser-parseFromString-html.txt
 create mode 100644 Tests/LibWeb/Text/input/wpt-import/domparsing/DOMParser-parseFromString-html.html
diff --git a/Libraries/LibWeb/HTML/DOMParser.cpp b/Libraries/LibWeb/HTML/DOMParser.cpp
index b1664fc4866..27bb8e6520b 100644
--- a/Libraries/LibWeb/HTML/DOMParser.cpp
+++ b/Libraries/LibWeb/HTML/DOMParser.cpp
@@ -49,6 +49,8 @@ GC::Ref<DOM::Document> DOMParser::parse_from_string(StringView string, Bindings:
     if (type == Bindings::DOMParserSupportedType::Text_Html) {
         // -> "text/html"
         document = HTML::HTMLDocument::create(realm(), associated_document.url());
+        document->set_content_type(Bindings::idl_enum_to_string(type));
+        document->set_document_type(DOM::Document::Type::HTML);
 
         // 1. Parse HTML from a string given document and compliantString. FIXME: Use compliantString.
         document->parse_html_from_a_string(string);
diff --git a/Tests/LibWeb/Text/expected/wpt-import/domparsing/DOMParser-parseFromString-html.txt b/Tests/LibWeb/Text/expected/wpt-import/domparsing/DOMParser-parseFromString-html.txt
new file mode 100644
index 00000000000..a3d5be7ecaa
--- /dev/null
+++ b/Tests/LibWeb/Text/expected/wpt-import/domparsing/DOMParser-parseFromString-html.txt
@@ -0,0 +1,15 @@
+Harness status: OK
+
+Found 10 tests
+
+10 Pass
+Pass	Parsing of id attribute
+Pass	contentType
+Pass	compatMode
+Pass	compatMode for a proper DOCTYPE
+Pass	Location value
+Pass	DOMParser parses HTML tag soup with no problems
+Pass	DOMParser should handle the content of <noembed> as raw text
+Pass	DOMParser throws on an invalid enum value
+Pass	script is found synchronously even when there is a css import
+Pass	must be parsed with scripting disabled, so noscript works
\ No newline at end of file
diff --git a/Tests/LibWeb/Text/input/wpt-import/domparsing/DOMParser-parseFromString-html.html b/Tests/LibWeb/Text/input/wpt-import/domparsing/DOMParser-parseFromString-html.html
new file mode 100644
index 00000000000..1dd263c39f1
--- /dev/null
+++ b/Tests/LibWeb/Text/input/wpt-import/domparsing/DOMParser-parseFromString-html.html
@@ -0,0 +1,86 @@
+<!doctype html>
+<title>DOMParser basic test of HTML parsing</title>
+<script src="../resources/testharness.js"></script>
+<script src="../resources/testharnessreport.js"></script>
+<script>
+// Asserts |actual| is an instance of |expected.type|; |expected.id| (optional) is compared to |actual.id|.
+function assert_node(actual, expected) {
+    assert_true(actual instanceof expected.type,
+                'Node type mismatch: actual = ' + actual.constructor.name + ', expected = ' + expected.type.name);
+    if (typeof(expected.id) !== 'undefined') // id check is skipped when no id is given
+        assert_equals(actual.id, expected.id, expected.idMessage); // idMessage is passed as the failure description
+}
+
+var doc; // shared by the tests below; first assigned in setup()
+setup(function() {
+    var parser = new DOMParser();
+    var input = '<html id="root"><head></head><body></body></html>'; // note: no DOCTYPE
+    doc = parser.parseFromString(input, 'text/html');
+});
+
+test(function() {
+    var root = doc.documentElement; // root element of the shared |doc|
+    assert_node(root, { type: HTMLHtmlElement, id: 'root',
+                        idMessage: 'documentElement id attribute should be root.' });
+}, 'Parsing of id attribute');
+
+test(function() {
+    assert_equals(doc.contentType, "text/html") // same type string that setup() passed to parseFromString()
+}, 'contentType');
+
+test(function() {
+    assert_equals(doc.compatMode, "BackCompat") // the setup() input has no DOCTYPE
+}, 'compatMode');
+
+test(function() {
+    var parser = new DOMParser();
+    var input = '<!DOCTYPE html><html id="root"><head></head><body></body></html>'; // setup() markup plus a DOCTYPE
+    doc = parser.parseFromString(input, 'text/html'); // NOTE: overwrites the shared |doc| (no local declaration here)
+    assert_equals(doc.compatMode, "CSS1Compat")
+}, 'compatMode for a proper DOCTYPE');
+
+// URL- and encoding-related stuff tested separately.
+
+test(function() {
+    assert_equals(doc.location, null, // |doc| is the shared variable, as last assigned by an earlier block
+                  'The document must have a location value of null.');
+}, 'Location value');
+
+test(function() {
+    var soup = "<!DOCTYPE foo></><foo></multiple></>"; // deliberately malformed markup
+    var htmldoc = new DOMParser().parseFromString(soup, "text/html");
+    assert_equals(htmldoc.documentElement.localName, "html"); // a root html element is still expected
+    assert_equals(htmldoc.documentElement.namespaceURI, "http://www.w3.org/1999/xhtml");
+}, "DOMParser parses HTML tag soup with no problems");
+
+test(function() {
+   const doc = new DOMParser().parseFromString('<noembed>&lt;a&gt;</noembed>', 'text/html'); // local |doc| shadows the shared one
+   assert_equals(doc.querySelector('noembed').textContent, '&lt;a&gt;'); // character references are expected back undecoded
+}, 'DOMParser should handle the content of <noembed> as raw text');
+
+test(function() {
+    assert_throws_js(TypeError, function() {
+        new DOMParser().parseFromString("", "text/foo-this-is-invalid"); // unsupported type string
+    })
+}, "DOMParser throws on an invalid enum value")
+
+test(() => { // the closing script tag in the template below is backslash-escaped so it does not end this enclosing script block
+   const doc = new DOMParser().parseFromString(`
+<html><body>
+<style>
+  @import url(/dummy.css)
+</style>
+<script>document.x = 8<\/script>
+</body></html>`, 'text/html');
+
+  assert_not_equals(doc.querySelector('script'), null, 'script must be found'); // queried right after parsing, no waiting
+  assert_equals(doc.x, undefined, 'script must not be executed on the inner document');
+  assert_equals(document.x, undefined, 'script must not be executed on the outer document');
+}, 'script is found synchronously even when there is a css import');
+
+test(() => {
+    const doc = new DOMParser().parseFromString(`<body><noscript><p id="test1">test1<p id="test2">test2</noscript>`, 'text/html');
+    assert_node(doc.body.firstChild.childNodes[0], { type: HTMLParagraphElement, id: 'test1' }); // firstChild is expected to be the noscript element
+    assert_node(doc.body.firstChild.childNodes[1], { type: HTMLParagraphElement, id: 'test2' }); // both paragraphs must exist as element children
+}, 'must be parsed with scripting disabled, so noscript works');
+</script>