LibWeb: Remove most uses of the old HTML parser

The only remaining client of the old parser is the fragment parser used
by the Element.innerHTML setter. We'll need to implement a bit more
stuff in the new parser before we can switch that over.
This commit is contained in:
Andreas Kling 2020-06-21 22:29:05 +02:00
parent c9d55e3b80
commit 07d976716f
Notes: sideshowbarker 2024-07-19 05:27:42 +09:00
5 changed files with 16 additions and 8 deletions

View file

@ -54,7 +54,7 @@ static RefPtr<Document> create_markdown_document(const ByteBuffer& data, const U
if (!markdown_document) if (!markdown_document)
return nullptr; return nullptr;
return parse_html_document(markdown_document->render_to_html(), url); return parse_html_document(markdown_document->render_to_html(), url, "utf-8");
} }
static RefPtr<Document> create_text_document(const ByteBuffer& data, const URL& url) static RefPtr<Document> create_text_document(const ByteBuffer& data, const URL& url)
@ -116,7 +116,7 @@ static RefPtr<Document> create_gemini_document(const ByteBuffer& data, const URL
{ {
auto markdown_document = Gemini::Document::parse({ (const char*)data.data(), data.size() }, url); auto markdown_document = Gemini::Document::parse({ (const char*)data.data(), data.size() }, url);
return parse_html_document(markdown_document->render_to_html(), url); return parse_html_document(markdown_document->render_to_html(), url, "utf-8");
} }
RefPtr<Document> FrameLoader::create_document_from_mime_type(const ByteBuffer& data, const URL& url, const String& mime_type, const String& encoding) RefPtr<Document> FrameLoader::create_document_from_mime_type(const ByteBuffer& data, const URL& url, const String& mime_type, const String& encoding)
@ -190,7 +190,7 @@ void FrameLoader::load_error_page(const URL& failed_url, const String& error)
String::copy(data).characters(), String::copy(data).characters(),
escape_html_entities(failed_url.to_string()).characters(), escape_html_entities(failed_url.to_string()).characters(),
escape_html_entities(error).characters()); escape_html_entities(error).characters());
auto document = parse_html_document(html, failed_url); auto document = parse_html_document(html, failed_url, "utf-8");
ASSERT(document); ASSERT(document);
frame().set_document(document); frame().set_document(document);
frame().page().client().page_did_change_title(document->title()); frame().page().client().page_did_change_title(document->title());

View file

@ -46,6 +46,13 @@
namespace Web { namespace Web {
// Convenience entry point: builds an HTMLDocumentParser over `data` using
// `encoding`, runs it against `url`, and hands back the parser's document.
RefPtr<Document> parse_html_document(const StringView& data, const URL& url, const String& encoding)
{
    HTMLDocumentParser document_parser(data, encoding);
    document_parser.run(url);
    return document_parser.document();
}
HTMLDocumentParser::HTMLDocumentParser(const StringView& input, const String& encoding) HTMLDocumentParser::HTMLDocumentParser(const StringView& input, const String& encoding)
: m_tokenizer(input, encoding) : m_tokenizer(input, encoding)
{ {

View file

@ -59,6 +59,8 @@
namespace Web { namespace Web {
RefPtr<Document> parse_html_document(const StringView&, const URL&, const String& encoding);
class HTMLDocumentParser { class HTMLDocumentParser {
public: public:
HTMLDocumentParser(const StringView& input, const String& encoding); HTMLDocumentParser(const StringView& input, const String& encoding);

View file

@ -82,7 +82,7 @@ static Vector<char> codepoint_to_bytes(const u32 codepoint)
return bytes; return bytes;
} }
static bool parse_html_document(const StringView& html, Document& document, ParentNode& root) static bool deprecated_parse_html_document(const StringView& html, Document& document, ParentNode& root)
{ {
NonnullRefPtrVector<ParentNode> node_stack; NonnullRefPtrVector<ParentNode> node_stack;
node_stack.append(root); node_stack.append(root);
@ -466,19 +466,19 @@ String to_utf8(const StringView& input, const String& encoding)
RefPtr<DocumentFragment> parse_html_fragment(Document& document, const StringView& raw_html, const String& encoding) RefPtr<DocumentFragment> parse_html_fragment(Document& document, const StringView& raw_html, const String& encoding)
{ {
auto fragment = adopt(*new DocumentFragment(document)); auto fragment = adopt(*new DocumentFragment(document));
if (!parse_html_document(to_utf8(raw_html, encoding), document, *fragment)) if (!deprecated_parse_html_document(to_utf8(raw_html, encoding), document, *fragment))
return nullptr; return nullptr;
return fragment; return fragment;
} }
RefPtr<Document> parse_html_document(const StringView& raw_html, const URL& url, const String& encoding) RefPtr<Document> deprecated_parse_html_document(const StringView& raw_html, const URL& url, const String& encoding)
{ {
String html = to_utf8(raw_html, encoding); String html = to_utf8(raw_html, encoding);
auto document = adopt(*new Document(url)); auto document = adopt(*new Document(url));
document->set_source(html); document->set_source(html);
if (!parse_html_document(html, *document, *document)) if (!deprecated_parse_html_document(html, *document, *document))
return nullptr; return nullptr;
document->fixup(); document->fixup();

View file

@ -33,7 +33,6 @@ namespace Web {
class DocumentFragment; class DocumentFragment;
RefPtr<Document> parse_html_document(const StringView&, const URL& = URL(), const String& encoding = "utf-8");
RefPtr<DocumentFragment> parse_html_fragment(Document&, const StringView&, const String& encoding = "utf-8"); RefPtr<DocumentFragment> parse_html_fragment(Document&, const StringView&, const String& encoding = "utf-8");
} }