LibWeb: Remove most uses of the old HTML parser

The only remaining client of the old parser is the fragment parser used
by the Element.innerHTML setter. We'll need to implement a bit more
stuff in the new parser before we can switch that over.
This commit is contained in:
Andreas Kling 2020-06-21 22:29:05 +02:00
parent c9d55e3b80
commit 07d976716f
Notes: sideshowbarker 2024-07-19 05:27:42 +09:00
5 changed files with 16 additions and 8 deletions

View file

@ -54,7 +54,7 @@ static RefPtr<Document> create_markdown_document(const ByteBuffer& data, const U
if (!markdown_document)
return nullptr;
return parse_html_document(markdown_document->render_to_html(), url);
return parse_html_document(markdown_document->render_to_html(), url, "utf-8");
}
static RefPtr<Document> create_text_document(const ByteBuffer& data, const URL& url)
@ -116,7 +116,7 @@ static RefPtr<Document> create_gemini_document(const ByteBuffer& data, const URL
{
auto markdown_document = Gemini::Document::parse({ (const char*)data.data(), data.size() }, url);
return parse_html_document(markdown_document->render_to_html(), url);
return parse_html_document(markdown_document->render_to_html(), url, "utf-8");
}
RefPtr<Document> FrameLoader::create_document_from_mime_type(const ByteBuffer& data, const URL& url, const String& mime_type, const String& encoding)
@ -190,7 +190,7 @@ void FrameLoader::load_error_page(const URL& failed_url, const String& error)
String::copy(data).characters(),
escape_html_entities(failed_url.to_string()).characters(),
escape_html_entities(error).characters());
auto document = parse_html_document(html, failed_url);
auto document = parse_html_document(html, failed_url, "utf-8");
ASSERT(document);
frame().set_document(document);
frame().page().client().page_did_change_title(document->title());

View file

@ -46,6 +46,13 @@
namespace Web {
// Convenience entry point for parsing a whole HTML document.
// Builds an HTMLDocumentParser over `data` with the given `encoding`,
// runs it with `url`, and hands back the document the parser holds afterwards.
RefPtr<Document> parse_html_document(const StringView& data, const URL& url, const String& encoding)
{
    HTMLDocumentParser document_parser(data, encoding);

    document_parser.run(url);

    return document_parser.document();
}
HTMLDocumentParser::HTMLDocumentParser(const StringView& input, const String& encoding)
: m_tokenizer(input, encoding)
{

View file

@ -59,6 +59,8 @@
namespace Web {
// Parses the given input as an HTML document: constructs an HTMLDocumentParser
// from the input and `encoding`, runs it with the URL, and returns the parser's document.
RefPtr<Document> parse_html_document(const StringView&, const URL&, const String& encoding);
class HTMLDocumentParser {
public:
HTMLDocumentParser(const StringView& input, const String& encoding);

View file

@ -82,7 +82,7 @@ static Vector<char> codepoint_to_bytes(const u32 codepoint)
return bytes;
}
static bool parse_html_document(const StringView& html, Document& document, ParentNode& root)
static bool deprecated_parse_html_document(const StringView& html, Document& document, ParentNode& root)
{
NonnullRefPtrVector<ParentNode> node_stack;
node_stack.append(root);
@ -466,19 +466,19 @@ String to_utf8(const StringView& input, const String& encoding)
RefPtr<DocumentFragment> parse_html_fragment(Document& document, const StringView& raw_html, const String& encoding)
{
auto fragment = adopt(*new DocumentFragment(document));
if (!parse_html_document(to_utf8(raw_html, encoding), document, *fragment))
if (!deprecated_parse_html_document(to_utf8(raw_html, encoding), document, *fragment))
return nullptr;
return fragment;
}
RefPtr<Document> parse_html_document(const StringView& raw_html, const URL& url, const String& encoding)
RefPtr<Document> deprecated_parse_html_document(const StringView& raw_html, const URL& url, const String& encoding)
{
String html = to_utf8(raw_html, encoding);
auto document = adopt(*new Document(url));
document->set_source(html);
if (!parse_html_document(html, *document, *document))
if (!deprecated_parse_html_document(html, *document, *document))
return nullptr;
document->fixup();

View file

@ -33,7 +33,6 @@ namespace Web {
class DocumentFragment;
RefPtr<Document> parse_html_document(const StringView&, const URL& = URL(), const String& encoding = "utf-8");
RefPtr<DocumentFragment> parse_html_fragment(Document&, const StringView&, const String& encoding = "utf-8");
}