LibWeb: Implement the first half of the Adoption Agency Algorithm

The AAA is a somewhat daunting algorithm you have to run for certain
tags when they appear inside the <body> element. Its purpose is to
resolve issues with mismatched tags.

This patch implements the first half of the AAA. We also move the
"list of active formatting elements" to its own class, since it kept
accumulating little behaviors. "Marker" entries are now signified by
null Element pointers in the list.
This commit is contained in:
Andreas Kling 2020-05-27 23:22:42 +02:00
parent 4c9c6b3a7b
commit db6cf9b37d
Notes: sideshowbarker 2024-07-19 06:03:46 +09:00
7 changed files with 248 additions and 16 deletions

View file

@ -89,6 +89,7 @@ set(SOURCES
Parser/HTMLParser.cpp
Parser/HTMLToken.cpp
Parser/HTMLTokenizer.cpp
Parser/ListOfActiveFormattingElements.cpp
Parser/StackOfOpenElements.cpp
ResourceLoader.cpp
StylePropertiesModel.cpp

View file

@ -492,11 +492,12 @@ void HTMLDocumentParser::reconstruct_the_active_formatting_elements()
if (m_list_of_active_formatting_elements.is_empty())
return;
if (m_stack_of_open_elements.contains(m_list_of_active_formatting_elements.last()))
if (m_stack_of_open_elements.contains(*m_list_of_active_formatting_elements.entries().last().element))
return;
ssize_t index = m_list_of_active_formatting_elements.size() - 1;
RefPtr<Element> entry = m_list_of_active_formatting_elements.at(index);
ssize_t index = m_list_of_active_formatting_elements.entries().size() - 1;
RefPtr<Element> entry = m_list_of_active_formatting_elements.entries().at(index).element;
ASSERT(entry);
Rewind:
if (index == 0) {
@ -504,14 +505,16 @@ Rewind:
}
--index;
entry = m_list_of_active_formatting_elements.at(index);
entry = m_list_of_active_formatting_elements.entries().at(index).element;
ASSERT(entry);
if (!m_stack_of_open_elements.contains(*entry))
goto Rewind;
Advance:
++index;
entry = m_list_of_active_formatting_elements.at(index);
entry = m_list_of_active_formatting_elements.entries().at(index).element;
ASSERT(entry);
Create:
// FIXME: Hold on to the real token!
@ -520,12 +523,74 @@ Create:
fake_token.m_tag.tag_name.append(entry->tag_name());
auto new_element = insert_html_element(fake_token);
m_list_of_active_formatting_elements.ptr_at(index) = *new_element;
m_list_of_active_formatting_elements.entries().at(index).element = *new_element;
if (index != (ssize_t)m_list_of_active_formatting_elements.size() - 1)
if (index != (ssize_t)m_list_of_active_formatting_elements.entries().size() - 1)
goto Advance;
}
// Runs the (partially implemented) "adoption agency algorithm" for the tag
// named by `token`.
//
// Paths handled here:
//  - the current node matches the subject and is not an active formatting
//    element: pop it and return;
//  - the formatting element is no longer on the stack of open elements:
//    report a parse error, drop it from the list of active formatting
//    elements and return;
//  - the formatting element is not in scope: report a parse error and return;
//  - there is no furthest block: pop the stack of open elements up to and
//    including the formatting element, then drop it from the list.
//
// Everything else still ends in TODO().
void HTMLDocumentParser::run_the_adoption_agency_algorithm(HTMLToken& token)
{
    auto subject = token.tag_name();

    // If the current node is an HTML element whose tag name is subject,
    // and the current node is not in the list of active formatting elements,
    // then pop the current node off the stack of open elements, and return.
    if (current_node().tag_name() == subject && !m_list_of_active_formatting_elements.contains(current_node())) {
        m_stack_of_open_elements.pop();
        return;
    }

    // NOTE: The outer loop itself is not implemented yet (the label below is
    //       commented out), so the body runs at most once and this counter
    //       check can never fire.
    size_t outer_loop_counter = 0;

    //OuterLoop:
    if (outer_loop_counter >= 8)
        return;

    ++outer_loop_counter;

    auto formatting_element = m_list_of_active_formatting_elements.last_element_with_tag_name_before_marker(subject);
    if (!formatting_element) {
        // FIXME: If there is no such element, then return and instead act as
        //        described in the "any other end tag" entry above.
        TODO();
    }

    // If formatting element is not in the stack of open elements, then this is
    // a parse error; remove the element from the list, and return.
    if (!m_stack_of_open_elements.contains(*formatting_element)) {
        PARSE_ERROR();
        m_list_of_active_formatting_elements.remove(*formatting_element);
        return;
    }

    // The formatting element is on the stack but not in scope: parse error,
    // and nothing else happens.
    if (!m_stack_of_open_elements.has_in_scope(*formatting_element)) {
        PARSE_ERROR();
        return;
    }

    // A formatting element that is not the current node is only reported;
    // processing continues.
    if (formatting_element != &current_node()) {
        PARSE_ERROR();
    }

    // FIXME: Let furthest block be the topmost node in the stack of open elements
    //        that is lower in the stack than formatting element, and is an element
    //        in the special category. There might not be one.
    RefPtr<Element> furthest_block = nullptr;

    if (!furthest_block) {
        // Pop everything above the formatting element, then the formatting
        // element itself, and finally forget about it in the list.
        while (&current_node() != formatting_element)
            m_stack_of_open_elements.pop();
        m_stack_of_open_elements.pop();

        m_list_of_active_formatting_elements.remove(*formatting_element);
        return;
    }

    // FIXME: Implement the rest of the AAA :^)

    TODO();
}
void HTMLDocumentParser::handle_in_body(HTMLToken& token)
{
if (token.is_character()) {
@ -602,13 +667,16 @@ void HTMLDocumentParser::handle_in_body(HTMLToken& token)
return;
}
{
if (token.is_start_tag() && token.tag_name().is_one_of("b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u")) {
reconstruct_the_active_formatting_elements();
auto element = insert_html_element(token);
m_list_of_active_formatting_elements.append(*element);
return;
}
if (token.is_start_tag() && token.tag_name().is_one_of("b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u")) {
reconstruct_the_active_formatting_elements();
auto element = insert_html_element(token);
m_list_of_active_formatting_elements.add(*element);
return;
}
if (token.is_end_tag() && token.tag_name().is_one_of("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u")) {
run_the_adoption_agency_algorithm(token);
return;
}
if (token.is_start_tag() && token.tag_name().is_one_of("address", "article", "aside", "blockquote", "center", "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p", "section", "summary", "ul")) {
@ -746,6 +814,11 @@ void HTMLDocumentParser::handle_text(HTMLToken& token)
return;
}
if (token.is_end_tag() && token.tag_name() == "style") {
current_node().children_changed();
// NOTE: We don't return here, keep going.
}
if (token.is_end_tag()) {
m_stack_of_open_elements.pop();
m_insertion_mode = m_original_insertion_mode;

View file

@ -29,6 +29,7 @@
#include <AK/NonnullRefPtrVector.h>
#include <LibWeb/DOM/Node.h>
#include <LibWeb/Parser/HTMLTokenizer.h>
#include <LibWeb/Parser/ListOfActiveFormattingElements.h>
#include <LibWeb/Parser/StackOfOpenElements.h>
#define ENUMERATE_INSERTION_MODES \
@ -106,13 +107,13 @@ private:
void decrement_script_nesting_level();
size_t script_nesting_level() const { return m_script_nesting_level; }
void reset_the_insertion_mode_appropriately();
void run_the_adoption_agency_algorithm(HTMLToken&);
InsertionMode m_insertion_mode { InsertionMode::Initial };
InsertionMode m_original_insertion_mode { InsertionMode::Initial };
StackOfOpenElements m_stack_of_open_elements;
NonnullRefPtrVector<Element> m_list_of_active_formatting_elements;
ListOfActiveFormattingElements m_list_of_active_formatting_elements;
HTMLTokenizer m_tokenizer;

View file

@ -0,0 +1,74 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <LibWeb/DOM/Element.h>
#include <LibWeb/Parser/ListOfActiveFormattingElements.h>
namespace Web {
// Out-of-line destructor; there is nothing to do beyond destroying the members.
ListOfActiveFormattingElements::~ListOfActiveFormattingElements() = default;
// Appends an entry referring to `element` at the end of the list.
void ListOfActiveFormattingElements::add(Element& element)
{
    m_entries.append(Entry { element });
}
void ListOfActiveFormattingElements::add_marker()
{
m_entries.append({ nullptr });
}
bool ListOfActiveFormattingElements::contains(const Element& element) const
{
for (auto& entry : m_entries) {
if (entry.element == &element)
return true;
}
return false;
}
// Scans the list from its end towards its start and returns the first element
// whose tag name equals `tag_name`. The scan gives up as soon as it meets a
// marker; nullptr is returned in that case and when nothing matches.
Element* ListOfActiveFormattingElements::last_element_with_tag_name_before_marker(const FlyString& tag_name)
{
    size_t remaining = m_entries.size();
    while (remaining > 0) {
        --remaining;
        auto& candidate = m_entries[remaining];
        if (candidate.is_marker())
            break;
        if (candidate.element->tag_name() == tag_name)
            return candidate.element;
    }
    return nullptr;
}
// Drops an entry that refers to `element` (matched by address), delegating to
// Vector::remove_first_matching.
void ListOfActiveFormattingElements::remove(Element& element)
{
    auto* target = &element;
    auto refers_to_target = [target](auto& entry) {
        return entry.element == target;
    };
    m_entries.remove_first_matching(refers_to_target);
}
}

View file

@ -0,0 +1,63 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/NonnullRefPtrVector.h>
#include <LibWeb/DOM/Element.h>
#include <LibWeb/Forward.h>
namespace Web {
// The HTML parser's "list of active formatting elements".
//
// Entries are kept in insertion order. An entry either refers to an Element
// or, when its pointer is null, acts as a marker.
class ListOfActiveFormattingElements {
public:
    // A single slot of the list.
    struct Entry {
        // Null for marker entries.
        RefPtr<Element> element;

        bool is_marker() const { return !element; }
    };

    ListOfActiveFormattingElements() = default;
    ~ListOfActiveFormattingElements();

    // Mutation.
    void add(Element& element);
    void add_marker();
    void remove(Element&);

    // Queries.
    bool is_empty() const { return m_entries.is_empty(); }
    bool contains(const Element&) const;

    // Searches backwards from the end of the list and stops at the first marker.
    Element* last_element_with_tag_name_before_marker(const FlyString& tag_name);

    // Direct access to the underlying entries.
    Vector<Entry>& entries() { return m_entries; }
    const Vector<Entry>& entries() const { return m_entries; }

private:
    Vector<Entry> m_entries;
};
}

View file

@ -52,6 +52,23 @@ bool StackOfOpenElements::has_in_scope(const FlyString& tag_name) const
return has_in_scope_impl(tag_name, s_base_list);
}
bool StackOfOpenElements::has_in_scope_impl(const Element& target_node, const Vector<FlyString>& list) const
{
for (ssize_t i = m_elements.size() - 1; i >= 0; --i) {
auto& node = m_elements.at(i);
if (&node == &target_node)
return true;
if (list.contains_slow(node.tag_name()))
return false;
}
ASSERT_NOT_REACHED();
}
// Element-based scope check using s_base_list as the set of tag names that
// terminate the search.
bool StackOfOpenElements::has_in_scope(const Element& target_node) const
{
    const auto& scope_boundaries = s_base_list;
    return has_in_scope_impl(target_node, scope_boundaries);
}
bool StackOfOpenElements::has_in_button_scope(const FlyString& tag_name) const
{
auto list = s_base_list;

View file

@ -48,12 +48,15 @@ public:
bool has_in_button_scope(const FlyString& tag_name) const;
bool has_in_table_scope(const FlyString& tag_name) const;
bool has_in_scope(const Element&) const;
bool contains(const Element&) const;
const NonnullRefPtrVector<Element>& elements() const { return m_elements; }
private:
bool has_in_scope_impl(const FlyString& tag_name, const Vector<FlyString>&) const;
bool has_in_scope_impl(const Element& target_node, const Vector<FlyString>&) const;
NonnullRefPtrVector<Element> m_elements;
};