mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-08-01 21:59:07 +00:00
Everywhere: Hoist the Libraries folder to the top-level
This commit is contained in:
parent
950e819ee7
commit
93712b24bf
Notes:
github-actions[bot]
2024-11-10 11:51:52 +00:00
Author: https://github.com/trflynn89
Commit: 93712b24bf
Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/2256
Reviewed-by: https://github.com/sideshowbarker
4547 changed files with 104 additions and 113 deletions
2279
Libraries/LibWeb/HTML/Parser/Entities.cpp
Normal file
2279
Libraries/LibWeb/HTML/Parser/Entities.cpp
Normal file
File diff suppressed because it is too large
Load diff
21
Libraries/LibWeb/HTML/Parser/Entities.h
Normal file
21
Libraries/LibWeb/HTML/Parser/Entities.h
Normal file
|
@ -0,0 +1,21 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <andreas@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Vector.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
// Value type carried inside the Optional returned by code_points_from_entity().
struct EntityMatch {
|
||||
// Code point(s) for the entity; the Vector is declared with an inline capacity of 2.
Vector<u32, 2> code_points;
|
||||
// NOTE(review): presumably a view of the entity text that was matched — confirm against Entities.cpp.
StringView entity;
|
||||
};
|
||||
|
||||
// Takes a StringView and yields an EntityMatch, or an empty Optional.
// NOTE(review): the matching rules live in Entities.cpp, which is not visible here — confirm before relying on details.
Optional<EntityMatch> code_points_from_entity(StringView);
|
||||
|
||||
}
|
416
Libraries/LibWeb/HTML/Parser/HTMLEncodingDetection.cpp
Normal file
416
Libraries/LibWeb/HTML/Parser/HTMLEncodingDetection.cpp
Normal file
|
@ -0,0 +1,416 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/CharacterTypes.h>
|
||||
#include <AK/GenericLexer.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Utf8View.h>
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
#include <LibWeb/DOM/Attr.h>
|
||||
#include <LibWeb/DOM/Document.h>
|
||||
#include <LibWeb/HTML/Parser/HTMLEncodingDetection.h>
|
||||
#include <LibWeb/Infra/CharacterTypes.h>
|
||||
#include <ctype.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
bool prescan_should_abort(ByteBuffer const& input, size_t const& position)
{
    // The prescan never looks beyond the first 1024 bytes of the stream,
    // and of course never beyond the end of the buffer itself.
    constexpr size_t prescan_byte_limit = 1024;

    if (position >= prescan_byte_limit)
        return true;
    return position >= input.size();
}
|
||||
|
||||
bool prescan_is_whitespace_or_slash(u8 const& byte)
{
    // 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), 0x20 (SP) and 0x2F (/).
    switch (byte) {
    case '\t':
    case '\n':
    case '\f':
    case '\r':
    case ' ':
    case '/':
        return true;
    default:
        return false;
    }
}
|
||||
|
||||
bool prescan_skip_whitespace_and_slashes(ByteBuffer const& input, size_t& position)
{
    // Advance position past any run of whitespace / slash bytes, stopping early if the prescan limit is hit.
    // The byte classification is shared with prescan_is_whitespace_or_slash() so the two cannot drift apart.
    while (!prescan_should_abort(input, position) && prescan_is_whitespace_or_slash(input[position]))
        ++position;

    // True means input[position] is still safe to read; false means the prescan has to stop.
    return !prescan_should_abort(input, position);
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/urls-and-fetching.html#algorithm-for-extracting-a-character-encoding-from-a-meta-element
|
||||
Optional<StringView> extract_character_encoding_from_meta_element(ByteString const& string)
{
    // Both the "charset" keyword match and the encoding lookup are case insensitive,
    // so the whole input is lowercased once up front.
    auto lowercase_string = string.to_lowercase();
    GenericLexer lexer(lowercase_string);

    auto skip_ascii_whitespace = [&lexer] {
        lexer.ignore_while([](char c) {
            return Infra::is_ascii_whitespace(c);
        });
    };

    // Find an occurrence of "charset" that is followed (after optional whitespace) by '='.
    constexpr auto charset_keyword = "charset"sv;
    while (true) {
        auto keyword_offset = lexer.remaining().find(charset_keyword);
        if (!keyword_offset.has_value())
            return {};

        // Step over everything up to and including the keyword itself.
        lexer.ignore(keyword_offset.value() + charset_keyword.length());
        skip_ascii_whitespace();

        if (lexer.peek() == '=')
            break;
    }

    // Consume the '=' and any whitespace after it.
    lexer.ignore();
    skip_ascii_whitespace();

    if (lexer.is_eof())
        return {};

    // Quoted value: the encoding label is everything up to the matching quote.
    // An unmatched quote yields nothing.
    char const first_character = lexer.peek();
    if (first_character == '"' || first_character == '\'') {
        lexer.ignore();
        auto closing_quote_offset = lexer.remaining().find(first_character);
        if (!closing_quote_offset.has_value())
            return {};

        auto quoted_label = lexer.remaining().substring_view(0, closing_quote_offset.value());
        return TextCodec::get_standardized_encoding(quoted_label);
    }

    // Unquoted value: the label runs up to the next whitespace or ';' (or the end of the string).
    auto unquoted_label = lexer.consume_until([](char c) {
        return Infra::is_ascii_whitespace(c) || c == ';';
    });
    return TextCodec::get_standardized_encoding(unquoted_label);
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#concept-get-attributes-when-sniffing
|
||||
JS::GCPtr<DOM::Attr> prescan_get_attribute(DOM::Document& document, ByteBuffer const& input, size_t& position)
{
    // Implements the spec's "get an attribute" algorithm used while prescanning.
    // Returns null when there is no further attribute or when the prescan ran out of bytes;
    // otherwise returns a freshly created Attr and leaves position after the bytes that were consumed.

    // Whitespace as used by steps 4, 6, 9 and 11. Unlike step 1, 0x2F (/) is NOT part of this set.
    auto is_prescan_whitespace = [](u8 byte) {
        return byte == '\t' || byte == '\n' || byte == '\f' || byte == '\r' || byte == ' ';
    };

    // Skips whitespace only. Returns false if the prescan limit was reached while doing so.
    auto skip_whitespace = [&]() -> bool {
        while (!prescan_should_abort(input, position) && is_prescan_whitespace(input[position]))
            ++position;
        return !prescan_should_abort(input, position);
    };

    // Appends the byte as a code point, converting 0x41 (A) - 0x5A (Z) to lowercase by adding 0x20.
    // (It doesn't matter how non-ASCII bytes are handled, since only ASCII bytes can contribute to
    // the detection of a character encoding.)
    auto append_lowercased = [](StringBuilder& builder, u8 byte) {
        if (byte >= 'A' && byte <= 'Z')
            builder.append_code_point(byte + 0x20);
        else
            builder.append_code_point(byte);
    };

    // 1. If the byte at position is one of 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), 0x20 (SP), or 0x2F (/) then advance position to the next byte and redo this step.
    if (!prescan_skip_whitespace_and_slashes(input, position))
        return {};

    // 2. If the byte at position is 0x3E (>), then abort the get an attribute algorithm. There isn't one.
    if (input[position] == '>')
        return {};

    // 3. Otherwise, the byte at position is the start of the attribute name. Let attribute name and attribute value be the empty string.
    // 4. Process the byte at position as follows:
    StringBuilder attribute_name;
    while (true) {
        auto const byte = input[position];

        // -> If it is 0x3D (=), and the attribute name is longer than the empty string
        if (byte == '=' && !attribute_name.is_empty()) {
            // Advance position to the next byte and jump to the step below labeled value.
            ++position;
            goto value;
        }
        // -> If it is 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), or 0x20 (SP)
        if (is_prescan_whitespace(byte)) {
            // Jump to the step below labeled spaces.
            goto spaces;
        }
        // -> If it is 0x2F (/) or 0x3E (>)
        if (byte == '/' || byte == '>') {
            // Abort the get an attribute algorithm. The attribute's name is the value of attribute name, its value is the empty string.
            return DOM::Attr::create(document, MUST(attribute_name.to_string()), String {});
        }
        // -> If it is in the range 0x41 (A) to 0x5A (Z): append the lowercased code point.
        // -> Anything else: append the code point with the same value as the byte.
        append_lowercased(attribute_name, byte);

        // 5. Advance position to the next byte and return to the previous step.
        ++position;
        if (prescan_should_abort(input, position))
            return {};
    }

spaces:
    // 6. Spaces: If the byte at position is one of 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), or 0x20 (SP)
    //    then advance position to the next byte, then, repeat this step.
    // NOTE: Only whitespace is skipped here; a 0x2F (/) byte must fall through to step 7.
    if (!skip_whitespace())
        return {};

    // 7. If the byte at position is not 0x3D (=), abort the get an attribute algorithm.
    //    The attribute's name is the value of attribute name, its value is the empty string.
    if (input[position] != '=')
        return DOM::Attr::create(document, MUST(attribute_name.to_string()), String {});

    // 8. Advance position past the 0x3D (=) byte.
    ++position;

value:
    // 9. Value: If the byte at position is one of 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), or 0x20 (SP)
    //    then advance position to the next byte, then, repeat this step.
    // NOTE: Only whitespace is skipped here; a leading 0x2F (/) is part of the attribute value.
    if (!skip_whitespace())
        return {};

    StringBuilder attribute_value;
    // 10. Process the byte at position as follows:

    // -> If it is 0x22 (") or 0x27 (')
    if (input[position] == '"' || input[position] == '\'') {
        // 1. Let b be the value of the byte at position.
        u8 quote_character = input[position];

        // 2. Quote loop: Advance position to the next byte.
        ++position;

        for (; !prescan_should_abort(input, position); ++position) {
            // 3. If the value of the byte at position is the value of b, then advance position to the next byte
            //    and abort the "get an attribute" algorithm.
            //    The attribute's name is the value of attribute name, and its value is the value of attribute value.
            if (input[position] == quote_character) {
                // Step past the closing quote so the next attribute does not start with it.
                ++position;
                return DOM::Attr::create(document, MUST(attribute_name.to_string()), MUST(attribute_value.to_string()));
            }

            // 4. Otherwise, if the value of the byte at position is in the range 0x41 (A) to 0x5A (Z),
            //    then append a code point to attribute value whose value is 0x20 more than the value of the byte at position.
            // 5. Otherwise, append a code point to attribute value whose value is the same as the value of the byte at position.
            append_lowercased(attribute_value, input[position]);

            // 6. Return to the step above labeled quote loop.
        }
        // Ran out of bytes before the closing quote was found.
        return {};
    }

    // -> If it is 0x3E (>)
    if (input[position] == '>') {
        // Abort the get an attribute algorithm. The attribute's name is the value of attribute name, its value is the empty string.
        return DOM::Attr::create(document, MUST(attribute_name.to_string()), String {});
    }

    // -> If it is in the range 0x41 (A) to 0x5A (Z): append b+0x20 to attribute value.
    // -> Anything else: append a code point with the same value as the byte.
    append_lowercased(attribute_value, input[position]);
    // Advance position to the next byte.
    ++position;

    if (prescan_should_abort(input, position))
        return {};

    // 11. Process the byte at position as follows:
    for (; !prescan_should_abort(input, position); ++position) {
        auto const byte = input[position];

        // -> If it is 0x09 (HT), 0x0A (LF), 0x0C (FF), 0x0D (CR), 0x20 (SP), or 0x3E (>)
        if (is_prescan_whitespace(byte) || byte == '>') {
            // Abort the get an attribute algorithm. The attribute's name is the value of attribute name and its value is the value of attribute value.
            return DOM::Attr::create(document, MUST(attribute_name.to_string()), MUST(attribute_value.to_string()));
        }

        // -> If it is in the range 0x41 (A) to 0x5A (Z): append b+0x20 to attribute value.
        // -> Anything else: append a code point with the same value as the byte.
        append_lowercased(attribute_value, byte);

        // 12. Advance position to the next byte and return to the previous step.
    }
    return {};
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#prescan-a-byte-stream-to-determine-its-encoding
|
||||
Optional<ByteString> run_prescan_byte_stream_algorithm(DOM::Document& document, ByteBuffer const& input)
{
    // https://html.spec.whatwg.org/multipage/parsing.html#prescan-a-byte-stream-to-determine-its-encoding
    // Scans the start of the byte stream for an encoding declaration.
    // Returns the encoding name on success, or an empty Optional if nothing usable was found.

    // Detects '<?x'
    if (!prescan_should_abort(input, 5)) {
        // A sequence of bytes starting with: 0x3C, 0x0, 0x3F, 0x0, 0x78, 0x0
        if (input[0] == 0x3C && input[1] == 0x00 && input[2] == 0x3F && input[3] == 0x00 && input[4] == 0x78 && input[5] == 0x00)
            return "utf-16le";
        // A sequence of bytes starting with: 0x0, 0x3C, 0x0, 0x3F, 0x0, 0x78
        if (input[0] == 0x00 && input[1] == 0x3C && input[2] == 0x00 && input[3] == 0x3F && input[4] == 0x00 && input[5] == 0x78)
            return "utf-16be";
    }

    for (size_t position = 0; !prescan_should_abort(input, position); ++position) {
        if (!prescan_should_abort(input, position + 5) && input[position] == '<' && input[position + 1] == '!'
            && input[position + 2] == '-' && input[position + 3] == '-') {
            // A comment: move to the '>' of the first "-->" that follows. The two dashes may be
            // the same ones that opened the comment, hence starting the search at position + 2.
            position += 2;
            for (; !prescan_should_abort(input, position + 3); ++position) {
                if (input[position] == '-' && input[position + 1] == '-' && input[position + 2] == '>') {
                    position += 2;
                    break;
                }
            }
        } else if (!prescan_should_abort(input, position + 6)
            && input[position] == '<'
            && (input[position + 1] == 'M' || input[position + 1] == 'm')
            && (input[position + 2] == 'E' || input[position + 2] == 'e')
            && (input[position + 3] == 'T' || input[position + 3] == 't')
            && (input[position + 4] == 'A' || input[position + 4] == 'a')
            && prescan_is_whitespace_or_slash(input[position + 5])) {
            // A <meta> tag: collect its attributes and see whether they declare an encoding.
            position += 6;
            Vector<FlyString> attribute_list {};
            bool got_pragma = false;
            Optional<bool> need_pragma {};
            Optional<ByteString> charset {};

            while (true) {
                auto attribute = prescan_get_attribute(document, input, position);
                if (!attribute)
                    break;
                // Only the first occurrence of each attribute name is considered.
                if (attribute_list.contains_slow(attribute->name()))
                    continue;
                auto const& attribute_name = attribute->name();
                attribute_list.append(attribute->name());

                if (attribute_name == "http-equiv") {
                    got_pragma = attribute->value() == "content-type";
                } else if (attribute_name == "content") {
                    auto encoding = extract_character_encoding_from_meta_element(attribute->value().to_byte_string());
                    if (encoding.has_value() && !charset.has_value()) {
                        charset = encoding.value();
                        need_pragma = true;
                    }
                } else if (attribute_name == "charset") {
                    // FIXME: The spec sets charset to failure (and need pragma to false) when the label is
                    //        not a known encoding; here an unknown label is simply ignored.
                    auto maybe_charset = TextCodec::get_standardized_encoding(attribute->value());
                    if (maybe_charset.has_value()) {
                        charset = maybe_charset.value();
                        need_pragma = false;
                    }
                }
            }

            if (!need_pragma.has_value() || (need_pragma.value() && !got_pragma) || !charset.has_value())
                continue;

            // "If charset is UTF-16BE/LE, then set charset to UTF-8."
            // "UTF-16BE/LE" is spec shorthand for either UTF-16 variant, not an actual encoding name, so the
            // individual names have to be checked as well.
            // NOTE(review): this assumes get_standardized_encoding() reports them as "UTF-16BE" / "UTF-16LE" — confirm.
            auto const& detected_charset = charset.value();
            if (detected_charset == "UTF-16BE/LE" || detected_charset == "UTF-16BE" || detected_charset == "UTF-16LE")
                return "UTF-8";
            if (detected_charset == "x-user-defined")
                return "windows-1252";
            return detected_charset;
        } else if (!prescan_should_abort(input, position + 3) && input[position] == '<'
            && ((input[position + 1] == '/' && is_ascii_alpha(input[position + 2])) || is_ascii_alpha(input[position + 1]))) {
            // Any other start or end tag. The spec asks for an ASCII letter here, so the locale-independent
            // is_ascii_alpha() is used rather than isalpha().
            position += 2;
            prescan_skip_whitespace_and_slashes(input, position);
            // Consume (and discard) all of the tag's attributes.
            while (prescan_get_attribute(document, input, position)) { };
        } else if (!prescan_should_abort(input, position + 1) && input[position] == '<' && (input[position + 1] == '!' || input[position + 1] == '/' || input[position + 1] == '?')) {
            // "<!", "</" or "<?": skip ahead to the next '>' byte.
            position += 1;
            do {
                position += 1;
                if (prescan_should_abort(input, position))
                    return {};
            } while (input[position] != '>');
        } else {
            // Do nothing.
        }
    }
    return {};
}
|
||||
|
||||
// https://encoding.spec.whatwg.org/#bom-sniff
|
||||
Optional<ByteString> run_bom_sniff(ByteBuffer const& input)
{
    // 1. Let BOM be the result of peeking 3 bytes from ioQueue, converted to a byte sequence.
    // NOTE: Peeking yields fewer bytes if the input is shorter, so a 2-byte input can still carry a UTF-16 BOM.
    //       Each row below therefore only requires as many bytes as its own BOM is long.

    // 2. For each of the rows in the table below, starting with the first one and going down, if BOM starts with
    //    the bytes given in the first column, then return the encoding given in the cell in the second column of
    //    that row. Otherwise, return null.
    // Byte order mark  Encoding
    // 0xEF 0xBB 0xBF   UTF-8
    // 0xFE 0xFF        UTF-16BE
    // 0xFF 0xFE        UTF-16LE
    if (input.size() >= 3 && input[0] == 0xEF && input[1] == 0xBB && input[2] == 0xBF)
        return "UTF-8";

    if (input.size() >= 2) {
        if (input[0] == 0xFE && input[1] == 0xFF)
            return "UTF-16BE";
        if (input[0] == 0xFF && input[1] == 0xFE)
            return "UTF-16LE";
    }

    return {};
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding
|
||||
ByteString run_encoding_sniffing_algorithm(DOM::Document& document, ByteBuffer const& input, Optional<MimeSniff::MimeType> maybe_mime_type)
{
    // 1. If the result of BOM sniffing is an encoding, return that encoding with confidence certain.
    // FIXME: There is no concept of decoding certainty yet.
    if (auto bom_encoding = run_bom_sniff(input); bom_encoding.has_value())
        return bom_encoding.value();

    // 2. FIXME: If the user has explicitly instructed the user agent to override the document's character encoding
    //    with a specific encoding, optionally return that encoding with the confidence certain.

    // 3. FIXME: The user agent may wait for more bytes of the resource to be available, either in this step or at
    //    any later step in this algorithm. For instance, a user agent might wait 500ms or 1024 bytes, whichever
    //    came first. Preparsing the source to find the encoding generally improves performance, but waiting too
    //    long for data could outweigh any gain from the preparse.

    // 4. If the transport layer specifies a character encoding, and it is supported, return that encoding with the
    //    confidence certain.
    if (maybe_mime_type.has_value()) {
        // FIXME: This is awkward because legacy_extract_an_encoding can not fail,
        //        so a sentinel fallback is used to detect "no encoding found".
        auto transport_encoding = Fetch::Infrastructure::legacy_extract_an_encoding(maybe_mime_type, "invalid"sv);
        if (transport_encoding != "invalid"sv)
            return transport_encoding;
    }

    // 5. Optionally prescan the byte stream to determine its encoding, with the end condition being when the user
    //    agent decides that scanning further bytes would not be efficient. User agents are encouraged to only
    //    prescan the first 1024 bytes.
    //    The prescan returns either a character encoding or failure. If it returns a character encoding, then
    //    return the same encoding, with confidence tentative.
    if (auto prescanned_encoding = run_prescan_byte_stream_algorithm(document, input); prescanned_encoding.has_value())
        return prescanned_encoding.value();

    // 6. FIXME: If the HTML parser for which this algorithm is being run is associated with a Document d whose
    //    container document is non-null, then:
    //    1. Let parentDocument be d's container document.
    //    2. If parentDocument's origin is same origin with d's origin and parentDocument's character encoding is
    //       not UTF-16BE/LE, then return parentDocument's character encoding, with the confidence tentative.

    // 7. Otherwise, if the user agent has information on the likely encoding for this page, e.g. based on the
    //    encoding of the page when it was last visited, then return that encoding, with the confidence tentative.

    // 8. FIXME: The user agent may attempt to autodetect the character encoding from applying frequency analysis or
    //    other algorithms to the data stream. If autodetection succeeds in determining a supported character
    //    encoding, then return that encoding, with the confidence tentative. [UNIVCHARDET]
    //    For now the only check is whether the input is valid UTF-8.
    bool const input_is_valid_utf8 = Utf8View(StringView(input)).validate();
    if (!input_is_valid_utf8) {
        // FIXME: As soon as Locale is supported, this should sometimes return a different encoding based on the locale.
        return "windows-1252";
    }

    // 9. Otherwise, return an implementation-defined or user-specified default character encoding, with the
    //    confidence tentative. In controlled environments or in environments where the encoding of documents can
    //    be prescribed, the comprehensive UTF-8 encoding is suggested.
    return "UTF-8";
}
|
||||
|
||||
}
|
25
Libraries/LibWeb/HTML/Parser/HTMLEncodingDetection.h
Normal file
25
Libraries/LibWeb/HTML/Parser/HTMLEncodingDetection.h
Normal file
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/ByteString.h>
|
||||
#include <AK/Optional.h>
|
||||
#include <LibJS/Heap/GCPtr.h>
|
||||
#include <LibWeb/Forward.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
// Returns true once position is at/after the end of input or at/after byte 1024, i.e. the prescan must stop.
bool prescan_should_abort(ByteBuffer const& input, size_t const& position);
|
||||
// Returns true for tab, LF, FF, CR, space and '/'.
bool prescan_is_whitespace_or_slash(u8 const& byte);
|
||||
// Advances position past whitespace and '/' bytes; returns false if the prescan must stop afterwards.
bool prescan_skip_whitespace_and_slashes(ByteBuffer const& input, size_t& position);
|
||||
// Looks for "charset = <label>" in the given string and returns the standardized encoding for the label, if any.
Optional<StringView> extract_character_encoding_from_meta_element(ByteString const&);
|
||||
// Reads one attribute starting at position (advancing it); returns null if there is none or the prescan must stop.
JS::GCPtr<DOM::Attr> prescan_get_attribute(DOM::Document&, ByteBuffer const& input, size_t& position);
|
||||
// Prescans the start of input for an encoding declaration; empty Optional when none is found.
Optional<ByteString> run_prescan_byte_stream_algorithm(DOM::Document&, ByteBuffer const& input);
|
||||
// Returns "UTF-8", "UTF-16BE" or "UTF-16LE" if input starts with the corresponding byte order mark.
Optional<ByteString> run_bom_sniff(ByteBuffer const& input);
|
||||
// Determines the encoding to decode input with: BOM, then transport-layer MIME type, then prescan, then a fallback.
ByteString run_encoding_sniffing_algorithm(DOM::Document&, ByteBuffer const& input, Optional<MimeSniff::MimeType> maybe_mime_type = {});
|
||||
|
||||
}
|
5123
Libraries/LibWeb/HTML/Parser/HTMLParser.cpp
Normal file
5123
Libraries/LibWeb/HTML/Parser/HTMLParser.cpp
Normal file
File diff suppressed because it is too large
Load diff
221
Libraries/LibWeb/HTML/Parser/HTMLParser.h
Normal file
221
Libraries/LibWeb/HTML/Parser/HTMLParser.h
Normal file
|
@ -0,0 +1,221 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2022, Andreas Kling <andreas@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <LibGfx/Color.h>
|
||||
#include <LibJS/Heap/Cell.h>
|
||||
#include <LibWeb/DOM/Node.h>
|
||||
#include <LibWeb/HTML/Parser/HTMLTokenizer.h>
|
||||
#include <LibWeb/HTML/Parser/ListOfActiveFormattingElements.h>
|
||||
#include <LibWeb/HTML/Parser/StackOfOpenElements.h>
|
||||
#include <LibWeb/MimeSniff/MimeType.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
// X-macro listing every parser insertion mode. Users define __ENUMERATE_INSERTION_MODE(mode) before
// expanding this list and undefine it afterwards (see HTMLParser::InsertionMode).
#define ENUMERATE_INSERTION_MODES \
|
||||
__ENUMERATE_INSERTION_MODE(Initial) \
|
||||
__ENUMERATE_INSERTION_MODE(BeforeHTML) \
|
||||
__ENUMERATE_INSERTION_MODE(BeforeHead) \
|
||||
__ENUMERATE_INSERTION_MODE(InHead) \
|
||||
__ENUMERATE_INSERTION_MODE(InHeadNoscript) \
|
||||
__ENUMERATE_INSERTION_MODE(AfterHead) \
|
||||
__ENUMERATE_INSERTION_MODE(InBody) \
|
||||
__ENUMERATE_INSERTION_MODE(Text) \
|
||||
__ENUMERATE_INSERTION_MODE(InTable) \
|
||||
__ENUMERATE_INSERTION_MODE(InTableText) \
|
||||
__ENUMERATE_INSERTION_MODE(InCaption) \
|
||||
__ENUMERATE_INSERTION_MODE(InColumnGroup) \
|
||||
__ENUMERATE_INSERTION_MODE(InTableBody) \
|
||||
__ENUMERATE_INSERTION_MODE(InRow) \
|
||||
__ENUMERATE_INSERTION_MODE(InCell) \
|
||||
__ENUMERATE_INSERTION_MODE(InSelect) \
|
||||
__ENUMERATE_INSERTION_MODE(InSelectInTable) \
|
||||
__ENUMERATE_INSERTION_MODE(InTemplate) \
|
||||
__ENUMERATE_INSERTION_MODE(AfterBody) \
|
||||
__ENUMERATE_INSERTION_MODE(InFrameset) \
|
||||
__ENUMERATE_INSERTION_MODE(AfterFrameset) \
|
||||
__ENUMERATE_INSERTION_MODE(AfterAfterBody) \
|
||||
__ENUMERATE_INSERTION_MODE(AfterAfterFrameset)
|
||||
|
||||
class HTMLParser final : public JS::Cell {
|
||||
JS_CELL(HTMLParser, JS::Cell);
|
||||
JS_DECLARE_ALLOCATOR(HTMLParser);
|
||||
|
||||
friend class HTMLTokenizer;
|
||||
|
||||
public:
|
||||
~HTMLParser();
|
||||
|
||||
static JS::NonnullGCPtr<HTMLParser> create_for_scripting(DOM::Document&);
|
||||
static JS::NonnullGCPtr<HTMLParser> create_with_uncertain_encoding(DOM::Document&, ByteBuffer const& input, Optional<MimeSniff::MimeType> maybe_mime_type = {});
|
||||
static JS::NonnullGCPtr<HTMLParser> create(DOM::Document&, StringView input, StringView encoding);
|
||||
|
||||
void run(HTMLTokenizer::StopAtInsertionPoint = HTMLTokenizer::StopAtInsertionPoint::No);
|
||||
void run(const URL::URL&, HTMLTokenizer::StopAtInsertionPoint = HTMLTokenizer::StopAtInsertionPoint::No);
|
||||
|
||||
static void the_end(JS::NonnullGCPtr<DOM::Document>, JS::GCPtr<HTMLParser> = nullptr);
|
||||
|
||||
DOM::Document& document();
|
||||
enum class AllowDeclarativeShadowRoots {
|
||||
No,
|
||||
Yes,
|
||||
};
|
||||
static Vector<JS::Handle<DOM::Node>> parse_html_fragment(DOM::Element& context_element, StringView, AllowDeclarativeShadowRoots = AllowDeclarativeShadowRoots::No);
|
||||
enum class SerializableShadowRoots {
|
||||
No,
|
||||
Yes,
|
||||
};
|
||||
static String serialize_html_fragment(DOM::Node const&, SerializableShadowRoots, Vector<JS::Handle<DOM::ShadowRoot>> const&, DOM::FragmentSerializationMode = DOM::FragmentSerializationMode::Inner);
|
||||
|
||||
enum class InsertionMode {
|
||||
#define __ENUMERATE_INSERTION_MODE(mode) mode,
|
||||
ENUMERATE_INSERTION_MODES
|
||||
#undef __ENUMERATE_INSERTION_MODE
|
||||
};
|
||||
|
||||
InsertionMode insertion_mode() const { return m_insertion_mode; }
|
||||
|
||||
static bool is_special_tag(FlyString const& tag_name, Optional<FlyString> const& namespace_);
|
||||
|
||||
HTMLTokenizer& tokenizer() { return m_tokenizer; }
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#abort-a-parser
|
||||
void abort();
|
||||
|
||||
bool aborted() const { return m_aborted; }
|
||||
bool stopped() const { return m_stop_parsing; }
|
||||
|
||||
size_t script_nesting_level() const { return m_script_nesting_level; }
|
||||
|
||||
private:
|
||||
HTMLParser(DOM::Document&, StringView input, StringView encoding);
|
||||
HTMLParser(DOM::Document&);
|
||||
|
||||
virtual void visit_edges(Cell::Visitor&) override;
|
||||
|
||||
char const* insertion_mode_name() const;
|
||||
|
||||
DOM::QuirksMode which_quirks_mode(HTMLToken const&) const;
|
||||
|
||||
void handle_initial(HTMLToken&);
|
||||
void handle_before_html(HTMLToken&);
|
||||
void handle_before_head(HTMLToken&);
|
||||
void handle_in_head(HTMLToken&);
|
||||
void handle_in_head_noscript(HTMLToken&);
|
||||
void handle_after_head(HTMLToken&);
|
||||
void handle_in_body(HTMLToken&);
|
||||
void handle_after_body(HTMLToken&);
|
||||
void handle_after_after_body(HTMLToken&);
|
||||
void handle_text(HTMLToken&);
|
||||
void handle_in_table(HTMLToken&);
|
||||
void handle_in_table_body(HTMLToken&);
|
||||
void handle_in_row(HTMLToken&);
|
||||
void handle_in_cell(HTMLToken&);
|
||||
void handle_in_table_text(HTMLToken&);
|
||||
void handle_in_select_in_table(HTMLToken&);
|
||||
void handle_in_select(HTMLToken&);
|
||||
void handle_in_caption(HTMLToken&);
|
||||
void handle_in_column_group(HTMLToken&);
|
||||
void handle_in_template(HTMLToken&);
|
||||
void handle_in_frameset(HTMLToken&);
|
||||
void handle_after_frameset(HTMLToken&);
|
||||
void handle_after_after_frameset(HTMLToken&);
|
||||
|
||||
void stop_parsing() { m_stop_parsing = true; }
|
||||
|
||||
void generate_implied_end_tags(FlyString const& exception = {});
|
||||
void generate_all_implied_end_tags_thoroughly();
|
||||
JS::NonnullGCPtr<DOM::Element> create_element_for(HTMLToken const&, Optional<FlyString> const& namespace_, DOM::Node& intended_parent);
|
||||
|
||||
struct AdjustedInsertionLocation {
|
||||
JS::GCPtr<DOM::Node> parent;
|
||||
JS::GCPtr<DOM::Node> insert_before_sibling;
|
||||
};
|
||||
|
||||
AdjustedInsertionLocation find_appropriate_place_for_inserting_node(JS::GCPtr<DOM::Element> override_target = nullptr);
|
||||
|
||||
void insert_an_element_at_the_adjusted_insertion_location(JS::NonnullGCPtr<DOM::Element>);
|
||||
|
||||
DOM::Text* find_character_insertion_node();
|
||||
void flush_character_insertions();
|
||||
enum class OnlyAddToElementStack {
|
||||
No,
|
||||
Yes,
|
||||
};
|
||||
JS::NonnullGCPtr<DOM::Element> insert_foreign_element(HTMLToken const&, Optional<FlyString> const& namespace_, OnlyAddToElementStack);
|
||||
JS::NonnullGCPtr<DOM::Element> insert_html_element(HTMLToken const&);
|
||||
[[nodiscard]] JS::GCPtr<DOM::Element> current_node();
|
||||
[[nodiscard]] JS::GCPtr<DOM::Element> adjusted_current_node();
|
||||
[[nodiscard]] JS::GCPtr<DOM::Element> node_before_current_node();
|
||||
void insert_character(u32 data);
|
||||
void insert_comment(HTMLToken&);
|
||||
void reconstruct_the_active_formatting_elements();
|
||||
void close_a_p_element();
|
||||
void process_using_the_rules_for(InsertionMode, HTMLToken&);
|
||||
void process_using_the_rules_for_foreign_content(HTMLToken&);
|
||||
void parse_generic_raw_text_element(HTMLToken&);
|
||||
void increment_script_nesting_level();
|
||||
void decrement_script_nesting_level();
|
||||
void reset_the_insertion_mode_appropriately();
|
||||
|
||||
void adjust_mathml_attributes(HTMLToken&);
|
||||
void adjust_svg_tag_names(HTMLToken&);
|
||||
void adjust_svg_attributes(HTMLToken&);
|
||||
static void adjust_foreign_attributes(HTMLToken&);
|
||||
|
||||
enum AdoptionAgencyAlgorithmOutcome {
|
||||
DoNothing,
|
||||
RunAnyOtherEndTagSteps,
|
||||
};
|
||||
|
||||
AdoptionAgencyAlgorithmOutcome run_the_adoption_agency_algorithm(HTMLToken&);
|
||||
void clear_the_stack_back_to_a_table_context();
|
||||
void clear_the_stack_back_to_a_table_body_context();
|
||||
void clear_the_stack_back_to_a_table_row_context();
|
||||
void close_the_cell();
|
||||
|
||||
InsertionMode m_insertion_mode { InsertionMode::Initial };
|
||||
InsertionMode m_original_insertion_mode { InsertionMode::Initial };
|
||||
|
||||
StackOfOpenElements m_stack_of_open_elements;
|
||||
Vector<InsertionMode> m_stack_of_template_insertion_modes;
|
||||
ListOfActiveFormattingElements m_list_of_active_formatting_elements;
|
||||
|
||||
HTMLTokenizer m_tokenizer;
|
||||
|
||||
bool m_foster_parenting { false };
|
||||
bool m_frameset_ok { true };
|
||||
bool m_parsing_fragment { false };
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#scripting-flag
|
||||
// The scripting flag is set to "enabled" if scripting was enabled for the Document with which the parser is associated when the parser was created, and "disabled" otherwise.
|
||||
bool m_scripting_enabled { true };
|
||||
|
||||
bool m_invoked_via_document_write { false };
|
||||
bool m_aborted { false };
|
||||
bool m_parser_pause_flag { false };
|
||||
bool m_stop_parsing { false };
|
||||
size_t m_script_nesting_level { 0 };
|
||||
|
||||
JS::Realm& realm();
|
||||
|
||||
JS::GCPtr<DOM::Document> m_document;
|
||||
JS::GCPtr<HTMLHeadElement> m_head_element;
|
||||
JS::GCPtr<HTMLFormElement> m_form_element;
|
||||
JS::GCPtr<DOM::Element> m_context_element;
|
||||
|
||||
Vector<HTMLToken> m_pending_table_character_tokens;
|
||||
|
||||
JS::GCPtr<DOM::Text> m_character_insertion_node;
|
||||
StringBuilder m_character_insertion_builder;
|
||||
};
|
||||
|
||||
RefPtr<CSS::CSSStyleValue> parse_dimension_value(StringView);
|
||||
RefPtr<CSS::CSSStyleValue> parse_nonzero_dimension_value(StringView);
|
||||
Optional<Color> parse_legacy_color_value(StringView);
|
||||
|
||||
}
|
105
Libraries/LibWeb/HTML/Parser/HTMLToken.cpp
Normal file
105
Libraries/LibWeb/HTML/Parser/HTMLToken.cpp
Normal file
|
@ -0,0 +1,105 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <andreas@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/HashTable.h>
|
||||
#include <LibWeb/HTML/Parser/HTMLToken.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
// Builds a human-readable, single-line description of this token for debugging:
// the token type, its payload (DOCTYPE name, tag name and attributes, comment
// data, or code point) and finally its source position.
String HTMLToken::to_string() const
{
    StringBuilder output;

    // Start and end tags share the same payload layout: the tag name followed by
    // every attribute rendered as name="value".
    auto append_tag_details = [&] {
        output.append(" { name: '"sv);
        output.append(tag_name());
        output.append("', { "sv);
        for_each_attribute([&](auto& attribute) {
            output.append(attribute.local_name);
            output.append("=\""sv);
            output.append(attribute.value);
            output.append("\" "sv);
            return IterationDecision::Continue;
        });
        output.append("} }"sv);
    };

    switch (type()) {
    case HTMLToken::Type::DOCTYPE:
        output.append("DOCTYPE"sv);
        output.append(" { name: '"sv);
        output.append(doctype_data().name);
        output.append("' }"sv);
        break;
    case HTMLToken::Type::StartTag:
        output.append("StartTag"sv);
        append_tag_details();
        break;
    case HTMLToken::Type::EndTag:
        output.append("EndTag"sv);
        append_tag_details();
        break;
    case HTMLToken::Type::Comment:
        output.append("Comment"sv);
        output.append(" { data: '"sv);
        output.append(comment());
        output.append("' }"sv);
        break;
    case HTMLToken::Type::Character:
        output.append("Character"sv);
        output.append(" { data: '"sv);
        output.append_code_point(code_point());
        output.append("' }"sv);
        break;
    case HTMLToken::Type::EndOfFile:
        output.append("EndOfFile"sv);
        break;
    case HTMLToken::Type::Invalid:
        VERIFY_NOT_REACHED();
    }

    // Character tokens are printed with their start position only; every other
    // token is printed with a start-end range.
    if (is_character())
        output.appendff("@{}:{}", m_start_position.line, m_start_position.column);
    else
        output.appendff("@{}:{}-{}:{}", m_start_position.line, m_start_position.column, m_end_position.line, m_end_position.column);

    return MUST(output.to_string());
}
|
||||
|
||||
void HTMLToken::normalize_attributes()
|
||||
{
|
||||
// From AttributeNameState: https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
|
||||
//
|
||||
// When the user agent leaves the attribute name state (and before emitting the tag token, if appropriate),
|
||||
// the complete attribute's name must be compared to the other attributes on the same token;
|
||||
// if there is already an attribute on the token with the exact same name, then this is a duplicate-attribute
|
||||
// parse error and the new attribute must be removed from the token.
|
||||
|
||||
// NOTE: If an attribute is so removed from a token, it, and the value that gets associated with it, if any,
|
||||
// are never subsequently used by the parser, and are therefore effectively discarded. Removing the attribute
|
||||
// in this way does not change its status as the "current attribute" for the purposes of the tokenizer, however.
|
||||
|
||||
HashTable<FlyString> seen_attributes;
|
||||
auto* ptr = tag_attributes();
|
||||
if (!ptr)
|
||||
return;
|
||||
auto& tag_attributes = *ptr;
|
||||
for (size_t i = 0; i < tag_attributes.size(); ++i) {
|
||||
auto& attribute = tag_attributes[i];
|
||||
if (seen_attributes.set(attribute.local_name, AK::HashSetExistingEntryBehavior::Keep) == AK::HashSetResult::KeptExistingEntry) {
|
||||
// This is a duplicate attribute, remove it.
|
||||
tag_attributes.remove(i);
|
||||
--i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
371
Libraries/LibWeb/HTML/Parser/HTMLToken.h
Normal file
371
Libraries/LibWeb/HTML/Parser/HTMLToken.h
Normal file
|
@ -0,0 +1,371 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <andreas@ladybird.org>
|
||||
* Copyright (c) 2021, Max Wipfli <max.wipfli@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/FlyString.h>
|
||||
#include <AK/Function.h>
|
||||
#include <AK/OwnPtr.h>
|
||||
#include <AK/Types.h>
|
||||
#include <AK/Variant.h>
|
||||
#include <AK/Vector.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
class HTMLTokenizer;
|
||||
|
||||
class HTMLToken {
|
||||
AK_MAKE_NONCOPYABLE(HTMLToken);
|
||||
AK_MAKE_DEFAULT_MOVABLE(HTMLToken);
|
||||
|
||||
public:
|
||||
enum class Type : u8 {
|
||||
Invalid,
|
||||
DOCTYPE,
|
||||
StartTag,
|
||||
EndTag,
|
||||
Comment,
|
||||
Character,
|
||||
EndOfFile,
|
||||
};
|
||||
|
||||
struct Position {
|
||||
size_t line { 0 };
|
||||
size_t column { 0 };
|
||||
size_t byte_offset { 0 };
|
||||
};
|
||||
|
||||
struct Attribute {
|
||||
Optional<FlyString> prefix;
|
||||
FlyString local_name;
|
||||
Optional<FlyString> namespace_;
|
||||
String value;
|
||||
Position name_start_position;
|
||||
Position value_start_position;
|
||||
Position name_end_position;
|
||||
Position value_end_position;
|
||||
};
|
||||
|
||||
struct DoctypeData {
|
||||
// NOTE: "Missing" is a distinct state from the empty string.
|
||||
String name;
|
||||
String public_identifier;
|
||||
String system_identifier;
|
||||
bool missing_name { true };
|
||||
bool missing_public_identifier { true };
|
||||
bool missing_system_identifier { true };
|
||||
bool force_quirks { false };
|
||||
};
|
||||
|
||||
static HTMLToken make_character(u32 code_point)
|
||||
{
|
||||
HTMLToken token { Type::Character };
|
||||
token.set_code_point(code_point);
|
||||
return token;
|
||||
}
|
||||
|
||||
static HTMLToken make_start_tag(FlyString const& tag_name)
|
||||
{
|
||||
HTMLToken token { Type::StartTag };
|
||||
token.set_tag_name(tag_name);
|
||||
return token;
|
||||
}
|
||||
|
||||
HTMLToken() = default;
|
||||
|
||||
HTMLToken(Type type)
|
||||
: m_type(type)
|
||||
{
|
||||
switch (m_type) {
|
||||
case Type::Character:
|
||||
m_data.set(0u);
|
||||
break;
|
||||
case Type::DOCTYPE:
|
||||
m_data.set(OwnPtr<DoctypeData> {});
|
||||
break;
|
||||
case Type::StartTag:
|
||||
case Type::EndTag:
|
||||
m_data.set(OwnPtr<Vector<Attribute>>());
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
bool is_doctype() const { return m_type == Type::DOCTYPE; }
|
||||
bool is_start_tag() const { return m_type == Type::StartTag; }
|
||||
bool is_end_tag() const { return m_type == Type::EndTag; }
|
||||
bool is_comment() const { return m_type == Type::Comment; }
|
||||
bool is_character() const { return m_type == Type::Character; }
|
||||
bool is_end_of_file() const { return m_type == Type::EndOfFile; }
|
||||
|
||||
u32 code_point() const
|
||||
{
|
||||
VERIFY(is_character());
|
||||
return m_data.get<u32>();
|
||||
}
|
||||
|
||||
bool is_parser_whitespace() const
|
||||
{
|
||||
// NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not.
|
||||
if (!is_character())
|
||||
return false;
|
||||
switch (code_point()) {
|
||||
case '\t':
|
||||
case '\n':
|
||||
case '\f':
|
||||
case '\r':
|
||||
case ' ':
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void set_code_point(u32 code_point)
|
||||
{
|
||||
VERIFY(is_character());
|
||||
m_data.get<u32>() = code_point;
|
||||
}
|
||||
|
||||
String const& comment() const
|
||||
{
|
||||
VERIFY(is_comment());
|
||||
return m_comment_data;
|
||||
}
|
||||
|
||||
void set_comment(String comment)
|
||||
{
|
||||
VERIFY(is_comment());
|
||||
m_comment_data = move(comment);
|
||||
}
|
||||
|
||||
FlyString const& tag_name() const
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
return m_string_data;
|
||||
}
|
||||
|
||||
void set_tag_name(FlyString name)
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
m_string_data = move(name);
|
||||
}
|
||||
|
||||
bool is_self_closing() const
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
return m_tag_self_closing;
|
||||
}
|
||||
|
||||
void set_self_closing(bool self_closing)
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
m_tag_self_closing = self_closing;
|
||||
}
|
||||
|
||||
bool has_acknowledged_self_closing_flag() const
|
||||
{
|
||||
VERIFY(is_self_closing());
|
||||
return m_tag_self_closing_acknowledged;
|
||||
}
|
||||
|
||||
void acknowledge_self_closing_flag_if_set()
|
||||
{
|
||||
if (is_self_closing())
|
||||
m_tag_self_closing_acknowledged = true;
|
||||
}
|
||||
|
||||
bool has_attributes() const
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
auto* ptr = tag_attributes();
|
||||
return ptr && !ptr->is_empty();
|
||||
}
|
||||
|
||||
size_t attribute_count() const
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
if (auto* ptr = tag_attributes())
|
||||
return ptr->size();
|
||||
return 0;
|
||||
}
|
||||
|
||||
void add_attribute(Attribute attribute)
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
ensure_tag_attributes().append(move(attribute));
|
||||
}
|
||||
|
||||
Attribute const& last_attribute() const
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
VERIFY(has_attributes());
|
||||
return tag_attributes()->last();
|
||||
}
|
||||
|
||||
Attribute& last_attribute()
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
VERIFY(has_attributes());
|
||||
return tag_attributes()->last();
|
||||
}
|
||||
|
||||
void drop_attributes()
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
m_data.get<OwnPtr<Vector<Attribute>>>().clear();
|
||||
}
|
||||
|
||||
void for_each_attribute(Function<IterationDecision(Attribute const&)> callback) const
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
auto* ptr = tag_attributes();
|
||||
if (!ptr)
|
||||
return;
|
||||
for (auto& attribute : *ptr) {
|
||||
if (callback(attribute) == IterationDecision::Break)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void for_each_attribute(Function<IterationDecision(Attribute&)> callback)
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
auto* ptr = tag_attributes();
|
||||
if (!ptr)
|
||||
return;
|
||||
for (auto& attribute : *ptr) {
|
||||
if (callback(attribute) == IterationDecision::Break)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
Optional<String> attribute(FlyString const& attribute_name) const
|
||||
{
|
||||
if (auto result = raw_attribute(attribute_name); result.has_value())
|
||||
return result->value;
|
||||
return {};
|
||||
}
|
||||
|
||||
Optional<Attribute const&> raw_attribute(FlyString const& attribute_name) const
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
|
||||
auto* ptr = tag_attributes();
|
||||
if (!ptr)
|
||||
return {};
|
||||
for (auto const& attribute : *ptr) {
|
||||
if (attribute_name == attribute.local_name)
|
||||
return attribute;
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
bool has_attribute(FlyString const& attribute_name) const
|
||||
{
|
||||
return attribute(attribute_name).has_value();
|
||||
}
|
||||
|
||||
void adjust_tag_name(FlyString const& old_name, FlyString const& new_name)
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
if (old_name == tag_name())
|
||||
set_tag_name(new_name);
|
||||
}
|
||||
|
||||
void adjust_attribute_name(FlyString const& old_name, FlyString const& new_name)
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
for_each_attribute([&](Attribute& attribute) {
|
||||
if (old_name == attribute.local_name)
|
||||
attribute.local_name = new_name;
|
||||
return IterationDecision::Continue;
|
||||
});
|
||||
}
|
||||
|
||||
void adjust_foreign_attribute(FlyString const& old_name, Optional<FlyString> const& prefix, FlyString const& local_name, Optional<FlyString> const& namespace_)
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
for_each_attribute([&](Attribute& attribute) {
|
||||
if (old_name == attribute.local_name) {
|
||||
attribute.prefix = prefix;
|
||||
attribute.local_name = local_name;
|
||||
attribute.namespace_ = namespace_;
|
||||
}
|
||||
return IterationDecision::Continue;
|
||||
});
|
||||
}
|
||||
|
||||
DoctypeData const& doctype_data() const
|
||||
{
|
||||
VERIFY(is_doctype());
|
||||
auto* ptr = m_data.get<OwnPtr<DoctypeData>>().ptr();
|
||||
VERIFY(ptr);
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
DoctypeData& ensure_doctype_data()
|
||||
{
|
||||
VERIFY(is_doctype());
|
||||
auto& ptr = m_data.get<OwnPtr<DoctypeData>>();
|
||||
if (!ptr)
|
||||
ptr = make<DoctypeData>();
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
Type type() const { return m_type; }
|
||||
|
||||
String to_string() const;
|
||||
|
||||
Position const& start_position() const { return m_start_position; }
|
||||
Position const& end_position() const { return m_end_position; }
|
||||
|
||||
void set_start_position(Badge<HTMLTokenizer>, Position start_position) { m_start_position = start_position; }
|
||||
void set_end_position(Badge<HTMLTokenizer>, Position end_position) { m_end_position = end_position; }
|
||||
|
||||
void normalize_attributes();
|
||||
|
||||
private:
|
||||
Vector<Attribute> const* tag_attributes() const
|
||||
{
|
||||
return m_data.get<OwnPtr<Vector<Attribute>>>().ptr();
|
||||
}
|
||||
|
||||
Vector<Attribute>* tag_attributes()
|
||||
{
|
||||
return m_data.get<OwnPtr<Vector<Attribute>>>().ptr();
|
||||
}
|
||||
|
||||
Vector<Attribute>& ensure_tag_attributes()
|
||||
{
|
||||
VERIFY(is_start_tag() || is_end_tag());
|
||||
auto& ptr = m_data.get<OwnPtr<Vector<Attribute>>>();
|
||||
if (!ptr)
|
||||
ptr = make<Vector<Attribute>>();
|
||||
return *ptr;
|
||||
}
|
||||
|
||||
Type m_type { Type::Invalid };
|
||||
|
||||
// Type::StartTag and Type::EndTag
|
||||
bool m_tag_self_closing { false };
|
||||
bool m_tag_self_closing_acknowledged { false };
|
||||
|
||||
// Type::StartTag and Type::EndTag (tag name)
|
||||
FlyString m_string_data;
|
||||
|
||||
// Type::Comment (comment data)
|
||||
String m_comment_data;
|
||||
|
||||
Variant<Empty, u32, OwnPtr<DoctypeData>, OwnPtr<Vector<Attribute>>> m_data {};
|
||||
|
||||
Position m_start_position;
|
||||
Position m_end_position;
|
||||
};
|
||||
|
||||
}
|
211
Libraries/LibWeb/HTML/Parser/HTMLToken.swift
Normal file
211
Libraries/LibWeb/HTML/Parser/HTMLToken.swift
Normal file
|
@ -0,0 +1,211 @@
|
|||
/*
|
||||
 * Copyright (c) 2024, Andrew Kaster <andrew@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
@_exported import WebCxx
|
||||
|
||||
/// A token produced by the HTML tokenizer. The payload lives in the associated
/// values of `TokenType`; the computed properties below read or rebuild those
/// associated values for start/end tag tokens.
public class HTMLToken {
    /// A location in the source text.
    public struct Position: Equatable {
        var line = UInt()
        var column = UInt()
        var byteOffset = UInt()
    }

    /// One attribute of a start/end tag, with the source positions of its name and value.
    public struct Attribute: Equatable {
        public var prefix: Swift.String? = nil
        public var localName: Swift.String
        public var namespace_: Swift.String? = nil
        public var value: Swift.String
        public var nameStartPosition = Position()
        public var nameEndPosition = Position()
        public var valueStartPosition = Position()
        public var valueEndPosition = Position()

        public init(localName: Swift.String, value: Swift.String) {
            self.localName = localName
            self.value = value
        }
    }

    public enum TokenType: Equatable {
        case Invalid
        case DOCTYPE(
            name: Swift.String?,
            publicIdentifier: Swift.String?,
            systemIdentifier: Swift.String?,
            forceQuirksMode: Bool)
        case StartTag(
            tagName: Swift.String,
            selfClosing: Bool = false,
            selfClosingAcknowledged: Bool = false,
            attributes: [Attribute] = [])
        case EndTag(
            tagName: Swift.String,
            selfClosing: Bool = false,
            selfClosingAcknowledged: Bool = false,
            attributes: [Attribute] = [])
        case Comment(data: Swift.String)
        case Character(codePoint: Character)
        case EndOfFile
    }

    public var type = TokenType.Invalid
    public var startPosition = Position()
    public var endPosition = Position()

    public init() {}

    public init(type: TokenType) {
        self.type = type
    }

    public func isCharacter() -> Bool {
        guard case .Character = self.type else { return false }
        return true
    }

    public func isEndTag() -> Bool {
        guard case .EndTag = self.type else { return false }
        return true
    }

    public func isStartTag() -> Bool {
        guard case .StartTag = self.type else { return false }
        return true
    }

    public func isTag() -> Bool {
        return isStartTag() || isEndTag()
    }

    /// Whether this character token is tab, LF, FF, CR or space.
    /// Traps when called on a non-character token.
    public func isParserWhitespace() -> Bool {
        precondition(isCharacter(), "isParserWhitespace() called on non-character token")

        guard case .Character(let codePoint) = self.type else { return false }

        // NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not.
        switch codePoint {
        case "\t", "\n", "\u{000C}" /* \f */, "\r", " ":
            return true
        default:
            return false
        }
    }

    // Is in-place mutating enums a thing? Seems not https://forums.swift.org/t/in-place-mutation-of-an-enum-associated-value/11747
    // Each setter below therefore rebuilds the whole enum value with one field replaced.

    /// Attributes of a start/end tag token. Traps on any other token type.
    public var attributes: [Attribute] {
        get {
            switch self.type {
            case .StartTag(_, _, _, let current), .EndTag(_, _, _, let current):
                return current
            default:
                preconditionFailure("attributes called on non-tag token")
            }
        }
        set {
            switch self.type {
            case .StartTag(let name, let closing, let acknowledged, _):
                self.type = .StartTag(tagName: name, selfClosing: closing, selfClosingAcknowledged: acknowledged, attributes: newValue)
            case .EndTag(let name, let closing, let acknowledged, _):
                self.type = .EndTag(tagName: name, selfClosing: closing, selfClosingAcknowledged: acknowledged, attributes: newValue)
            default:
                preconditionFailure("attributes= called on non-tag token")
            }
        }
    }

    /// Name of a start/end tag token. Traps on any other token type.
    public var tagName: Swift.String {
        get {
            switch self.type {
            case .StartTag(let current, _, _, _), .EndTag(let current, _, _, _):
                return current
            default:
                preconditionFailure("tagName called on non-tag token")
            }
        }
        set {
            switch self.type {
            case .StartTag(_, let closing, let acknowledged, let attrs):
                self.type = .StartTag(tagName: newValue, selfClosing: closing, selfClosingAcknowledged: acknowledged, attributes: attrs)
            case .EndTag(_, let closing, let acknowledged, let attrs):
                self.type = .EndTag(tagName: newValue, selfClosing: closing, selfClosingAcknowledged: acknowledged, attributes: attrs)
            default:
                preconditionFailure("tagName= called on non-tag token")
            }
        }
    }

    /// Self-closing flag of a start/end tag token. Traps on any other token type.
    public var selfClosing: Bool {
        get {
            switch self.type {
            case .StartTag(_, let current, _, _), .EndTag(_, let current, _, _):
                return current
            default:
                preconditionFailure("selfClosing called on non-tag token")
            }
        }
        set {
            switch self.type {
            case .StartTag(let name, _, let acknowledged, let attrs):
                self.type = .StartTag(tagName: name, selfClosing: newValue, selfClosingAcknowledged: acknowledged, attributes: attrs)
            case .EndTag(let name, _, let acknowledged, let attrs):
                self.type = .EndTag(tagName: name, selfClosing: newValue, selfClosingAcknowledged: acknowledged, attributes: attrs)
            default:
                preconditionFailure("selfClosing= called on non-tag token")
            }
        }
    }
}
|
||||
|
||||
extension HTMLToken.Position: CustomStringConvertible {
    /// Renders the position as "line:column"; the byte offset is not included.
    public var description: Swift.String {
        "\(self.line):\(self.column)"
    }
}
|
||||
|
||||
extension HTMLToken.TokenType: CustomStringConvertible {
    // FIXME: Print attributes for start/end tags
    /// Renders the case name followed by its associated values; optional DOCTYPE
    /// fields that are nil are printed as "nil".
    public var description: Swift.String {
        switch self {
        case .Invalid:
            return "Invalid"
        case let .DOCTYPE(name, publicIdentifier, systemIdentifier, forceQuirksMode):
            return "DOCTYPE(name: \(name ?? "nil"), publicIdentifier: \(publicIdentifier ?? "nil"), systemIdentifier: \(systemIdentifier ?? "nil"), forceQuirksMode: \(forceQuirksMode))"
        case let .StartTag(tagName, selfClosing, selfClosingAcknowledged, attributes):
            return "StartTag(tagName: \(tagName), selfClosing: \(selfClosing), selfClosingAcknowledged: \(selfClosingAcknowledged), attributes: \(attributes))"
        case let .EndTag(tagName, selfClosing, selfClosingAcknowledged, attributes):
            return "EndTag(tagName: \(tagName), selfClosing: \(selfClosing), selfClosingAcknowledged: \(selfClosingAcknowledged), attributes: \(attributes))"
        case let .Comment(data):
            return "Comment(data: \(data))"
        case let .Character(codePoint):
            return "Character(codePoint: \(codePoint))"
        case .EndOfFile:
            return "EndOfFile"
        }
    }
}
|
||||
|
||||
extension HTMLToken: CustomStringConvertible {
    /// Renders the token type, followed by "@start" or "@start-end" when those
    /// positions differ from a default-constructed Position.
    public var description: Swift.String {
        let unsetPosition = Position()

        // No start position recorded: print the type only.
        guard self.startPosition != unsetPosition else {
            return "HTMLToken(type: \(self.type))"
        }

        // Start position only.
        guard self.endPosition != unsetPosition else {
            return "HTMLToken(type: \(self.type))@\(self.startPosition)"
        }

        return "HTMLToken(type: \(self.type))@\(self.startPosition)-\(self.endPosition)"
    }
}
|
2908
Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
Normal file
2908
Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
Normal file
File diff suppressed because it is too large
Load diff
223
Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h
Normal file
223
Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h
Normal file
|
@ -0,0 +1,223 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <andreas@ladybird.org>
|
||||
* Copyright (c) 2022, Linus Groh <linusg@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/Queue.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Types.h>
|
||||
#include <AK/Utf8View.h>
|
||||
#include <LibJS/Heap/GCPtr.h>
|
||||
#include <LibWeb/Forward.h>
|
||||
#include <LibWeb/HTML/Parser/HTMLToken.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
#define ENUMERATE_TOKENIZER_STATES \
|
||||
__ENUMERATE_TOKENIZER_STATE(Data) \
|
||||
__ENUMERATE_TOKENIZER_STATE(RCDATA) \
|
||||
__ENUMERATE_TOKENIZER_STATE(RAWTEXT) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptData) \
|
||||
__ENUMERATE_TOKENIZER_STATE(PLAINTEXT) \
|
||||
__ENUMERATE_TOKENIZER_STATE(TagOpen) \
|
||||
__ENUMERATE_TOKENIZER_STATE(EndTagOpen) \
|
||||
__ENUMERATE_TOKENIZER_STATE(TagName) \
|
||||
__ENUMERATE_TOKENIZER_STATE(RCDATALessThanSign) \
|
||||
__ENUMERATE_TOKENIZER_STATE(RCDATAEndTagOpen) \
|
||||
__ENUMERATE_TOKENIZER_STATE(RCDATAEndTagName) \
|
||||
__ENUMERATE_TOKENIZER_STATE(RAWTEXTLessThanSign) \
|
||||
__ENUMERATE_TOKENIZER_STATE(RAWTEXTEndTagOpen) \
|
||||
__ENUMERATE_TOKENIZER_STATE(RAWTEXTEndTagName) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataLessThanSign) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataEndTagOpen) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataEndTagName) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataEscapeStart) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataEscapeStartDash) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataEscaped) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedDash) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedDashDash) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedLessThanSign) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedEndTagOpen) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedEndTagName) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapeStart) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscaped) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapedDash) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapedDashDash) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapedLessThanSign) \
|
||||
__ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapeEnd) \
|
||||
__ENUMERATE_TOKENIZER_STATE(BeforeAttributeName) \
|
||||
__ENUMERATE_TOKENIZER_STATE(AttributeName) \
|
||||
__ENUMERATE_TOKENIZER_STATE(AfterAttributeName) \
|
||||
__ENUMERATE_TOKENIZER_STATE(BeforeAttributeValue) \
|
||||
__ENUMERATE_TOKENIZER_STATE(AttributeValueDoubleQuoted) \
|
||||
__ENUMERATE_TOKENIZER_STATE(AttributeValueSingleQuoted) \
|
||||
__ENUMERATE_TOKENIZER_STATE(AttributeValueUnquoted) \
|
||||
__ENUMERATE_TOKENIZER_STATE(AfterAttributeValueQuoted) \
|
||||
__ENUMERATE_TOKENIZER_STATE(SelfClosingStartTag) \
|
||||
__ENUMERATE_TOKENIZER_STATE(BogusComment) \
|
||||
__ENUMERATE_TOKENIZER_STATE(MarkupDeclarationOpen) \
|
||||
__ENUMERATE_TOKENIZER_STATE(CommentStart) \
|
||||
__ENUMERATE_TOKENIZER_STATE(CommentStartDash) \
|
||||
__ENUMERATE_TOKENIZER_STATE(Comment) \
|
||||
__ENUMERATE_TOKENIZER_STATE(CommentLessThanSign) \
|
||||
__ENUMERATE_TOKENIZER_STATE(CommentLessThanSignBang) \
|
||||
__ENUMERATE_TOKENIZER_STATE(CommentLessThanSignBangDash) \
|
||||
__ENUMERATE_TOKENIZER_STATE(CommentLessThanSignBangDashDash) \
|
||||
__ENUMERATE_TOKENIZER_STATE(CommentEndDash) \
|
||||
__ENUMERATE_TOKENIZER_STATE(CommentEnd) \
|
||||
__ENUMERATE_TOKENIZER_STATE(CommentEndBang) \
|
||||
__ENUMERATE_TOKENIZER_STATE(DOCTYPE) \
|
||||
__ENUMERATE_TOKENIZER_STATE(BeforeDOCTYPEName) \
|
||||
__ENUMERATE_TOKENIZER_STATE(DOCTYPEName) \
|
||||
__ENUMERATE_TOKENIZER_STATE(AfterDOCTYPEName) \
|
||||
__ENUMERATE_TOKENIZER_STATE(AfterDOCTYPEPublicKeyword) \
|
||||
__ENUMERATE_TOKENIZER_STATE(BeforeDOCTYPEPublicIdentifier) \
|
||||
__ENUMERATE_TOKENIZER_STATE(DOCTYPEPublicIdentifierDoubleQuoted) \
|
||||
__ENUMERATE_TOKENIZER_STATE(DOCTYPEPublicIdentifierSingleQuoted) \
|
||||
__ENUMERATE_TOKENIZER_STATE(AfterDOCTYPEPublicIdentifier) \
|
||||
__ENUMERATE_TOKENIZER_STATE(BetweenDOCTYPEPublicAndSystemIdentifiers) \
|
||||
__ENUMERATE_TOKENIZER_STATE(AfterDOCTYPESystemKeyword) \
|
||||
__ENUMERATE_TOKENIZER_STATE(BeforeDOCTYPESystemIdentifier) \
|
||||
__ENUMERATE_TOKENIZER_STATE(DOCTYPESystemIdentifierDoubleQuoted) \
|
||||
__ENUMERATE_TOKENIZER_STATE(DOCTYPESystemIdentifierSingleQuoted) \
|
||||
__ENUMERATE_TOKENIZER_STATE(AfterDOCTYPESystemIdentifier) \
|
||||
__ENUMERATE_TOKENIZER_STATE(BogusDOCTYPE) \
|
||||
__ENUMERATE_TOKENIZER_STATE(CDATASection) \
|
||||
__ENUMERATE_TOKENIZER_STATE(CDATASectionBracket) \
|
||||
__ENUMERATE_TOKENIZER_STATE(CDATASectionEnd) \
|
||||
__ENUMERATE_TOKENIZER_STATE(CharacterReference) \
|
||||
__ENUMERATE_TOKENIZER_STATE(NamedCharacterReference) \
|
||||
__ENUMERATE_TOKENIZER_STATE(AmbiguousAmpersand) \
|
||||
__ENUMERATE_TOKENIZER_STATE(NumericCharacterReference) \
|
||||
__ENUMERATE_TOKENIZER_STATE(HexadecimalCharacterReferenceStart) \
|
||||
__ENUMERATE_TOKENIZER_STATE(DecimalCharacterReferenceStart) \
|
||||
__ENUMERATE_TOKENIZER_STATE(HexadecimalCharacterReference) \
|
||||
__ENUMERATE_TOKENIZER_STATE(DecimalCharacterReference) \
|
||||
__ENUMERATE_TOKENIZER_STATE(NumericCharacterReferenceEnd)
|
||||
|
||||
class HTMLTokenizer {
|
||||
public:
|
||||
explicit HTMLTokenizer();
|
||||
explicit HTMLTokenizer(StringView input, ByteString const& encoding);
|
||||
|
||||
enum class State {
|
||||
#define __ENUMERATE_TOKENIZER_STATE(state) state,
|
||||
ENUMERATE_TOKENIZER_STATES
|
||||
#undef __ENUMERATE_TOKENIZER_STATE
|
||||
};
|
||||
|
||||
enum class StopAtInsertionPoint {
|
||||
No,
|
||||
Yes,
|
||||
};
|
||||
Optional<HTMLToken> next_token(StopAtInsertionPoint = StopAtInsertionPoint::No);
|
||||
|
||||
void set_parser(Badge<HTMLParser>, HTMLParser& parser) { m_parser = &parser; }
|
||||
|
||||
void switch_to(Badge<HTMLParser>, State new_state);
|
||||
void switch_to(State new_state)
|
||||
{
|
||||
m_state = new_state;
|
||||
}
|
||||
|
||||
void set_blocked(bool b) { m_blocked = b; }
|
||||
bool is_blocked() const { return m_blocked; }
|
||||
|
||||
ByteString source() const { return m_decoded_input; }
|
||||
|
||||
void insert_input_at_insertion_point(StringView input);
|
||||
void insert_eof();
|
||||
bool is_eof_inserted();
|
||||
|
||||
bool is_insertion_point_defined() const { return m_insertion_point.defined; }
|
||||
bool is_insertion_point_reached()
|
||||
{
|
||||
return m_insertion_point.defined && m_utf8_view.iterator_offset(m_utf8_iterator) >= m_insertion_point.position;
|
||||
}
|
||||
void undefine_insertion_point() { m_insertion_point.defined = false; }
|
||||
void store_insertion_point() { m_old_insertion_point = m_insertion_point; }
|
||||
void restore_insertion_point() { m_insertion_point = m_old_insertion_point; }
|
||||
void update_insertion_point()
|
||||
{
|
||||
m_insertion_point.defined = true;
|
||||
m_insertion_point.position = m_utf8_view.iterator_offset(m_utf8_iterator);
|
||||
}
|
||||
|
||||
// This permanently cuts off the tokenizer input stream.
|
||||
void abort() { m_aborted = true; }
|
||||
|
||||
private:
|
||||
void skip(size_t count);
|
||||
Optional<u32> next_code_point();
|
||||
Optional<u32> peek_code_point(size_t offset) const;
|
||||
bool consume_next_if_match(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive);
|
||||
void create_new_token(HTMLToken::Type);
|
||||
bool current_end_tag_token_is_appropriate() const;
|
||||
String consume_current_builder();
|
||||
|
||||
static char const* state_name(State state)
|
||||
{
|
||||
switch (state) {
|
||||
#define __ENUMERATE_TOKENIZER_STATE(state) \
|
||||
case State::state: \
|
||||
return #state;
|
||||
ENUMERATE_TOKENIZER_STATES
|
||||
#undef __ENUMERATE_TOKENIZER_STATE
|
||||
};
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
void will_emit(HTMLToken&);
|
||||
void will_switch_to(State);
|
||||
void will_reconsume_in(State);
|
||||
|
||||
bool consumed_as_part_of_an_attribute() const;
|
||||
|
||||
void restore_to(Utf8CodePointIterator const& new_iterator);
|
||||
HTMLToken::Position nth_last_position(size_t n = 0);
|
||||
|
||||
JS::GCPtr<HTMLParser> m_parser;
|
||||
|
||||
State m_state { State::Data };
|
||||
State m_return_state { State::Data };
|
||||
|
||||
Vector<u32> m_temporary_buffer;
|
||||
|
||||
ByteString m_decoded_input;
|
||||
|
||||
// An insertion point within the input: an offset plus a flag saying whether
// the offset is meaningful. Starts out undefined at offset 0.
struct InsertionPoint {
    size_t position = 0;
    bool defined = false;
};
|
||||
InsertionPoint m_insertion_point {};
|
||||
InsertionPoint m_old_insertion_point {};
|
||||
|
||||
Utf8View m_utf8_view;
|
||||
Utf8CodePointIterator m_utf8_iterator;
|
||||
Utf8CodePointIterator m_prev_utf8_iterator;
|
||||
|
||||
HTMLToken m_current_token;
|
||||
StringBuilder m_current_builder;
|
||||
|
||||
Optional<ByteString> m_last_emitted_start_tag_name;
|
||||
|
||||
bool m_explicit_eof_inserted { false };
|
||||
bool m_has_emitted_eof { false };
|
||||
|
||||
Queue<HTMLToken> m_queued_tokens;
|
||||
|
||||
u32 m_character_reference_code { 0 };
|
||||
|
||||
bool m_blocked { false };
|
||||
|
||||
bool m_aborted { false };
|
||||
|
||||
Vector<HTMLToken::Position> m_source_positions;
|
||||
};
|
||||
|
||||
}
|
1233
Libraries/LibWeb/HTML/Parser/HTMLTokenizer.swift
Normal file
1233
Libraries/LibWeb/HTML/Parser/HTMLTokenizer.swift
Normal file
File diff suppressed because it is too large
Load diff
23
Libraries/LibWeb/HTML/Parser/HTMLTokenizerHelpers.cpp
Normal file
23
Libraries/LibWeb/HTML/Parser/HTMLTokenizerHelpers.cpp
Normal file
|
@ -0,0 +1,23 @@
|
|||
/*
|
||||
* Copyright (c) 2024, Andrew Kaster <akaster@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
#include <LibWeb/HTML/Parser/HTMLTokenizerHelpers.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
// Decodes `text` from the named `encoding` into a UTF-8 String.
// Returns std::nullopt when no decoder is available for `encoding` or when
// the decoder reports an error.
OptionalString decode_to_utf8(StringView text, StringView encoding)
{
    auto maybe_decoder = TextCodec::decoder_for(encoding);
    if (maybe_decoder.has_value()) {
        auto result = maybe_decoder.value().to_utf8(text);
        if (!result.is_error())
            return result.release_value();
    }
    return std::nullopt;
}
|
||||
|
||||
}
|
19
Libraries/LibWeb/HTML/Parser/HTMLTokenizerHelpers.h
Normal file
19
Libraries/LibWeb/HTML/Parser/HTMLTokenizerHelpers.h
Normal file
|
@ -0,0 +1,19 @@
|
|||
/*
|
||||
* Copyright (c) 2024, Andrew Kaster <akaster@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/String.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <optional>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
// Swift-friendly wrapper for TextCodec::Decoder::to_utf8
|
||||
using OptionalString = std::optional<String>;
|
||||
OptionalString decode_to_utf8(StringView text, StringView encoding);
|
||||
|
||||
}
|
|
@ -0,0 +1,90 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2022, Andreas Kling <andreas@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibWeb/DOM/Element.h>
|
||||
#include <LibWeb/HTML/Parser/ListOfActiveFormattingElements.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
ListOfActiveFormattingElements::~ListOfActiveFormattingElements() = default;
|
||||
|
||||
// Hands the element pointer of every entry (null for markers) to the visitor.
void ListOfActiveFormattingElements::visit_edges(JS::Cell::Visitor& visitor)
{
    for (size_t index = 0; index < m_entries.size(); ++index)
        visitor.visit(m_entries[index].element);
}
|
||||
|
||||
// Appends `element` as a new entry at the end of the list.
void ListOfActiveFormattingElements::add(DOM::Element& element)
{
    // FIXME: Implement the Noah's Ark clause https://html.spec.whatwg.org/multipage/parsing.html#push-onto-the-list-of-active-formatting-elements
    m_entries.append(Entry { element });
}
|
||||
|
||||
void ListOfActiveFormattingElements::add_marker()
|
||||
{
|
||||
m_entries.append({ nullptr });
|
||||
}
|
||||
|
||||
// Returns true if some entry points at exactly this element (pointer identity).
bool ListOfActiveFormattingElements::contains(const DOM::Element& element) const
{
    auto const* wanted = &element;
    for (size_t index = 0; index < m_entries.size(); ++index) {
        if (m_entries[index].element.ptr() == wanted)
            return true;
    }
    return false;
}
|
||||
|
||||
// Walks the list from the end towards the start and returns the first element
// whose local name equals `tag_name`. The search gives up (nullptr) as soon as
// a marker is met, or when the start of the list is reached without a match.
DOM::Element* ListOfActiveFormattingElements::last_element_with_tag_name_before_marker(FlyString const& tag_name)
{
    for (size_t remaining = m_entries.size(); remaining > 0; --remaining) {
        auto& candidate = m_entries[remaining - 1];
        if (candidate.is_marker())
            return nullptr;
        if (candidate.element->local_name() == tag_name)
            return candidate.element.ptr();
    }
    return nullptr;
}
|
||||
|
||||
// Removes the first entry that points at `element`; does nothing if none does.
void ListOfActiveFormattingElements::remove(DOM::Element& element)
{
    auto* doomed = &element;
    auto points_at_doomed = [doomed](auto& entry) {
        return entry.element.ptr() == doomed;
    };
    m_entries.remove_first_matching(points_at_doomed);
}
|
||||
|
||||
void ListOfActiveFormattingElements::clear_up_to_the_last_marker()
|
||||
{
|
||||
while (!m_entries.is_empty()) {
|
||||
auto entry = m_entries.take_last();
|
||||
if (entry.is_marker())
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the index of the first entry pointing at `element`,
// or an empty Optional when the element is not in the list.
Optional<size_t> ListOfActiveFormattingElements::find_index(DOM::Element const& element) const
{
    size_t index = 0;
    for (auto const& entry : m_entries) {
        if (entry.element.ptr() == &element)
            return index;
        ++index;
    }
    return {};
}
|
||||
|
||||
// Re-points every entry that currently refers to `to_remove` at `to_add`.
// Entries are matched by pointer identity; positions in the list do not change,
// and the list is left untouched when `to_remove` is not present.
void ListOfActiveFormattingElements::replace(DOM::Element& to_remove, DOM::Element& to_add)
{
    for (auto& entry : m_entries) {
        // Entry::element is a plain GC pointer, so store the pointer directly
        // instead of building a temporary JS::Handle just to convert it back.
        if (entry.element.ptr() == &to_remove)
            entry.element = JS::GCPtr<DOM::Element> { to_add };
    }
}
|
||||
|
||||
// Inserts a new entry for `element` at position `index` of the list.
void ListOfActiveFormattingElements::insert_at(size_t index, DOM::Element& element)
{
    m_entries.insert(index, Entry { element });
}
|
||||
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2022, Andreas Kling <andreas@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <LibWeb/DOM/Element.h>
|
||||
#include <LibWeb/Forward.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
class ListOfActiveFormattingElements {
|
||||
public:
|
||||
ListOfActiveFormattingElements() = default;
|
||||
~ListOfActiveFormattingElements();
|
||||
|
||||
struct Entry {
|
||||
bool is_marker() const { return !element; }
|
||||
|
||||
JS::GCPtr<DOM::Element> element;
|
||||
};
|
||||
|
||||
bool is_empty() const { return m_entries.is_empty(); }
|
||||
bool contains(const DOM::Element&) const;
|
||||
|
||||
void add(DOM::Element& element);
|
||||
void add_marker();
|
||||
void insert_at(size_t index, DOM::Element& element);
|
||||
|
||||
void replace(DOM::Element& to_remove, DOM::Element& to_add);
|
||||
|
||||
void remove(DOM::Element&);
|
||||
|
||||
Vector<Entry> const& entries() const { return m_entries; }
|
||||
Vector<Entry>& entries() { return m_entries; }
|
||||
|
||||
DOM::Element* last_element_with_tag_name_before_marker(FlyString const& tag_name);
|
||||
|
||||
void clear_up_to_the_last_marker();
|
||||
|
||||
Optional<size_t> find_index(DOM::Element const&) const;
|
||||
|
||||
void visit_edges(JS::Cell::Visitor&);
|
||||
|
||||
private:
|
||||
Vector<Entry> m_entries;
|
||||
};
|
||||
|
||||
}
|
189
Libraries/LibWeb/HTML/Parser/StackOfOpenElements.cpp
Normal file
189
Libraries/LibWeb/HTML/Parser/StackOfOpenElements.cpp
Normal file
|
@ -0,0 +1,189 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2022, Andreas Kling <andreas@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibWeb/DOM/Element.h>
|
||||
#include <LibWeb/HTML/Parser/HTMLParser.h>
|
||||
#include <LibWeb/HTML/Parser/StackOfOpenElements.h>
|
||||
#include <LibWeb/Namespace.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
// Local names that end the upward search in has_in_scope_impl(); the button and
// list-item scope checks below extend a copy of this list.
static Vector<FlyString> s_base_list {
    "applet"_fly_string,
    "caption"_fly_string,
    "html"_fly_string,
    "table"_fly_string,
    "td"_fly_string,
    "th"_fly_string,
    "marquee"_fly_string,
    "object"_fly_string,
    "template"_fly_string,
};
|
||||
|
||||
StackOfOpenElements::~StackOfOpenElements() = default;
|
||||
|
||||
// Passes the whole vector of stack entries to the visitor in one call.
void StackOfOpenElements::visit_edges(JS::Cell::Visitor& visitor)
{
    auto& stack_entries = m_elements;
    visitor.visit(stack_entries);
}
|
||||
|
||||
// Scans the stack from the most recently pushed element backwards.
// Returns true on the first element whose local name equals `tag_name`, and
// false on the first element whose local name appears in `list`.
// Running off the top of the stack without either outcome is treated as a bug.
bool StackOfOpenElements::has_in_scope_impl(FlyString const& tag_name, Vector<FlyString> const& list) const
{
    for (size_t depth = m_elements.size(); depth > 0; --depth) {
        auto const& local_name = m_elements[depth - 1]->local_name();
        if (local_name == tag_name)
            return true;
        if (list.contains_slow(local_name))
            return false;
    }
    VERIFY_NOT_REACHED();
}
|
||||
|
||||
// Scope check by tag name using the unmodified base list as scope boundaries.
bool StackOfOpenElements::has_in_scope(FlyString const& tag_name) const
{
    auto const& scope_boundaries = s_base_list;
    return has_in_scope_impl(tag_name, scope_boundaries);
}
|
||||
|
||||
// Same scan as the tag-name overload, but the match is by pointer identity
// with `target_node` rather than by local name.
bool StackOfOpenElements::has_in_scope_impl(const DOM::Element& target_node, Vector<FlyString> const& list) const
{
    for (size_t depth = m_elements.size(); depth > 0; --depth) {
        auto const& node = m_elements[depth - 1];
        if (node.ptr() == &target_node)
            return true;
        if (list.contains_slow(node->local_name()))
            return false;
    }
    VERIFY_NOT_REACHED();
}
|
||||
|
||||
// Scope check for a specific element using the unmodified base list as scope boundaries.
bool StackOfOpenElements::has_in_scope(const DOM::Element& target_node) const
{
    auto const& scope_boundaries = s_base_list;
    return has_in_scope_impl(target_node, scope_boundaries);
}
|
||||
|
||||
// Scope check whose boundaries are the base list plus "button".
bool StackOfOpenElements::has_in_button_scope(FlyString const& tag_name) const
{
    // Work on a copy so the shared base list stays unchanged.
    Vector<FlyString> scope_boundaries = s_base_list;
    scope_boundaries.append("button"_fly_string);
    return has_in_scope_impl(tag_name, scope_boundaries);
}
|
||||
|
||||
// Scope check whose boundaries are only "html", "table" and "template"
// (the base list is not used here).
bool StackOfOpenElements::has_in_table_scope(FlyString const& tag_name) const
{
    Vector<FlyString> const scope_boundaries {
        "html"_fly_string,
        "table"_fly_string,
        "template"_fly_string,
    };
    return has_in_scope_impl(tag_name, scope_boundaries);
}
|
||||
|
||||
// Scope check whose boundaries are the base list plus "ol" and "ul".
bool StackOfOpenElements::has_in_list_item_scope(FlyString const& tag_name) const
{
    // Work on a copy so the shared base list stays unchanged.
    Vector<FlyString> scope_boundaries = s_base_list;
    scope_boundaries.append("ol"_fly_string);
    scope_boundaries.append("ul"_fly_string);
    return has_in_scope_impl(tag_name, scope_boundaries);
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-select-scope
|
||||
// The stack of open elements is said to have a particular element in select scope
|
||||
// when it has that element in the specific scope consisting of all element types except the following:
|
||||
// - optgroup in the HTML namespace
|
||||
// - option in the HTML namespace
|
||||
// NOTE: In this case it's "all element types _except_"
|
||||
bool StackOfOpenElements::has_in_select_scope(FlyString const& tag_name) const
|
||||
{
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#has-an-element-in-the-specific-scope
|
||||
// 1. Initialize node to be the current node (the bottommost node of the stack).
|
||||
for (auto& node : m_elements.in_reverse()) {
|
||||
// 2. If node is the target node, terminate in a match state.
|
||||
if (node->local_name() == tag_name)
|
||||
return true;
|
||||
// 3. Otherwise, if node is one of the element types in list, terminate in a failure state.
|
||||
// NOTE: Here "list" refers to all elements except option and optgroup
|
||||
if (node->local_name() != HTML::TagNames::option && node->local_name() != HTML::TagNames::optgroup)
|
||||
return false;
|
||||
// 4. Otherwise, set node to the previous entry in the stack of open elements and return to step 2.
|
||||
}
|
||||
// [4.] (This will never fail, since the loop will always terminate in the previous step if the top of the stack
|
||||
// — an html element — is reached.)
|
||||
VERIFY_NOT_REACHED();
|
||||
}
|
||||
|
||||
// Returns true if this exact element (pointer identity) is somewhere on the stack.
bool StackOfOpenElements::contains(const DOM::Element& element) const
{
    auto const* wanted = &element;
    for (size_t index = 0; index < m_elements.size(); ++index) {
        if (m_elements[index].ptr() == wanted)
            return true;
    }
    return false;
}
|
||||
|
||||
bool StackOfOpenElements::contains_template_element() const
|
||||
{
|
||||
for (auto const& element : m_elements) {
|
||||
if (element->namespace_uri() != Namespace::HTML)
|
||||
continue;
|
||||
if (element->local_name() == HTML::TagNames::template_)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Pops elements until an HTML-namespace element named `tag_name` has been
// popped; that matching element is removed as well.
void StackOfOpenElements::pop_until_an_element_with_tag_name_has_been_popped(FlyString const& tag_name)
{
    auto current_node_is_target = [&] {
        auto const& node = *m_elements.last();
        if (node.namespace_uri() != Namespace::HTML)
            return false;
        return !(node.local_name() != tag_name);
    };

    // Discard everything sitting on top of the target...
    while (!current_node_is_target())
        (void)pop();

    // ...and then the target itself.
    (void)pop();
}
|
||||
|
||||
// Walks from the most recently pushed element back towards `formatting_element`
// and remembers each "special" element seen; the one remembered last is the
// special element closest to `formatting_element`. Returns null if none is found.
JS::GCPtr<DOM::Element> StackOfOpenElements::topmost_special_node_below(DOM::Element const& formatting_element)
{
    JS::GCPtr<DOM::Element> topmost_special = nullptr;
    for (size_t depth = m_elements.size(); depth > 0; --depth) {
        auto& node = m_elements[depth - 1];
        if (node.ptr() == &formatting_element)
            break;
        if (HTMLParser::is_special_tag(node->local_name(), node->namespace_uri()))
            topmost_special = node.ptr();
    }
    return topmost_special.ptr();
}
|
||||
|
||||
// Finds the most recently pushed element whose local name equals `tag_name`.
// Returns the element together with its index in the stack, or
// { nullptr, -1 } when there is no such element.
StackOfOpenElements::LastElementResult StackOfOpenElements::last_element_with_tag_name(FlyString const& tag_name)
{
    for (size_t remaining = m_elements.size(); remaining > 0; --remaining) {
        auto const position = static_cast<ssize_t>(remaining - 1);
        auto& candidate = m_elements[position];
        if (candidate->local_name() == tag_name)
            return { candidate.ptr(), position };
    }
    return { nullptr, -1 };
}
|
||||
|
||||
// Returns the element that was pushed just before `target` (the next entry
// towards the top of the stack), or null if `target` is not on the stack or
// has nothing above it.
JS::GCPtr<DOM::Element> StackOfOpenElements::element_immediately_above(DOM::Element const& target)
{
    bool seen_target = false;
    for (size_t depth = m_elements.size(); depth > 0; --depth) {
        auto& candidate = m_elements[depth - 1];
        if (candidate.ptr() == &target) {
            seen_target = true;
            continue;
        }
        // First non-target entry after the target has been passed.
        if (seen_target)
            return candidate.ptr();
    }
    return nullptr;
}
|
||||
|
||||
// Removes the first stack entry that is this exact element; no-op if absent.
void StackOfOpenElements::remove(DOM::Element const& element)
{
    auto const* doomed = &element;
    auto is_doomed = [doomed](auto& entry) {
        return entry.ptr() == doomed;
    };
    m_elements.remove_first_matching(is_doomed);
}
|
||||
|
||||
// Replaces the first stack entry that is `to_remove` (pointer identity) with
// `to_add`, keeping its position. Does nothing if `to_remove` is not on the stack.
void StackOfOpenElements::replace(DOM::Element const& to_remove, JS::NonnullGCPtr<DOM::Element> to_add)
{
    for (auto& slot : m_elements) {
        if (slot.ptr() != &to_remove)
            continue;
        // Overwrite the slot in place; removing and re-inserting at the same
        // index would shift the tail of the vector twice for the same result.
        slot = to_add;
        break;
    }
}
|
||||
|
||||
// Inserts `element_to_add` directly after the first occurrence of `target`
// in the stack (i.e. one step closer to the current node).
// Does nothing if `target` is not on the stack.
void StackOfOpenElements::insert_immediately_below(JS::NonnullGCPtr<DOM::Element> element_to_add, DOM::Element const& target)
{
    size_t const count = m_elements.size();
    for (size_t index = 0; index < count; ++index) {
        if (m_elements[index].ptr() != &target)
            continue;
        m_elements.insert(index + 1, element_to_add);
        return;
    }
}
|
||||
|
||||
}
|
72
Libraries/LibWeb/HTML/Parser/StackOfOpenElements.h
Normal file
72
Libraries/LibWeb/HTML/Parser/StackOfOpenElements.h
Normal file
|
@ -0,0 +1,72 @@
|
|||
/*
|
||||
* Copyright (c) 2020-2022, Andreas Kling <andreas@ladybird.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <LibWeb/DOM/Element.h>
|
||||
#include <LibWeb/Forward.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#stack-of-open-elements
|
||||
class StackOfOpenElements {
|
||||
public:
|
||||
// Initially, the stack of open elements is empty.
|
||||
// The stack grows downwards; the topmost node on the stack is the first one added to the stack,
|
||||
// and the bottommost node of the stack is the most recently added node in the stack
|
||||
// (notwithstanding when the stack is manipulated in a random access fashion as part of the handling for misnested tags).
|
||||
|
||||
StackOfOpenElements() = default;
|
||||
~StackOfOpenElements();
|
||||
|
||||
DOM::Element& first() { return *m_elements.first(); }
|
||||
DOM::Element& last() { return *m_elements.last(); }
|
||||
|
||||
bool is_empty() const { return m_elements.is_empty(); }
|
||||
void push(JS::NonnullGCPtr<DOM::Element> element) { m_elements.append(element); }
|
||||
JS::NonnullGCPtr<DOM::Element> pop() { return *m_elements.take_last(); }
|
||||
void remove(DOM::Element const& element);
|
||||
void replace(DOM::Element const& to_remove, JS::NonnullGCPtr<DOM::Element> to_add);
|
||||
void insert_immediately_below(JS::NonnullGCPtr<DOM::Element> element_to_add, DOM::Element const& target);
|
||||
|
||||
const DOM::Element& current_node() const { return *m_elements.last(); }
|
||||
DOM::Element& current_node() { return *m_elements.last(); }
|
||||
|
||||
bool has_in_scope(FlyString const& tag_name) const;
|
||||
bool has_in_button_scope(FlyString const& tag_name) const;
|
||||
bool has_in_table_scope(FlyString const& tag_name) const;
|
||||
bool has_in_list_item_scope(FlyString const& tag_name) const;
|
||||
bool has_in_select_scope(FlyString const& tag_name) const;
|
||||
|
||||
bool has_in_scope(const DOM::Element&) const;
|
||||
|
||||
bool contains(const DOM::Element&) const;
|
||||
[[nodiscard]] bool contains_template_element() const;
|
||||
|
||||
auto const& elements() const { return m_elements; }
|
||||
auto& elements() { return m_elements; }
|
||||
|
||||
void pop_until_an_element_with_tag_name_has_been_popped(FlyString const& local_name);
|
||||
|
||||
JS::GCPtr<DOM::Element> topmost_special_node_below(DOM::Element const&);
|
||||
|
||||
struct LastElementResult {
|
||||
JS::GCPtr<DOM::Element> element;
|
||||
ssize_t index;
|
||||
};
|
||||
LastElementResult last_element_with_tag_name(FlyString const&);
|
||||
JS::GCPtr<DOM::Element> element_immediately_above(DOM::Element const&);
|
||||
|
||||
void visit_edges(JS::Cell::Visitor&);
|
||||
|
||||
private:
|
||||
bool has_in_scope_impl(FlyString const& tag_name, Vector<FlyString> const&) const;
|
||||
bool has_in_scope_impl(const DOM::Element& target_node, Vector<FlyString> const&) const;
|
||||
|
||||
Vector<JS::NonnullGCPtr<DOM::Element>> m_elements;
|
||||
};
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue