From e45c8b842c64bef30286ac3a34fc5385d1308a9f Mon Sep 17 00:00:00 2001
From: Andreas Kling
Date: Sat, 23 May 2020 19:56:07 +0200
Subject: [PATCH] LibWeb: Implement a bit more of DOCTYPE tokenization

Add an ON_ASCII_UPPER_ALPHA matcher and use it to lower-case ASCII
upper-case letters while building the DOCTYPE name. Add '>', NUL, EOF
and catch-all branches (mostly TODO() stubs) to the DOCTYPE,
BeforeDOCTYPEName and DOCTYPEName states, and introduce an
AfterDOCTYPEName state that skips whitespace and emits the token on '>'.

---
Reviewer notes (ignored by git-am):
 * DOCTYPEName / ON_ASCII_UPPER_ALPHA now ends with `continue;`. The
   ON_* matchers shown in this patch are bare `if`s, so without it the
   same character would go on to the ANYTHING_ELSE branch below and be
   appended a second time in its original case (assuming ANYTHING_ELSE
   is a catch-all -- its definition is not part of this patch).
 * Pre-existing, not touched here: ON_WHITESPACE tests '\a'; the HTML
   tokenizer's whitespace set is TAB, LF, FF, SPACE, so this looks like
   it should be '\n' -- TODO confirm and fix in a separate change.
 * Indentation of the hunks was reconstructed; the "index" blob id below
   predates the added `continue;` line.

 Libraries/LibWeb/Parser/HTMLTokenizer.cpp | 72 +++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
index 39e3f8b5947..b5835446d39 100644
--- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
@@ -58,6 +58,9 @@
 #define ON_ASCII_ALPHA \
     if (current_input_character.has_value() && isalpha(current_input_character.value()))
 
+#define ON_ASCII_UPPER_ALPHA \
+    if (current_input_character.has_value() && current_input_character.value() >= 'A' && current_input_character.value() <= 'Z')
+
 #define ON_WHITESPACE \
     if (current_input_character.has_value() && (current_input_character.value() == '\t' || current_input_character.value() == '\a' || current_input_character.value() == '\f' || current_input_character.value() == ' '))
 
@@ -207,6 +210,18 @@ void HTMLTokenizer::run()
                 {
                     SWITCH_TO(BeforeDOCTYPEName);
                 }
+                ON('>')
+                {
+                    RECONSUME_IN(BeforeDOCTYPEName);
+                }
+                ON_EOF
+                {
+                    TODO();
+                }
+                ANYTHING_ELSE
+                {
+                    TODO();
+                }
             }
             END_STATE
 
@@ -216,6 +231,24 @@ void HTMLTokenizer::run()
                 {
                     continue;
                 }
+                ON_ASCII_UPPER_ALPHA
+                {
+                    create_new_token(HTMLToken::Type::DOCTYPE);
+                    m_current_token.m_doctype.name.append(tolower(current_input_character.value()));
+                    SWITCH_TO(DOCTYPEName);
+                }
+                ON(0)
+                {
+                    TODO();
+                }
+                ON('>')
+                {
+                    TODO();
+                }
+                ON_EOF
+                {
+                    TODO();
+                }
                 ANYTHING_ELSE
                 {
                     create_new_token(HTMLToken::Type::DOCTYPE);
@@ -227,11 +260,28 @@ void HTMLTokenizer::run()
 
             BEGIN_STATE(DOCTYPEName)
             {
+                ON_WHITESPACE
+                {
+                    SWITCH_TO(AfterDOCTYPEName);
+                }
                 ON('>')
                 {
                     emit_current_token();
                     SWITCH_TO(Data);
                 }
+                ON_ASCII_UPPER_ALPHA
+                {
+                    m_current_token.m_doctype.name.append(tolower(current_input_character.value()));
+                    continue; // Stay in this state; otherwise the later branches are still evaluated for this character.
+                }
+                ON(0)
+                {
+                    TODO();
+                }
+                ON_EOF
+                {
+                    TODO();
+                }
                 ANYTHING_ELSE
                 {
                     m_current_token.m_doctype.name.append(current_input_character.value());
@@ -240,6 +290,28 @@ void HTMLTokenizer::run()
             }
             END_STATE
 
+            BEGIN_STATE(AfterDOCTYPEName)
+            {
+                ON_WHITESPACE
+                {
+                    continue;
+                }
+                ON('>')
+                {
+                    emit_current_token();
+                    SWITCH_TO(Data);
+                }
+                ON_EOF
+                {
+                    TODO();
+                }
+                ANYTHING_ELSE
+                {
+                    TODO();
+                }
+            }
+            END_STATE
+
             BEGIN_STATE(BeforeAttributeName)
             {
                 ON_WHITESPACE