mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-04-21 20:15:17 +00:00
JSSpecCompiler: Add functions for splitting node contents into tokens
This commit is contained in:
parent
8342361481
commit
9f29e04897
Notes:
sideshowbarker
2024-07-17 00:57:24 +09:00
Author: https://github.com/DanShaders Commit: https://github.com/SerenityOS/serenity/commit/9f29e04897 Pull-request: https://github.com/SerenityOS/serenity/pull/20632 Reviewed-by: https://github.com/ADKaster ✅
6 changed files with 378 additions and 0 deletions
157
Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp
Normal file
157
Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp
Normal file
|
@ -0,0 +1,157 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/GenericLexer.h>
|
||||
#include <AK/NonnullOwnPtr.h>
|
||||
|
||||
#include "Parser/Lexer.h"
|
||||
#include "Parser/XMLUtils.h"
|
||||
|
||||
namespace JSSpecCompiler {
|
||||
|
||||
namespace {
|
||||
// Attempts to lex a number at the current lexer position: an optional leading '-', one or more
// ASCII digits, and optionally a '.' followed by one or more digits.
//
// On success the lexer is left right after the number and a Number token whose text is the
// matched slice of the lexer input is returned. On failure the lexer position is restored and an
// empty Optional is returned.
Optional<Token> consume_number(GenericLexer& lexer, XML::Node const* node)
{
    u64 const initial_position = lexer.tell();
    auto rewind_to_start = [&] { lexer.retreat(lexer.tell() - initial_position); };

    // Optional sign.
    if (lexer.next_is('-'))
        lexer.consume(1);

    // At least one digit is required; otherwise hand back the sign (if any) and bail out.
    if (!lexer.next_is(is_ascii_digit)) {
        rewind_to_start();
        return {};
    }
    lexer.consume_while(is_ascii_digit);

    // A '.' is only part of the number when digits follow it; a lone '.' is given back.
    if (lexer.next_is('.')) {
        lexer.consume(1);
        auto fraction_digits = lexer.consume_while(is_ascii_digit);
        if (fraction_digits.length() == 0)
            lexer.retreat(1);
    }

    // Rewind and re-consume the whole span in one go to obtain it as a single view.
    auto const token_length = lexer.tell() - initial_position;
    rewind_to_start();
    return { Token { TokenType::Number, lexer.consume(token_length), node } };
}
|
||||
|
||||
bool can_end_word_token(char c)
|
||||
{
|
||||
return is_ascii_space(c) || ".,"sv.contains(c);
|
||||
}
|
||||
}
|
||||
|
||||
// Splits `view` into tokens and appends them to `tokens`; every produced token refers to `node`.
//
// At each position (after skipping ASCII whitespace) the lexer tries, in this order:
//   1. a number (see consume_number),
//   2. the fixed patterns from the table below, first match wins,
//   3. a Word token running up to the next whitespace, '.' or ','.
//
// Textual keywords ("is", "is equal to", "is not equal to") are only recognized when they are not
// immediately followed by an ASCII letter or digit, so that a word which merely starts with a
// keyword (e.g. "isolated") is lexed as a single Word instead of being split.
//
// The visible implementation never fails; it always returns success.
ParseErrorOr<void> tokenize_string(XML::Node const* node, StringView view, Vector<Token>& tokens)
{
    struct Pattern {
        StringView text;
        TokenType type;
        bool is_keyword = false;
    };

    // Order matters: longer alternatives must precede their own prefixes
    // (". " before ".", "is equal to" before "is", ...).
    static constexpr Pattern patterns[] = {
        { "("sv, TokenType::ParenOpen },
        { ")"sv, TokenType::ParenClose },
        { "{"sv, TokenType::BraceOpen },
        { "}"sv, TokenType::BraceClose },
        { ","sv, TokenType::Comma },
        { ". "sv, TokenType::Dot },
        { ".\n"sv, TokenType::Dot },
        { ":"sv, TokenType::Colon },
        { "."sv, TokenType::MemberAccess },
        { "<"sv, TokenType::Less },
        { ">"sv, TokenType::Greater },
        { "is not equal to"sv, TokenType::NotEquals, true },
        { "≠"sv, TokenType::NotEquals },
        { "is equal to"sv, TokenType::Equals, true },
        { "="sv, TokenType::Equals },
        { "+"sv, TokenType::Plus },
        { "-"sv, TokenType::AmbiguousMinus },
        { "×"sv, TokenType::Multiplication },
        { "/"sv, TokenType::Division },
        { "!"sv, TokenType::ExclamationMark },
        { "is"sv, TokenType::Is, true },
    };

    GenericLexer lexer(view);
    while (!lexer.is_eof()) {
        lexer.ignore_while(is_ascii_space);

        if (auto number = consume_number(lexer, node); number.has_value()) {
            tokens.append(number.release_value());
            continue;
        }

        bool matched_pattern = false;
        for (auto const& pattern : patterns) {
            if (!lexer.next_is(pattern.text))
                continue;
            // peek() yields '\0' past the end of input, which counts as a word boundary.
            if (pattern.is_keyword && is_ascii_alphanumeric(lexer.peek(pattern.text.length())))
                continue;
            tokens.append({ pattern.type, lexer.consume(pattern.text.length()), node });
            matched_pattern = true;
            break;
        }
        if (matched_pattern)
            continue;

        // Anything else is a plain word. Trailing whitespace at the end of input yields an empty
        // word here, which is dropped.
        StringView word = lexer.consume_until(can_end_word_token);
        if (word.length())
            tokens.append({ TokenType::Word, word, node });
    }
    return {};
}
|
||||
|
||||
// Converts the direct children of the element `node` into a flat token list.
//
// Text children are lexed with tokenize_string(); <var>, <span class="secnum">, <emu-val> and
// <emu-xref> children each become a single token. An <ol> child is not tokenized but recorded in
// the result's `substeps`; it is only accepted when `allow_substeps` is true and must not be
// followed by any element or non-whitespace text. Comments are ignored.
//
// Returns a ParseError for any other child element and for any violated constraint above.
ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow_substeps)
{
    TokenizeTreeResult result;
    auto& tokens = result.tokens;

    for (auto const& child : node->as_element().children) {
        TRY(child->content.visit(
            [&](XML::Node::Element const& element) -> ParseErrorOr<void> {
                // Nothing but whitespace/comments may follow the nested list.
                if (result.substeps != nullptr)
                    return ParseError::create("Substeps list must be the last non-empty child"sv, child);

                auto const& tag = element.name;

                if (tag == tag_var) {
                    auto variable_name = TRY(get_text_contents(child));
                    tokens.append({ TokenType::Identifier, variable_name, child });
                    return {};
                }

                if (tag == tag_span) {
                    // The only <span> understood here is the section number marker.
                    auto span_class = TRY(get_attribute_by_name(child, attribute_class));
                    if (span_class != class_secnum)
                        return ParseError::create(String::formatted("Expected 'secnum' as a class name of <span>, but found '{}'", span_class), child);
                    auto section_number = TRY(get_text_contents(child));
                    tokens.append({ TokenType::SectionNumber, section_number, child });
                    return {};
                }

                if (tag == tag_emu_val) {
                    auto contents = TRY(get_text_contents(child));
                    bool const is_quoted = contents.length() >= 2 && contents.starts_with('"') && contents.ends_with('"');
                    if (is_quoted) {
                        // String literal: the token text excludes the surrounding quotes.
                        tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child });
                    } else {
                        auto const type = contents == "undefined" ? TokenType::Undefined : TokenType::Identifier;
                        tokens.append({ type, contents, child });
                    }
                    return {};
                }

                if (tag == tag_emu_xref) {
                    // The referenced name is the text of the single <a> child.
                    XML::Node const* anchor = TRY(get_only_child(child, "a"sv));
                    auto contents = TRY(get_text_contents(anchor));
                    tokens.append({ TokenType::Identifier, contents, child });
                    return {};
                }

                if (tag == tag_ol) {
                    if (!allow_substeps)
                        return ParseError::create("Found nested list but substeps are not allowed"sv, child);
                    result.substeps = child;
                    return {};
                }

                return ParseError::create(String::formatted("Unexpected child element with tag {}", tag), child);
            },
            [&](XML::Node::Text const& text) -> ParseErrorOr<void> {
                // After the nested list only whitespace-only text is tolerated.
                if (result.substeps && !contains_empty_text(child))
                    return ParseError::create("Substeps list must be the last non-empty child"sv, child);
                return tokenize_string(child, text.builder.string_view(), tokens);
            },
            move(ignore_comments)));
    }
    return result;
}
|
||||
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Parser/ParseError.h"
|
||||
#include "Parser/Token.h"
|
||||
|
||||
namespace JSSpecCompiler {
|
||||
|
||||
// Element tag names referenced by the parser.
inline constexpr StringView tag_emu_alg { "emu-alg"sv };
inline constexpr StringView tag_emu_clause { "emu-clause"sv };
inline constexpr StringView tag_emu_val { "emu-val"sv };
inline constexpr StringView tag_emu_xref { "emu-xref"sv };
inline constexpr StringView tag_h1 { "h1"sv };
inline constexpr StringView tag_li { "li"sv };
inline constexpr StringView tag_ol { "ol"sv };
inline constexpr StringView tag_p { "p"sv };
inline constexpr StringView tag_span { "span"sv };
inline constexpr StringView tag_var { "var"sv };

// Attribute names.
inline constexpr StringView attribute_aoid { "aoid"sv };
inline constexpr StringView attribute_class { "class"sv };
inline constexpr StringView attribute_id { "id"sv };

// Values of the `class` attribute.
inline constexpr StringView class_secnum { "secnum"sv };
|
||||
|
||||
// Lexes `view` and appends the resulting tokens to `tokens`; each token refers to `node`.
ParseErrorOr<void> tokenize_string(XML::Node const* node, StringView view, Vector<Token>& tokens);
|
||||
|
||||
struct TokenizeTreeResult {
|
||||
Vector<Token> tokens;
|
||||
XML::Node const* substeps = nullptr;
|
||||
};
|
||||
|
||||
// Tokenizes the children of the element `node`. A nested <ol> child is reported through
// TokenizeTreeResult::substeps and is rejected with a ParseError unless `allow_substeps` is true.
ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow_substeps = false);
|
||||
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include "Parser/ParseError.h"
|
||||
|
||||
namespace JSSpecCompiler {
|
||||
|
||||
// Creates a reference-counted ParseError that takes ownership of `message` and refers to `node`.
// The other create() overloads funnel into this one.
NonnullRefPtr<ParseError> ParseError::create(String message, XML::Node const* node)
|
||||
{
|
||||
return make_ref_counted<ParseError>(move(message), node);
|
||||
}
|
||||
|
||||
// Convenience overload for string views (typically literals): copies `message` into an owned
// String first. The conversion is wrapped in MUST(), i.e. it is not expected to fail.
NonnullRefPtr<ParseError> ParseError::create(StringView message, XML::Node const* node)
{
    auto owned_message = MUST(String::from_utf8(message));
    return create(move(owned_message), node);
}
|
||||
|
||||
// FIXME: Remove once String::formatted becomes infallible.
|
||||
NonnullRefPtr<ParseError> ParseError::create(ErrorOr<String> message, XML::Node const* node)
|
||||
{
|
||||
return create(MUST(message), node);
|
||||
}
|
||||
|
||||
// Renders the error as a multi-line string: an "error: <message>" line followed by one line per
// node on the path from the offending node up to the root (following `parent` links). Each of
// those lines contains the 1-based line:column of the node and a short description of it: the
// opening tag with its attributes for elements, the whitespace-trimmed contents for text nodes,
// or the word "comment".
String ParseError::to_string() const
{
    StringBuilder builder;
    builder.appendff("error: {}\n", m_message);

    for (XML::Node const* current = m_node; current != nullptr; current = current->parent) {
        // Offsets are stored 0-based; report them 1-based.
        builder.appendff(" at {}:{} ", current->offset.line + 1, current->offset.column + 1);

        if (current->is_element()) {
            auto const& element = current->as_element();
            builder.append("<"sv);
            builder.append(element.name);
            // Bind by reference to avoid copying every attribute name and value.
            for (auto const& [key, value] : element.attributes)
                builder.appendff(" {}=\"{}\"", key, value);
            builder.append(">\n"sv);
        } else if (current->is_text()) {
            builder.appendff("text \"{}\"\n", current->as_text().builder.string_view().trim_whitespace());
        } else {
            // Terminate the line like the other branches do, so that the entry for the parent
            // node starts on a line of its own.
            builder.append("comment\n"sv);
        }
    }
    return MUST(builder.to_string());
}
|
||||
|
||||
}
|
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/String.h>
|
||||
#include <LibXML/DOM/Node.h>
|
||||
|
||||
namespace JSSpecCompiler {
|
||||
|
||||
// An error produced while parsing the specification XML: a message plus the node it refers to.
// Instances are reference-counted and are normally obtained through one of the create() overloads.
class ParseError : public RefCounted<ParseError> {
public:
    // Factory functions; they differ only in how the message is supplied.
    static NonnullRefPtr<ParseError> create(String message, XML::Node const* node);
    static NonnullRefPtr<ParseError> create(StringView message, XML::Node const* node);
    static NonnullRefPtr<ParseError> create(ErrorOr<String> message, XML::Node const* node);

    ParseError(String&& message, XML::Node const* node)
        : m_message(move(message))
        , m_node(node)
    {
    }

    // Formats the message together with the chain of nodes leading to the error location.
    String to_string() const;

private:
    String m_message;
    // Node the error refers to; stored as a plain (non-owning) pointer.
    XML::Node const* m_node;
    // TODO: Support chained parse errors
};
|
||||
|
||||
// ErrorOr specialization used throughout the parser: either a T or a reference-counted ParseError.
template<typename T>
|
||||
using ParseErrorOr = ErrorOr<T, NonnullRefPtr<ParseError>>;
|
||||
|
||||
}
|
|
@ -0,0 +1,64 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/NonnullOwnPtr.h>
|
||||
#include <LibXML/DOM/Node.h>
|
||||
|
||||
#include "Parser/XMLUtils.h"
|
||||
|
||||
namespace JSSpecCompiler {
|
||||
|
||||
// Returns true if the text node `node` holds nothing but whitespace (or nothing at all).
// `node` is accessed via as_text(), so it is expected to be a text node.
bool contains_empty_text(XML::Node const* node)
{
    auto const trimmed_text = node->as_text().builder.string_view().trim_whitespace();
    return trimmed_text.is_empty();
}
|
||||
|
||||
// Looks up the attribute `attribute_name` on the element `node` and returns its value, or a
// ParseError pointing at `node` when the attribute is missing.
ParseErrorOr<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name)
{
    auto const& attribute = node->as_element().attributes.get(attribute_name);

    if (attribute.has_value())
        return attribute.value();
    return ParseError::create(String::formatted("Attribute {} is not present", attribute_name), node);
}
|
||||
|
||||
// Returns the text of the element `node`, which must have exactly one child and that child must
// be a text node; otherwise a ParseError pointing at `node` is returned.
ParseErrorOr<StringView> get_text_contents(XML::Node const* node)
{
    auto const& children = node->as_element().children;

    bool const has_single_text_child = children.size() == 1 && children[0]->is_text();
    if (!has_single_text_child)
        return ParseError::create("Expected single text node in a child list of the node"sv, node);
    return children[0]->as_text().builder.string_view();
}
|
||||
|
||||
// Returns the single child element of `element`, which must be named `tag_name`.
//
// Whitespace-only text children and comments are skipped. A ParseError is returned when a child
// element has a different tag name, when there is more than one child element, when a text child
// is not whitespace-only, or when no child element is found at all.
// NOTE(review): the "no child element" case reuses the "Element must have only one child"
// message; confirm whether a more specific wording is wanted.
ParseErrorOr<XML::Node const*> get_only_child(XML::Node const* element, StringView tag_name)
{
    XML::Node const* only_child = nullptr;

    for (auto const& child : element->as_element().children) {
        TRY(child->content.visit(
            [&](XML::Node::Element const& child_element) -> ParseErrorOr<void> {
                if (child_element.name != tag_name)
                    return ParseError::create(String::formatted("Expected child with the tag name {} but found {}", tag_name, child_element.name), child);
                if (only_child != nullptr)
                    return ParseError::create("Element must have only one child"sv, child);
                only_child = child;
                return {};
            },
            [&](XML::Node::Text const&) -> ParseErrorOr<void> {
                if (contains_empty_text(child))
                    return {};
                // This error is reported against the parent element rather than the text node.
                return ParseError::create("Element should not have non-empty child text nodes"sv, element);
            },
            move(ignore_comments)));
    }

    if (only_child != nullptr)
        return only_child;
    return ParseError::create(String::formatted("Element must have only one child"), element);
}
|
||||
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <LibXML/Forward.h>
|
||||
|
||||
#include "Parser/ParseError.h"
|
||||
|
||||
namespace JSSpecCompiler {
|
||||
|
||||
struct IgnoreComments {
|
||||
ParseErrorOr<void> operator()(XML::Node::Comment const&) { return {}; }
|
||||
};
|
||||
|
||||
// Shared IgnoreComments instance; callers pass it (via move()) as the comment handler to
// `content.visit(...)`.
inline constexpr IgnoreComments ignore_comments {};
|
||||
|
||||
// Returns true if the text node `node` is empty after trimming whitespace.
bool contains_empty_text(XML::Node const* node);
|
||||
|
||||
// Returns the value of attribute `attribute_name` of the element `node`, or a ParseError if the
// attribute is not present.
ParseErrorOr<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name);
|
||||
|
||||
// Returns the text of the element `node` if its only child is a text node, or a ParseError
// otherwise.
ParseErrorOr<StringView> get_text_contents(XML::Node const* node);
|
||||
|
||||
// Returns the single child element of `element`, which must have the tag `tag_name`; comments and
// whitespace-only text children are ignored. Returns a ParseError if that shape is not met.
ParseErrorOr<XML::Node const*> get_only_child(XML::Node const* element, StringView tag_name);
|
||||
|
||||
}
|
Loading…
Add table
Reference in a new issue