Mirror of https://github.com/LadybirdBrowser/ladybird.git

JSSpecCompiler+LibXML: Store location for tokens

Author: https://github.com/DanShaders
Commit: dee4978d67 (parent: d219c91ca9)
Pull-request: https://github.com/SerenityOS/serenity/pull/22899
Reviewed-by: https://github.com/ADKaster ✅
Notes: sideshowbarker, 2024-07-16 23:34:44 +09:00

7 changed files with 78 additions and 50 deletions
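In short: the tokenizer now stamps every Token with the Location (line/column in the specification XML) it was produced from, threading a SpecificationParsingContext through tokenize_string() and tokenize_tree() so XML offsets can be translated into locations. A minimal standalone sketch of the pattern (illustrative names only, not the project's actual code):

    #include <cstddef>
    #include <cstdio>
    #include <string_view>

    struct Location {
        std::size_t line = 0;
        std::size_t column = 0;
    };

    enum class TokenType { Word, Number };

    struct Token {
        TokenType type;
        std::string_view data;
        Location location; // recorded at tokenization time, consumed at diagnostic time
    };

    // A later stage can point at the exact source position using only the token,
    // without access to the lexer or the XML tree.
    void report_unexpected(Token const& token)
    {
        std::fprintf(stderr, "error at %zu:%zu: unexpected '%.*s'\n",
            token.location.line, token.location.column,
            static_cast<int>(token.data.size()), token.data.data());
    }

    int main()
    {
        Token t { TokenType::Word, "frobnicate", { 12, 7 } };
        report_unexpected(t); // prints: error at 12:7: unexpected 'frobnicate'
    }

The per-file changes follow (filenames were not preserved by this mirror view; hunk headers mark the boundaries).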
@@ -59,6 +59,7 @@ class ControlFlowGraph;
 class RecursiveASTVisitor;
 
 // Parser/SpecParser.h
+class SpecificationParsingContext;
 class AlgorithmStep;
 class AlgorithmStepList;
 class Algorithm;
@@ -4,16 +4,17 @@
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
-#include <AK/GenericLexer.h>
 #include <AK/NonnullOwnPtr.h>
+#include <LibXML/Parser/Parser.h>
 
 #include "Parser/Lexer.h"
+#include "Parser/SpecParser.h"
 #include "Parser/XMLUtils.h"
 
 namespace JSSpecCompiler {
 
 namespace {
-Optional<Token> consume_number(GenericLexer& lexer, XML::Node const* node)
+Optional<Token> consume_number(XML::LineTrackingLexer& lexer, XML::Node const* node, Location& location)
 {
     u64 start = lexer.tell();
 
@@ -35,7 +36,7 @@ Optional<Token> consume_number(GenericLexer& lexer, XML::Node const* node)
 
     auto length = lexer.tell() - start;
     lexer.retreat(length);
-    return { Token { TokenType::Number, lexer.consume(length), node } };
+    return { Token { TokenType::Number, lexer.consume(length), node, move(location) } };
 }
 
 bool can_end_word_token(char c)
@@ -44,56 +45,68 @@
 }
 }
 
-ParseErrorOr<void> tokenize_string(XML::Node const* node, StringView view, Vector<Token>& tokens)
+ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens)
 {
-#define CONSUME_IF_NEXT(view, type) \
-    if (lexer.next_is(view##sv)) { \
-        size_t length = __builtin_strlen(view); \
-        tokens.append({ TokenType::type, lexer.consume(length), node }); \
-        continue; \
-    }
-
-    GenericLexer lexer(view);
+    static constexpr struct {
+        StringView text_to_match;
+        TokenType token_type;
+    } choices[] = {
+        { "-"sv, TokenType::AmbiguousMinus },
+        { "}"sv, TokenType::BraceClose },
+        { "{"sv, TokenType::BraceOpen },
+        { ":"sv, TokenType::Colon },
+        { ","sv, TokenType::Comma },
+        { "/"sv, TokenType::Division },
+        { ". "sv, TokenType::Dot },
+        { ".\n"sv, TokenType::Dot },
+        { "="sv, TokenType::Equals },
+        { "is equal to"sv, TokenType::Equals },
+        { "!"sv, TokenType::ExclamationMark },
+        { ">"sv, TokenType::Greater },
+        { "is"sv, TokenType::Is },
+        { "<"sv, TokenType::Less },
+        { "."sv, TokenType::MemberAccess },
+        { "×"sv, TokenType::Multiplication },
+        { "is not equal to"sv, TokenType::NotEquals },
+        { "≠"sv, TokenType::NotEquals },
+        { ")"sv, TokenType::ParenClose },
+        { "("sv, TokenType::ParenOpen },
+        { "+"sv, TokenType::Plus },
+    };
+
+    XML::LineTrackingLexer lexer(view, node->offset);
+
     while (!lexer.is_eof()) {
         lexer.ignore_while(is_ascii_space);
 
-        if (auto result = consume_number(lexer, node); result.has_value()) {
+        // FIXME: This is incorrect since we count text offset after XML reference resolution. To do
+        //        this properly, we need support from XML::Parser.
+        Location token_location = ctx.location_from_xml_offset(lexer.offset_for(lexer.tell()));
+
+        if (auto result = consume_number(lexer, node, token_location); result.has_value()) {
             tokens.append(result.release_value());
             continue;
         }
 
-        CONSUME_IF_NEXT("(", ParenOpen);
-        CONSUME_IF_NEXT(")", ParenClose);
-        CONSUME_IF_NEXT("{", BraceOpen);
-        CONSUME_IF_NEXT("}", BraceClose);
-        CONSUME_IF_NEXT(",", Comma);
-        CONSUME_IF_NEXT(". ", Dot);
-        CONSUME_IF_NEXT(".\n", Dot);
-        CONSUME_IF_NEXT(":", Colon);
-        CONSUME_IF_NEXT(".", MemberAccess);
-        CONSUME_IF_NEXT("<", Less);
-        CONSUME_IF_NEXT(">", Greater);
-        CONSUME_IF_NEXT("is not equal to", NotEquals);
-        CONSUME_IF_NEXT("≠", NotEquals);
-        CONSUME_IF_NEXT("is equal to", Equals);
-        CONSUME_IF_NEXT("=", Equals);
-        CONSUME_IF_NEXT("+", Plus);
-        CONSUME_IF_NEXT("-", AmbiguousMinus);
-        CONSUME_IF_NEXT("×", Multiplication);
-        CONSUME_IF_NEXT("/", Division);
-        CONSUME_IF_NEXT("!", ExclamationMark);
-        CONSUME_IF_NEXT("is", Is);
+        bool matched = false;
+        for (auto const& [text_to_match, token_type] : choices) {
+            if (lexer.consume_specific(text_to_match)) {
+                tokens.append({ token_type, ""sv, node, move(token_location) });
+                matched = true;
+                break;
+            }
+        }
+        if (matched)
+            continue;
 
         StringView word = lexer.consume_until(can_end_word_token);
         if (word.length())
-            tokens.append({ TokenType::Word, word, node });
+            tokens.append({ TokenType::Word, word, node, move(token_location) });
     }
     return {};
-
-#undef CONSUME_IF_NEXT
 }
 
-ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow_substeps)
+ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps)
 {
     TokenizeTreeResult result;
     auto& tokens = result.tokens;
@@ -104,8 +117,10 @@ ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow
         if (result.substeps != nullptr)
            return ParseError::create("Substeps list must be the last non-empty child"sv, child);
 
+        Location child_location = ctx.location_from_xml_offset(child->offset);
+
         if (element.name == tag_var) {
-            tokens.append({ TokenType::Identifier, TRY(get_text_contents(child)), child });
+            tokens.append({ TokenType::Identifier, TRY(get_text_contents(child)), child, move(child_location) });
             return {};
         }
 
@@ -113,24 +128,24 @@ ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow
            auto element_class = TRY(deprecated_get_attribute_by_name(child, attribute_class));
            if (element_class != class_secnum)
                return ParseError::create(String::formatted("Expected 'secnum' as a class name of <span>, but found '{}'", element_class), child);
-           tokens.append({ TokenType::SectionNumber, TRY(get_text_contents(child)), child });
+           tokens.append({ TokenType::SectionNumber, TRY(get_text_contents(child)), child, move(child_location) });
            return {};
        }
 
        if (element.name == tag_emu_val) {
            auto contents = TRY(get_text_contents(child));
            if (contents.length() >= 2 && contents.starts_with('"') && contents.ends_with('"'))
-               tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child });
+               tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child, move(child_location) });
            else if (contents == "undefined")
-               tokens.append({ TokenType::Undefined, contents, child });
+               tokens.append({ TokenType::Undefined, contents, child, move(child_location) });
            else
-               tokens.append({ TokenType::Identifier, contents, child });
+               tokens.append({ TokenType::Identifier, contents, child, move(child_location) });
            return {};
        }
 
        if (element.name == tag_emu_xref) {
            auto contents = TRY(get_text_contents(TRY(get_only_child(child, "a"sv))));
-           tokens.append({ TokenType::Identifier, contents, child });
+           tokens.append({ TokenType::Identifier, contents, child, move(child_location) });
            return {};
        }
 
@@ -147,7 +162,7 @@ ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow
            auto view = text.builder.string_view();
            if (result.substeps && !contains_empty_text(child))
                return ParseError::create("Substeps list must be the last non-empty child"sv, child);
-           return tokenize_string(child, view, tokens);
+           return tokenize_string(ctx, child, view, tokens);
        },
        move(ignore_comments)));
 }
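The per-alternative CONSUME_IF_NEXT macro above is replaced by a constexpr table plus a single matching loop. A standalone sketch of the same table-driven technique (hypothetical names; the real code uses AK types, lexer.consume_specific(), and attaches the token's Location):

    #include <optional>
    #include <string_view>

    enum class TokenType { Comma, Dot, MemberAccess, ParenOpen, ParenClose };

    // One table entry per fixed-string token. Order matters: an entry must come
    // before any entry that is a prefix of it (". " before "."), or the shorter
    // match will shadow the longer one.
    struct Choice {
        std::string_view text_to_match;
        TokenType token_type;
    };

    inline constexpr Choice choices[] = {
        { ",", TokenType::Comma },
        { ". ", TokenType::Dot },
        { ".", TokenType::MemberAccess },
        { "(", TokenType::ParenOpen },
        { ")", TokenType::ParenClose },
    };

    // Try each entry at the current position; on a match, consume it and
    // report which token type matched.
    std::optional<TokenType> match_fixed_token(std::string_view& input)
    {
        for (auto const& [text, type] : choices) {
            if (input.substr(0, text.size()) == text) {
                input.remove_prefix(text.size());
                return type;
            }
        }
        return std::nullopt;
    }

Compared to the macro, the table keeps the matching logic in one place, turns the alternatives into inspectable data, and gives each match a single point at which to record the token's location.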
@@ -31,13 +31,13 @@ inline constexpr StringView attribute_id = "id"sv;
 
 inline constexpr StringView class_secnum = "secnum"sv;
 
-ParseErrorOr<void> tokenize_string(XML::Node const* node, StringView view, Vector<Token>& tokens);
+ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens);
 
 struct TokenizeTreeResult {
     Vector<Token> tokens;
     XML::Node const* substeps = nullptr;
 };
 
-ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow_substeps = false);
+ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps = false);
 
 }
@@ -64,7 +64,7 @@ Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx,
 {
     VERIFY(element->as_element().name == tag_li);
 
-    auto tokenization_result = tokenize_tree(element, true);
+    auto tokenization_result = tokenize_tree(ctx, element, true);
     if (tokenization_result.is_error()) {
         ctx.diag().error(ctx.location_from_xml_offset(tokenization_result.error()->offset()),
             "{}", tokenization_result.error()->to_string());
@@ -253,7 +253,7 @@ void SpecificationClause::collect_into(TranslationUnitRef translation_unit)
 ParseErrorOr<void> SpecificationClause::parse_header(XML::Node const* element)
 {
     VERIFY(element->as_element().name == tag_h1);
-    auto tokens = TRY(tokenize_tree(element));
+    auto tokens = TRY(tokenize_tree(*m_ctx_pointer, element));
     TextParser parser(tokens.tokens, element);
     m_header = TRY(parser.parse_clause_header());
     return {};
@@ -226,7 +226,7 @@ ParseErrorOr<Tree> TextParser::parse_expression()
 
         if (token.type == TokenType::ParenOpen) {
             if (last_element_type == ExpressionType)
-                stack.append(Token { TokenType::FunctionCall, ""sv, m_node });
+                stack.append(Token { TokenType::FunctionCall, ""sv, token.node, token.location });
             stack.append(token);
 
             if (m_next_token_index + 1 < m_tokens.size()
@@ -9,6 +9,7 @@
 #include <LibXML/Forward.h>
 
 #include "AST/AST.h"
+#include "DiagnosticEngine.h"
 
 namespace JSSpecCompiler {
 
@@ -46,7 +47,6 @@ constexpr i32 closing_bracket_precedence = 18;
     F(Multiplication, 5, Invalid, Multiplication, Invalid) \
     F(Division, 5, Invalid, Division, Invalid) \
     F(FunctionCall, 2, Invalid, FunctionCall, Invalid) \
-    F(ArraySubscript, 2, Invalid, ArraySubscript, Invalid) \
     F(ExclamationMark, 3, AssertCompletion, Invalid, Invalid) \
     F(Is, -1, Invalid, Invalid, Invalid)
 
@@ -110,6 +110,7 @@ struct Token {
     TokenType type;
     StringView data;
     XML::Node const* node;
+    Location location;
 };
 
 }
@@ -39,10 +39,21 @@ struct Listener {
     virtual void error(ParseError const&) { }
 };
 
+// FIXME: This is also used in JSSpecCompiler, so should probably live in AK or even merged with
+//        AK::GenericLexer.
 class LineTrackingLexer : public GenericLexer {
 public:
     using GenericLexer::GenericLexer;
 
+    LineTrackingLexer(StringView input, XML::Offset start_offset)
+        : GenericLexer(input)
+        , m_cached_offset {
+            .line = start_offset.line,
+            .column = start_offset.column,
+        }
+    {
+    }
+
     Offset cached_offset() const { return m_cached_offset; }
     void restore_cached_offset(Offset cached_offset) { m_cached_offset = cached_offset; }
     Offset offset_for(size_t) const;
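LibXML's LineTrackingLexer gains a constructor that seeds its cached offset from an XML::Offset, so a lexer running over an embedded text fragment reports line/column relative to the enclosing document. A simplified standalone sketch of the caching idea behind offset_for() (an assumption-laden reconstruction: the real class derives from AK::GenericLexer and can also restore an earlier cached offset, while this sketch only scans forward):

    #include <cstddef>
    #include <string_view>

    struct Offset {
        std::size_t line = 0;
        std::size_t column = 0;
    };

    class LineTracker {
    public:
        // start_offset plays the role of the new LineTrackingLexer(StringView,
        // XML::Offset) constructor: positions are reported relative to where
        // the fragment sits in the whole document.
        LineTracker(std::string_view input, Offset start_offset)
            : m_input(input)
            , m_cached_offset(start_offset)
        {
        }

        // Map a byte position to line/column, resuming from the last query
        // instead of rescanning from the start of the input each time.
        Offset offset_for(std::size_t position)
        {
            for (; m_cached_position < position && m_cached_position < m_input.size(); ++m_cached_position) {
                if (m_input[m_cached_position] == '\n') {
                    ++m_cached_offset.line;
                    m_cached_offset.column = 0;
                } else {
                    ++m_cached_offset.column;
                }
            }
            return m_cached_offset;
        }

    private:
        std::string_view m_input;
        Offset m_cached_offset;
        std::size_t m_cached_position = 0;
    };

    // Usage: LineTracker tracker("foo\nbar", { 10, 4 });
    //        tracker.offset_for(5); // => { .line = 11, .column = 1 }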