ladybird/Userland/Libraries/LibCpp/Preprocessor.cpp
Commit c3c2fe153b by Itamar: LibCpp: Add "ignore invalid statements" option to Preprocessor
When we run the Preprocessor from the CppComprehensionEngine of
the language server, we don't want the preprocessor to crash if it
encounters an invalid preprocessor statement (for example, an #endif
statement without an accompanying previous #if statement).

To achieve this, this commit adds an "ignore_invalid_statements" flag
to the preprocessor, which the CppComprehensionEngine sets (see the
sketch after this commit header).

Fixes #11064.
2021-12-03 15:38:21 +01:00
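
The flag lives in the preprocessor's options (this file reads it as m_options.ignore_invalid_statements). A minimal sketch of the intended call site is below; the setter name and the surrounding variables are assumptions for illustration, since Preprocessor.h and the CppComprehensionEngine are not shown here:

    // Hypothetical call site (e.g. in CppComprehensionEngine). Only
    // m_options.ignore_invalid_statements appears in this file, so the
    // setter name below is an assumption, not the confirmed API.
    Cpp::Preprocessor preprocessor(filename, source_text);
    preprocessor.set_ignore_invalid_statements(true); // tolerate e.g. #endif without a matching #if
    auto tokens = preprocessor.process_and_lex();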

/*
 * Copyright (c) 2021, Itamar S. <itamar8910@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include "Preprocessor.h"
#include <AK/Assertions.h>
#include <AK/GenericLexer.h>
#include <AK/StringBuilder.h>
#include <LibCpp/Lexer.h>
#include <ctype.h>

namespace Cpp {

Preprocessor::Preprocessor(const String& filename, StringView program)
    : m_filename(filename)
    , m_program(program)
{
}

Vector<Token> Preprocessor::process_and_lex()
{
    Lexer lexer { m_program };
    lexer.set_ignore_whitespace(true);
    auto tokens = lexer.lex();

    for (size_t token_index = 0; token_index < tokens.size(); ++token_index) {
        auto& token = tokens[token_index];
        m_current_line = token.start().line;
        if (token.type() == Token::Type::PreprocessorStatement) {
            handle_preprocessor_statement(token.text());
            continue;
        }

        // While inside a not-taken conditional branch, drop all non-preprocessor tokens.
        if (m_state != State::Normal)
            continue;

        if (token.type() == Token::Type::IncludeStatement) {
            if (token_index >= tokens.size() - 1 || tokens[token_index + 1].type() != Token::Type::IncludePath)
                continue;
            handle_include_statement(tokens[token_index + 1].text());
            if (m_options.keep_include_statements) {
                m_processed_tokens.append(tokens[token_index]);
                m_processed_tokens.append(tokens[token_index + 1]);
            }
            continue;
        }

        if (token.type() == Token::Type::Identifier) {
            if (auto defined_value = m_definitions.find(token.text()); defined_value != m_definitions.end()) {
                auto last_substituted_token_index = do_substitution(tokens, token_index, defined_value->value);
                token_index = last_substituted_token_index;
                continue;
            }
        }

        m_processed_tokens.append(token);
    }

    return m_processed_tokens;
}

static void consume_whitespace(GenericLexer& lexer)
{
    auto ignore_line = [&] {
        for (;;) {
            if (lexer.consume_specific("\\\n"sv)) {
                lexer.ignore(2);
            } else {
                lexer.ignore_until('\n');
                break;
            }
        }
    };

    for (;;) {
        if (lexer.consume_specific("//"sv))
            ignore_line();
        else if (lexer.consume_specific("/*"sv))
            lexer.ignore_until("*/");
        else if (lexer.next_is("\\\n"sv))
            lexer.ignore(2);
        else if (lexer.is_eof() || !lexer.next_is(isspace))
            break;
        else
            lexer.ignore();
    }
}

void Preprocessor::handle_preprocessor_statement(StringView line)
{
    GenericLexer lexer(line);

    consume_whitespace(lexer);
    lexer.consume_specific('#');
    consume_whitespace(lexer);
    auto keyword = lexer.consume_until(' ');
    if (keyword.is_empty() || keyword.is_null() || keyword.is_whitespace())
        return;

    handle_preprocessor_keyword(keyword, lexer);
}

void Preprocessor::handle_include_statement(StringView include_path)
{
    m_included_paths.append(include_path);
    if (definitions_in_header_callback) {
        for (auto& def : definitions_in_header_callback(include_path))
            m_definitions.set(def.key, def.value);
    }
}

void Preprocessor::handle_preprocessor_keyword(StringView keyword, GenericLexer& line_lexer)
{
    if (keyword == "include") {
        // Should have called 'handle_include_statement'.
        VERIFY_NOT_REACHED();
    }

    if (keyword == "else") {
        // An #else at depth 0 has no matching #if; when ignore_invalid_statements is set,
        // silently ignore it instead of asserting.
        if (m_options.ignore_invalid_statements && m_current_depth == 0)
            return;
        VERIFY(m_current_depth > 0);
        if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1)) {
            m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; });
            m_state = State::Normal;
        }
        if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) {
            m_state = State::SkipElseBranch;
        }
        return;
    }

    if (keyword == "endif") {
        // Same idea: an unmatched #endif is an invalid statement, not a reason to crash.
        if (m_options.ignore_invalid_statements && m_current_depth == 0)
            return;
        VERIFY(m_current_depth > 0);
        --m_current_depth;
        if (m_depths_of_not_taken_branches.contains_slow(m_current_depth)) {
            m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; });
        }
        if (m_depths_of_taken_branches.contains_slow(m_current_depth)) {
            m_depths_of_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; });
        }
        m_state = State::Normal;
        return;
    }

if (keyword == "define") {
if (m_state == State::Normal) {
auto definition = create_definition(line_lexer.consume_all());
if (definition.has_value())
m_definitions.set(definition->key, *definition);
}
return;
}
if (keyword == "undef") {
if (m_state == State::Normal) {
auto key = line_lexer.consume_until(' ');
line_lexer.consume_all();
m_definitions.remove(key);
}
return;
}
if (keyword == "ifdef") {
++m_current_depth;
if (m_state == State::Normal) {
auto key = line_lexer.consume_until(' ');
if (m_definitions.contains(key)) {
m_depths_of_taken_branches.append(m_current_depth - 1);
return;
} else {
m_depths_of_not_taken_branches.append(m_current_depth - 1);
m_state = State::SkipIfBranch;
return;
}
}
return;
}
if (keyword == "ifndef") {
++m_current_depth;
if (m_state == State::Normal) {
auto key = line_lexer.consume_until(' ');
if (!m_definitions.contains(key)) {
m_depths_of_taken_branches.append(m_current_depth - 1);
return;
} else {
m_depths_of_not_taken_branches.append(m_current_depth - 1);
m_state = State::SkipIfBranch;
return;
}
}
return;
}
if (keyword == "if") {
++m_current_depth;
if (m_state == State::Normal) {
// FIXME: Implement #if logic
// We currently always take #if branches.
m_depths_of_taken_branches.append(m_current_depth - 1);
}
return;
}
if (keyword == "elif") {
if (m_options.ignore_invalid_statements && m_current_depth == 0)
return;
VERIFY(m_current_depth > 0);
// FIXME: Evaluate the elif expression
// We currently always treat the expression in #elif as true.
if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1) /* && should_take*/) {
m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; });
m_state = State::Normal;
}
if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) {
m_state = State::SkipElseBranch;
}
return;
}
if (keyword == "pragma") {
line_lexer.consume_all();
return;
}
if (!m_options.ignore_unsupported_keywords) {
dbgln("Unsupported preprocessor keyword: {}", keyword);
VERIFY_NOT_REACHED();
}
}
size_t Preprocessor::do_substitution(Vector<Token> const& tokens, size_t token_index, Definition const& defined_value)
{
    if (defined_value.value.is_null())
        return token_index;

    Substitution sub;
    sub.defined_value = defined_value;

    auto macro_call = parse_macro_call(tokens, token_index);
    if (!macro_call.has_value())
        return token_index;

    Vector<Token> original_tokens;
    for (size_t i = token_index; i <= macro_call->end_token_index; ++i) {
        original_tokens.append(tokens[i]);
    }
    VERIFY(!original_tokens.is_empty());

    auto processed_value = evaluate_macro_call(*macro_call, defined_value);
    m_substitutions.append({ original_tokens, defined_value, processed_value });

    Lexer lexer(processed_value);
    lexer.lex_iterable([&](auto token) {
        if (token.type() == Token::Type::Whitespace)
            return;
        token.set_start(original_tokens.first().start());
        token.set_end(original_tokens.first().end());
        m_processed_tokens.append(token);
    });
    return macro_call->end_token_index;
}

Optional<Preprocessor::MacroCall> Preprocessor::parse_macro_call(Vector<Token> const& tokens, size_t token_index)
{
    auto name = tokens[token_index];
    ++token_index;

    if (token_index >= tokens.size() || tokens[token_index].type() != Token::Type::LeftParen)
        return MacroCall { name, {}, token_index - 1 };
    ++token_index;

    Vector<MacroCall::Argument> arguments;
    MacroCall::Argument current_argument;

    size_t paren_depth = 1;
    for (; token_index < tokens.size(); ++token_index) {
        auto& token = tokens[token_index];
        if (token.type() == Token::Type::LeftParen)
            ++paren_depth;
        if (token.type() == Token::Type::RightParen)
            --paren_depth;

        if (paren_depth == 0) {
            arguments.append(move(current_argument));
            break;
        }

        if (paren_depth == 1 && token.type() == Token::Type::Comma) {
            arguments.append(move(current_argument));
            current_argument = {};
        } else {
            current_argument.tokens.append(token);
        }
    }

    if (token_index >= tokens.size())
        return {};

    return MacroCall { name, move(arguments), token_index };
}

Optional<Preprocessor::Definition> Preprocessor::create_definition(StringView line)
{
    Lexer lexer { line };
    lexer.set_ignore_whitespace(true);
    auto tokens = lexer.lex();
    if (tokens.is_empty())
        return {};

    if (tokens.first().type() != Token::Type::Identifier)
        return {};

    Definition definition;
    definition.filename = m_filename;
    definition.line = m_current_line;
    definition.key = tokens.first().text();

    if (tokens.size() == 1)
        return definition;

    size_t token_index = 1;
    // Parse macro parameters (if any)
    if (tokens[token_index].type() == Token::Type::LeftParen) {
        ++token_index;
        while (token_index < tokens.size() && tokens[token_index].type() != Token::Type::RightParen) {
            auto param = tokens[token_index];
            if (param.type() != Token::Type::Identifier)
                return {};

            if (token_index + 1 >= tokens.size())
                return {};

            ++token_index;

            if (tokens[token_index].type() == Token::Type::Comma)
                ++token_index;
            else if (tokens[token_index].type() != Token::Type::RightParen)
                return {};

            definition.parameters.empend(param.text());
        }

        if (token_index >= tokens.size())
            return {};
        ++token_index;
    }

    if (token_index < tokens.size())
        definition.value = remove_escaped_newlines(line.substring_view(tokens[token_index].start().column));

    return definition;
}

String Preprocessor::remove_escaped_newlines(StringView value)
{
    AK::StringBuilder processed_value;
    GenericLexer lexer { value };
    while (!lexer.is_eof()) {
        processed_value.append(lexer.consume_until("\\\n"));
    }
    return processed_value.to_string();
}

String Preprocessor::evaluate_macro_call(MacroCall const& macro_call, Definition const& definition)
{
    if (macro_call.arguments.size() != definition.parameters.size()) {
        dbgln("mismatch in # of arguments for macro call: {}", macro_call.name.text());
        return {};
    }

    Lexer lexer { definition.value };
    StringBuilder processed_value;
    lexer.lex_iterable([&](auto token) {
        if (token.type() != Token::Type::Identifier) {
            processed_value.append(token.text());
            return;
        }

        auto param_index = definition.parameters.find_first_index(token.text());
        if (!param_index.has_value()) {
            processed_value.append(token.text());
            return;
        }

        auto& argument = macro_call.arguments[*param_index];
        for (auto& arg_token : argument.tokens) {
            processed_value.append(arg_token.text());
        }
    });

    return processed_value.to_string();
}

}