ladybird/Userland/Libraries/LibCpp/Preprocessor.cpp
Ali Mohammad Pur dc68c765b7 LibCpp: Correctly parse lines that end in '\'
Such lines should be considered to be joined into the next line.
This makes multiline preprocessor stuff "work".
2021-08-02 01:03:59 +02:00

233 lines
7.1 KiB
C++

/*
* Copyright (c) 2021, Itamar S. <itamar8910@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include "Preprocessor.h"
#include <AK/Assertions.h>
#include <AK/GenericLexer.h>
#include <AK/StringBuilder.h>
#include <ctype.h>
namespace Cpp {
// Splits `program` into the entries of m_lines, one lexer-delimited line at a time.
// A line whose last character is a backslash is merged with the line(s) that follow it
// into one StringView spanning all of them (the embedded newlines are kept in the view).
Preprocessor::Preprocessor(const String& filename, const StringView& program)
    : m_filename(filename)
    , m_program(program)
{
    GenericLexer source_lexer { m_program };
    while (!source_lexer.is_eof()) {
        auto logical_line = source_lexer.consume_until('\n');

        // Common case: no continuation to join, store the line as-is.
        if (!logical_line.ends_with('\\') || source_lexer.is_eof()) {
            m_lines.append(logical_line);
            continue;
        }

        do {
            auto next_physical_line = source_lexer.consume_until('\n');
            // Grow the view in place over the underlying buffer. The +1 covers the newline
            // between the two pieces (assumes consume_until() skips the delimiter without
            // returning it - TODO confirm against GenericLexer).
            auto joined_length = logical_line.length() + next_physical_line.length() + 1;
            logical_line = StringView { logical_line.characters_without_null_termination(), joined_length };
            // Placeholder entry for the physical line that was just absorbed.
            m_lines.append({});
        } while (logical_line.ends_with('\\') && !source_lexer.is_eof());

        // The most recent placeholder is replaced by the joined line.
        // NOTE(review): this leaves N-1 entries for N joined physical lines - confirm
        // that this is the intended line accounting.
        m_lines.last() = logical_line;
    }
}
// Walks the remaining entries of m_lines (starting at m_line_index), dispatches every line
// that starts with '#' to handle_preprocessor_line(), and collects the surviving text in
// m_builder. Every visited line contributes a "\n" to the output, whether or not its
// content is kept. Returns the resulting text, which is also stored in m_processed_text.
const String& Preprocessor::process()
{
    while (m_line_index < m_lines.size()) {
        auto& current_line = m_lines[m_line_index];

        bool emit_line;
        if (!current_line.starts_with("#")) {
            // Ordinary text is only kept while no branch is being skipped.
            emit_line = (m_state == State::Normal);
        } else {
            // Directives are always executed; only #include lines may be echoed, and only on request.
            auto keyword = handle_preprocessor_line(current_line);
            emit_line = m_options.keep_include_statements && keyword == "include";
        }

        if (emit_line)
            m_builder.append(current_line);
        m_builder.append("\n");

        ++m_line_index;
    }

    m_processed_text = m_builder.to_string();
    return m_processed_text;
}
// Advances `lexer` past any mix of whitespace, `//` comments, `/* ... */` comments and
// backslash-newline line continuations. Stops at the first character that is none of
// those, or at the end of the input.
static void consume_whitespace(GenericLexer& lexer)
{
    // Skips the rest of a `//` comment, including the newline that ends it.
    // A backslash directly followed by a newline continues the comment onto the next
    // line, so the scan has to look at every character rather than jump to the first newline.
    auto ignore_line = [&] {
        while (!lexer.is_eof()) {
            // consume_specific() already advances past the match, so no additional
            // ignore() is needed here (doing so would drop two unrelated characters).
            if (lexer.consume_specific("\\\n"sv))
                continue;
            if (lexer.consume_specific("\n"sv))
                break;
            lexer.ignore();
        }
    };

    for (;;) {
        if (lexer.consume_specific("//"sv))
            ignore_line();
        else if (lexer.consume_specific("/*"sv))
            lexer.ignore_until("*/");
        else if (lexer.next_is("\\\n"sv))
            lexer.ignore(2); // A line continuation outside of a comment counts as whitespace.
        else if (lexer.is_eof() || !lexer.next_is(isspace))
            break;
        else
            lexer.ignore();
    }
}
// Parses one directive line: skips whitespace/comments, an optional '#', more
// whitespace/comments, then reads the directive name up to the next space.
// If a usable name was found it is forwarded (together with the lexer, now positioned
// after the name) to handle_preprocessor_keyword() and returned; otherwise a
// default-constructed keyword is returned and nothing is executed.
Preprocessor::PreprocessorKeyword Preprocessor::handle_preprocessor_line(const StringView& line)
{
    GenericLexer directive_lexer(line);

    consume_whitespace(directive_lexer);
    directive_lexer.consume_specific('#');
    consume_whitespace(directive_lexer);

    auto directive_name = directive_lexer.consume_until(' ');
    bool has_directive_name = !(directive_name.is_empty() || directive_name.is_null() || directive_name.is_whitespace());
    if (has_directive_name) {
        handle_preprocessor_keyword(directive_name, directive_lexer);
        return directive_name;
    }

    return {};
}
void Preprocessor::handle_preprocessor_keyword(const StringView& keyword, GenericLexer& line_lexer)
{
if (keyword == "include") {
consume_whitespace(line_lexer);
m_included_paths.append(line_lexer.consume_all());
return;
}
if (keyword == "else") {
VERIFY(m_current_depth > 0);
if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1)) {
m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; });
m_state = State::Normal;
}
if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) {
m_state = State::SkipElseBranch;
}
return;
}
if (keyword == "endif") {
VERIFY(m_current_depth > 0);
--m_current_depth;
if (m_depths_of_not_taken_branches.contains_slow(m_current_depth)) {
m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; });
}
if (m_depths_of_taken_branches.contains_slow(m_current_depth)) {
m_depths_of_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; });
}
m_state = State::Normal;
return;
}
if (keyword == "define") {
if (m_state == State::Normal) {
auto key = line_lexer.consume_until(' ');
consume_whitespace(line_lexer);
DefinedValue value;
value.filename = m_filename;
value.line = m_line_index;
auto string_value = line_lexer.consume_all();
if (!string_value.is_empty())
value.value = string_value;
m_definitions.set(key, value);
}
return;
}
if (keyword == "undef") {
if (m_state == State::Normal) {
auto key = line_lexer.consume_until(' ');
line_lexer.consume_all();
m_definitions.remove(key);
}
return;
}
if (keyword == "ifdef") {
++m_current_depth;
if (m_state == State::Normal) {
auto key = line_lexer.consume_until(' ');
if (m_definitions.contains(key)) {
m_depths_of_taken_branches.append(m_current_depth - 1);
return;
} else {
m_depths_of_not_taken_branches.append(m_current_depth - 1);
m_state = State::SkipIfBranch;
return;
}
}
return;
}
if (keyword == "ifndef") {
++m_current_depth;
if (m_state == State::Normal) {
auto key = line_lexer.consume_until(' ');
if (!m_definitions.contains(key)) {
m_depths_of_taken_branches.append(m_current_depth - 1);
return;
} else {
m_depths_of_not_taken_branches.append(m_current_depth - 1);
m_state = State::SkipIfBranch;
return;
}
}
return;
}
if (keyword == "if") {
++m_current_depth;
if (m_state == State::Normal) {
// FIXME: Implement #if logic
// We currently always take #if branches.
m_depths_of_taken_branches.append(m_current_depth - 1);
}
return;
}
if (keyword == "elif") {
VERIFY(m_current_depth > 0);
// FIXME: Evaluate the elif expression
// We currently always treat the expression in #elif as true.
if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1) /* && should_take*/) {
m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; });
m_state = State::Normal;
}
if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) {
m_state = State::SkipElseBranch;
}
return;
}
if (keyword == "pragma") {
line_lexer.consume_all();
return;
}
if (!m_options.ignore_unsupported_keywords) {
dbgln("Unsupported preprocessor keyword: {}", keyword);
VERIFY_NOT_REACHED();
}
}
// Returns the text stored in m_processed_text, which process() assigns.
// VERIFYs that the stored text is not null before returning it.
const String& Preprocessor::processed_text()
{
VERIFY(!m_processed_text.is_null());
return m_processed_text;
}
};