diff --git a/Meta/Lagom/ClangPlugins/CMakeLists.txt b/Meta/Lagom/ClangPlugins/CMakeLists.txt new file mode 100644 index 00000000000..7f9d78b73ee --- /dev/null +++ b/Meta/Lagom/ClangPlugins/CMakeLists.txt @@ -0,0 +1,17 @@ +find_package(Clang 17 CONFIG REQUIRED) + +function(clang_plugin target_name) + cmake_parse_arguments(CLANG_PLUGIN "" "" "SOURCES" ${ARGN}) + add_library(${target_name} ${CLANG_PLUGIN_SOURCES}) + install(TARGETS ${target_name} DESTINATION ${CMAKE_INSTALL_LIBDIR}) + target_include_directories(${target_name} SYSTEM PRIVATE ${CLANG_INCLUDE_DIRS} ${LLVM_INCLUDE_DIRS}) + target_compile_features(${target_name} PRIVATE cxx_std_20) + target_compile_options(${target_name} PRIVATE -Wall -Wextra -Werror -Wno-unused -fno-rtti) + + add_custom_target(${target_name}Target DEPENDS ${target_name}) + + set_property(GLOBAL APPEND PROPERTY CLANG_PLUGINS_ALL_COMPILE_OPTIONS -fplugin=${CMAKE_BINARY_DIR}/${CMAKE_INSTALL_LIBDIR}/lib${target_name}.so) +endfunction() + +clang_plugin(LambdaCaptureClangPlugin SOURCES LambdaCapturePluginAction.cpp) +clang_plugin(LibJSGCClangPlugin SOURCES LibJSGCPluginAction.cpp) diff --git a/Meta/Lagom/ClangPlugins/LambdaCapturePluginAction.cpp b/Meta/Lagom/ClangPlugins/LambdaCapturePluginAction.cpp new file mode 100644 index 00000000000..64db3aa01e4 --- /dev/null +++ b/Meta/Lagom/ClangPlugins/LambdaCapturePluginAction.cpp @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2024, Matthew Olsson + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include "LambdaCapturePluginAction.h" +#include +#include +#include + +AST_MATCHER_P(clang::Decl, hasAnnotation, std::string, name) +{ + (void)Builder; + (void)Finder; + for (auto const* attr : Node.attrs()) { + if (auto const* annotate_attr = llvm::dyn_cast(attr)) { + if (annotate_attr->getAnnotation() == name) + return true; + } + } + return false; +} + +class Consumer + : public clang::ASTConsumer + , public clang::ast_matchers::internal::CollectMatchesCallback { +public: + Consumer() + { + using namespace clang::ast_matchers; + + auto non_capturable_var_decl = varDecl( + hasLocalStorage(), + unless( + anyOf( + // The declaration has an annotation: + // IGNORE_USE_IN_ESCAPING_LAMBDA Foo foo; + hasAnnotation("serenity::ignore_use_in_escaping_lambda"), + // The declaration is a reference: + // Foo& foo_ref = get_foo_ref(); + // Foo* foo_ptr = get_foo_ptr(); + // do_something([&foo_ref, &foo_ptr] { + // foo_ref.foo(); // Fine, foo_ref references the underlying Foo instance + // foo_ptr->foo(); // Bad, foo_ptr references the pointer on the stack above + // }); + hasType(references(TypeMatcher(anything())))))); + + auto bad_lambda_capture = lambdaCapture(anyOf(capturesThis(), capturesVar(non_capturable_var_decl))).bind("lambda-capture"); + + auto lambda_with_bad_capture = lambdaExpr( + anyOf( + // These are both required as they have slightly different behavior. + // + // We need forEachLambdaCapture because we need to go over every explicit capture in the capture list, as + // hasAnyCapture will just take the first capture in the list that matches the criteria (usually the `this` + // capture). Without it, if the first capture in the list was flagged as bad but is actually fine (e.g. the + // `this` capture, or a var capture by value), but there was a second capture in the list that was invalid, + // it would be skipped. + // + // But forEachLambdaCapture doesn't seem to find implicit captures, so we also need hasAnyCapture to handle + // captures that aren't explicitly listed in the capture list, but are still invalid. + forEachLambdaCapture(bad_lambda_capture), + hasAnyCapture(bad_lambda_capture))); + + // Bind this varDecl so we can reference it later to make sure it isn't being called + auto lambda_with_bad_capture_decl = varDecl(hasInitializer(lambda_with_bad_capture)).bind("lambda"); + + m_finder.addMatcher( + traverse( + clang::TK_IgnoreUnlessSpelledInSource, + callExpr( + forEachArgumentWithParam( + anyOf( + // Match a lambda given directly in the function call + lambda_with_bad_capture, + // Matches an expression with a possibly-deeply-nested reference to a variable with a lambda type, e.g: + // auto lambda = [...] { ... }; + // some_func(move(lambda)); + has(declRefExpr( + to(lambda_with_bad_capture_decl), + // Avoid immediately invoked lambdas (i.e. match `move(lambda)` but not `move(lambda())`) + unless(hasParent( + // ::operator()(...) + cxxOperatorCallExpr(has(declRefExpr(to(equalsBoundNode("lambda")))))))))), + parmVarDecl( + allOf( + // It's important that the parameter has a RecordType, as a templated type can never escape its function + hasType(cxxRecordDecl()), + unless(hasAnnotation("serenity::noescape")))) + .bind("lambda-param-ref")))), + this); + } + + void HandleTranslationUnit(clang::ASTContext& Ctx) override + { + m_finder.matchAST(Ctx); + } + + void run(clang::ast_matchers::MatchFinder::MatchResult const& result) override + { + auto& diag_engine = result.Context->getDiagnostics(); + + if (auto const* capture = result.Nodes.getNodeAs("lambda-capture")) { + if (capture->capturesThis() || capture->getCaptureKind() != clang::LCK_ByRef) + return; + + auto diag_id = diag_engine.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Variable with local storage is captured by reference in a lambda that may be asynchronously executed"); + diag_engine.Report(capture->getLocation(), diag_id); + + clang::SourceLocation captured_var_location; + if (auto const* var_decl = llvm::dyn_cast(capture->getCapturedVar())) { + captured_var_location = var_decl->getTypeSourceInfo()->getTypeLoc().getBeginLoc(); + } else { + captured_var_location = capture->getCapturedVar()->getLocation(); + } + diag_id = diag_engine.getCustomDiagID(clang::DiagnosticsEngine::Note, "Annotate the variable declaration with IGNORE_USE_IN_ESCAPING_LAMBDA if it outlives the lambda"); + diag_engine.Report(captured_var_location, diag_id); + + auto const* param = result.Nodes.getNodeAs("lambda-param-ref"); + diag_id = diag_engine.getCustomDiagID(clang::DiagnosticsEngine::Note, "Annotate the parameter with NOESCAPE if the lambda will not outlive the function call"); + diag_engine.Report(param->getTypeSourceInfo()->getTypeLoc().getBeginLoc(), diag_id); + } + } + +private: + clang::ast_matchers::MatchFinder m_finder; +}; + +std::unique_ptr LambdaCapturePluginAction::CreateASTConsumer(clang::CompilerInstance&, llvm::StringRef) +{ + return std::make_unique(); +} + +static clang::FrontendPluginRegistry::Add X("lambda-capture", "analyze lambda captures"); diff --git a/Meta/Lagom/ClangPlugins/LambdaCapturePluginAction.h b/Meta/Lagom/ClangPlugins/LambdaCapturePluginAction.h new file mode 100644 index 00000000000..5cb91265b8e --- /dev/null +++ b/Meta/Lagom/ClangPlugins/LambdaCapturePluginAction.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2024, Matthew Olsson + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include + +class LambdaCapturePluginAction : public clang::PluginASTAction { +public: + virtual bool ParseArgs(clang::CompilerInstance const&, std::vector const&) override + { + return true; + } + + virtual std::unique_ptr CreateASTConsumer(clang::CompilerInstance&, llvm::StringRef) override; + + ActionType getActionType() override + { + return AddAfterMainAction; + } +}; diff --git a/Meta/Lagom/ClangPlugins/LibJSGCPluginAction.cpp b/Meta/Lagom/ClangPlugins/LibJSGCPluginAction.cpp new file mode 100644 index 00000000000..7826dff3b91 --- /dev/null +++ b/Meta/Lagom/ClangPlugins/LibJSGCPluginAction.cpp @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2024, Matthew Olsson + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include "LibJSGCPluginAction.h" +#include +#include +#include +#include +#include +#include + +template +class SimpleCollectMatchesCallback : public clang::ast_matchers::MatchFinder::MatchCallback { +public: + explicit SimpleCollectMatchesCallback(std::string name) + : m_name(std::move(name)) + { + } + + void run(clang::ast_matchers::MatchFinder::MatchResult const& result) override + { + if (auto const* node = result.Nodes.getNodeAs(m_name)) + m_matches.push_back(node); + } + + auto const& matches() const { return m_matches; } + +private: + std::string m_name; + std::vector m_matches; +}; + +bool record_inherits_from_cell(clang::CXXRecordDecl const& record) +{ + if (!record.isCompleteDefinition()) + return false; + + bool inherits_from_cell = record.getQualifiedNameAsString() == "JS::Cell"; + record.forallBases([&](clang::CXXRecordDecl const* base) -> bool { + if (base->getQualifiedNameAsString() == "JS::Cell") { + inherits_from_cell = true; + return false; + } + return true; + }); + return inherits_from_cell; +} + +std::vector get_all_qualified_types(clang::QualType const& type) +{ + std::vector qualified_types; + + if (auto const* template_specialization = type->getAs()) { + auto specialization_name = template_specialization->getTemplateName().getAsTemplateDecl()->getQualifiedNameAsString(); + // Do not unwrap GCPtr/NonnullGCPtr/MarkedVector + if (specialization_name == "JS::GCPtr" || specialization_name == "JS::NonnullGCPtr" || specialization_name == "JS::RawGCPtr" || specialization_name == "JS::MarkedVector") { + qualified_types.push_back(type); + } else { + auto const template_arguments = template_specialization->template_arguments(); + for (size_t i = 0; i < template_arguments.size(); i++) { + auto const& template_arg = template_arguments[i]; + if (template_arg.getKind() == clang::TemplateArgument::Type) { + auto template_qualified_types = get_all_qualified_types(template_arg.getAsType()); + std::move(template_qualified_types.begin(), template_qualified_types.end(), std::back_inserter(qualified_types)); + } + } + } + } else { + qualified_types.push_back(type); + } + + return qualified_types; +} + +struct FieldValidationResult { + bool is_valid { false }; + bool is_wrapped_in_gcptr { false }; + bool needs_visiting { false }; +}; + +FieldValidationResult validate_field(clang::FieldDecl const* field_decl) +{ + auto type = field_decl->getType(); + if (auto const* elaborated_type = llvm::dyn_cast(type.getTypePtr())) + type = elaborated_type->desugar(); + + FieldValidationResult result { .is_valid = true }; + + for (auto const& qualified_type : get_all_qualified_types(type)) { + if (auto const* pointer_decl = qualified_type->getAs()) { + if (auto const* pointee = pointer_decl->getPointeeCXXRecordDecl()) { + if (record_inherits_from_cell(*pointee)) { + result.is_valid = false; + result.is_wrapped_in_gcptr = false; + result.needs_visiting = true; + return result; + } + } + } else if (auto const* reference_decl = qualified_type->getAs()) { + if (auto const* pointee = reference_decl->getPointeeCXXRecordDecl()) { + if (record_inherits_from_cell(*pointee)) { + result.is_valid = false; + result.is_wrapped_in_gcptr = false; + result.needs_visiting = true; + return result; + } + } + } else if (auto const* specialization = qualified_type->getAs()) { + auto template_type_name = specialization->getTemplateName().getAsTemplateDecl()->getName(); + if (template_type_name != "GCPtr" && template_type_name != "NonnullGCPtr" && template_type_name != "RawGCPtr") + return result; + + auto const template_args = specialization->template_arguments(); + if (template_args.size() != 1) + return result; // Not really valid, but will produce a compilation error anyway + + auto const& type_arg = template_args[0]; + auto const* record_type = type_arg.getAsType()->getAs(); + if (!record_type) + return result; + + auto const* record_decl = record_type->getAsCXXRecordDecl(); + if (!record_decl->hasDefinition()) + return result; + + result.is_wrapped_in_gcptr = true; + result.is_valid = record_inherits_from_cell(*record_decl); + result.needs_visiting = template_type_name != "RawGCPtr"; + } + } + + return result; +} + +bool LibJSGCVisitor::VisitCXXRecordDecl(clang::CXXRecordDecl* record) +{ + using namespace clang::ast_matchers; + + if (!record || !record->isCompleteDefinition() || (!record->isClass() && !record->isStruct())) + return true; + + // Cell triggers a bunch of warnings for its empty visit_edges implementation, but + // it doesn't have any members anyways so it's fine to just ignore. + auto qualified_name = record->getQualifiedNameAsString(); + if (qualified_name == "JS::Cell") + return true; + + auto& diag_engine = m_context.getDiagnostics(); + std::vector fields_that_need_visiting; + + for (clang::FieldDecl const* field : record->fields()) { + auto validation_results = validate_field(field); + if (!validation_results.is_valid) { + if (validation_results.is_wrapped_in_gcptr) { + auto diag_id = diag_engine.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Specialization type must inherit from JS::Cell"); + diag_engine.Report(field->getLocation(), diag_id); + } else { + auto diag_id = diag_engine.getCustomDiagID(clang::DiagnosticsEngine::Warning, "%0 to JS::Cell type should be wrapped in %1"); + auto builder = diag_engine.Report(field->getLocation(), diag_id); + if (field->getType()->isReferenceType()) { + builder << "reference" + << "JS::NonnullGCPtr"; + } else { + builder << "pointer" + << "JS::GCPtr"; + } + } + } else if (validation_results.needs_visiting) { + fields_that_need_visiting.push_back(field); + } + } + + if (!record_inherits_from_cell(*record)) + return true; + + clang::DeclarationName name = &m_context.Idents.get("visit_edges"); + auto const* visit_edges_method = record->lookup(name).find_first(); + if (!visit_edges_method && !fields_that_need_visiting.empty()) { + auto diag_id = diag_engine.getCustomDiagID(clang::DiagnosticsEngine::Warning, "JS::Cell-inheriting class %0 contains a GC-allocated member %1 but has no visit_edges method"); + auto builder = diag_engine.Report(record->getLocation(), diag_id); + builder << record->getName() + << fields_that_need_visiting[0]; + } + if (!visit_edges_method || !visit_edges_method->getBody()) + return true; + + // Search for a call to Base::visit_edges. Note that this also has the nice side effect of + // ensuring the classes use JS_CELL/JS_OBJECT, as Base will not be defined if they do not. + + MatchFinder base_visit_edges_finder; + SimpleCollectMatchesCallback base_visit_edges_callback("member-call"); + + auto base_visit_edges_matcher = cxxMethodDecl( + ofClass(hasName(qualified_name)), + functionDecl(hasName("visit_edges")), + isOverride(), + hasDescendant(memberExpr(member(hasName("visit_edges"))).bind("member-call"))); + + base_visit_edges_finder.addMatcher(base_visit_edges_matcher, &base_visit_edges_callback); + base_visit_edges_finder.matchAST(m_context); + + bool call_to_base_visit_edges_found = false; + + for (auto const* call_expr : base_visit_edges_callback.matches()) { + // FIXME: Can we constrain the matcher above to avoid looking directly at the source code? + auto const* source_chars = m_context.getSourceManager().getCharacterData(call_expr->getBeginLoc()); + if (strncmp(source_chars, "Base::", 6) == 0) { + call_to_base_visit_edges_found = true; + break; + } + } + + if (!call_to_base_visit_edges_found) { + auto diag_id = diag_engine.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Missing call to Base::visit_edges"); + diag_engine.Report(visit_edges_method->getBeginLoc(), diag_id); + } + + // Search for uses of all fields that need visiting. We don't ensure they are _actually_ visited + // with a call to visitor.visit(...), as that is too complex. Instead, we just assume that if the + // field is accessed at all, then it is visited. + + if (fields_that_need_visiting.empty()) + return true; + + MatchFinder field_access_finder; + SimpleCollectMatchesCallback field_access_callback("member-expr"); + + auto field_access_matcher = memberExpr( + hasAncestor(cxxMethodDecl(hasName("visit_edges"))), + hasObjectExpression(hasType(pointsTo(cxxRecordDecl(hasName(record->getName())))))) + .bind("member-expr"); + + field_access_finder.addMatcher(field_access_matcher, &field_access_callback); + field_access_finder.matchAST(visit_edges_method->getASTContext()); + + std::unordered_set fields_that_are_visited; + for (auto const* member_expr : field_access_callback.matches()) + fields_that_are_visited.insert(member_expr->getMemberNameInfo().getAsString()); + + auto diag_id = diag_engine.getCustomDiagID(clang::DiagnosticsEngine::Warning, "GC-allocated member is not visited in %0::visit_edges"); + + for (auto const* field : fields_that_need_visiting) { + if (!fields_that_are_visited.contains(field->getNameAsString())) { + auto builder = diag_engine.Report(field->getBeginLoc(), diag_id); + builder << record->getName(); + } + } + + return true; +} + +void LibJSGCASTConsumer::HandleTranslationUnit(clang::ASTContext& context) +{ + LibJSGCVisitor visitor { context }; + visitor.TraverseDecl(context.getTranslationUnitDecl()); +} + +static clang::FrontendPluginRegistry::Add X("libjs-gc-scanner", "analyze LibJS GC usage"); diff --git a/Meta/Lagom/ClangPlugins/LibJSGCPluginAction.h b/Meta/Lagom/ClangPlugins/LibJSGCPluginAction.h new file mode 100644 index 00000000000..0a7b6a62a2d --- /dev/null +++ b/Meta/Lagom/ClangPlugins/LibJSGCPluginAction.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2024, Matthew Olsson + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include + +class LibJSGCVisitor : public clang::RecursiveASTVisitor { +public: + explicit LibJSGCVisitor(clang::ASTContext& context) + : m_context(context) + { + } + + bool VisitCXXRecordDecl(clang::CXXRecordDecl*); + +private: + clang::ASTContext& m_context; +}; + +class LibJSGCASTConsumer : public clang::ASTConsumer { +public: + virtual void HandleTranslationUnit(clang::ASTContext& context) override; +}; + +class LibJSGCPluginAction : public clang::PluginASTAction { +public: + virtual bool ParseArgs(clang::CompilerInstance const&, std::vector const&) override + { + return true; + } + + virtual std::unique_ptr CreateASTConsumer(clang::CompilerInstance&, llvm::StringRef) override + { + return std::make_unique(); + } + + ActionType getActionType() override + { + return AddAfterMainAction; + } +};