From 76fa127cbf75fd889342f2ff9a9322b979aa5cd1 Mon Sep 17 00:00:00 2001 From: Matthew Olsson Date: Sun, 7 Apr 2024 16:01:26 -0700 Subject: [PATCH] LibJSGCVerifier: Detect stack-allocated ref captures in lambdas For example, consider the following code snippet: Vector> m_callbacks; void add_callback(Function callback) { m_callbacks.append(move(callback)); } // Somewhere else... void do_something() { int a = 10; add_callback([&a] { dbgln("a is {}", a); }); } // Oops, "a" is now destroyed, but the callback in m_callbacks // has a reference to it! We now statically detect the capture of "a" in the lambda above and flag it as incorrect. Note that capturing the value implicitly with a capture list of `[&]` would also be detected. Of course, many functions that accept Function<...> don't store them anywhere, instead immediately invoking them inside of the function. To avoid a warning in this case, the parameter can be annotated with NOESCAPE to indicate that capturing stack variables is fine: void do_something_now(NOESCAPE Function<...> callback) { callback(...) } Lastly, there are situations where the callback does generally escape, but where the caller knows that it won't escape long enough to cause any issues. For example, consider this fake example from LibWeb: void do_something() { bool is_done = false; HTML::queue_global_task([&] { do_some_work(); is_done = true; }); HTML::main_thread_event_loop().spin_until([&] { return is_done; }); } In this case, we know that the lambda passed to queue_global_task will be executed before the function returns, and will not persist afterwards. To avoid this warning, annotate the type of the capture with IGNORE_USE_IN_ESCAPING_LAMBDA: void do_something() { IGNORE_USE_IN_ESCAPING_LAMBDA bool is_done = false; // ... } --- AK/Function.h | 10 ++ .../LibJSGCVerifier/src/CellsHandler.cpp | 106 ++++++++++++++++++ .../Tools/LibJSGCVerifier/src/CellsHandler.h | 3 + 3 files changed, 119 insertions(+) diff --git a/AK/Function.h b/AK/Function.h index bb3319d4d25..04e417f57de 100644 --- a/AK/Function.h +++ b/AK/Function.h @@ -38,6 +38,16 @@ namespace AK { +// These annotations are used to avoid capturing a variable with local storage in a lambda that outlives it +#if defined(AK_COMPILER_CLANG) +# define NOESCAPE [[clang::annotate("serenity::noescape")]] +// FIXME: When we get C++23, change this to be applied to the lambda directly instead of to the types of its captures +# define IGNORE_USE_IN_ESCAPING_LAMBDA [[clang::annotate("serenity::ignore_use_in_escaping_lambda")]] +#else +# define NOESCAPE +# define IGNORE_USE_IN_ESCAPING_LAMBDA +#endif + template class Function; diff --git a/Meta/Lagom/Tools/LibJSGCVerifier/src/CellsHandler.cpp b/Meta/Lagom/Tools/LibJSGCVerifier/src/CellsHandler.cpp index 5344d99b449..2fd720acd2e 100644 --- a/Meta/Lagom/Tools/LibJSGCVerifier/src/CellsHandler.cpp +++ b/Meta/Lagom/Tools/LibJSGCVerifier/src/CellsHandler.cpp @@ -18,6 +18,19 @@ #include #include +AST_MATCHER_P(clang::Decl, hasAnnotation, std::string, name) +{ + (void)Builder; + (void)Finder; + for (auto const* attr : Node.attrs()) { + if (auto const* annotate_attr = llvm::dyn_cast(attr)) { + if (annotate_attr->getAnnotation() == name) + return true; + } + } + return false; +} + template class SimpleCollectMatchesCallback : public clang::ast_matchers::MatchFinder::MatchCallback { public: @@ -48,6 +61,67 @@ CollectCellsHandler::CollectCellsHandler() clang::TK_IgnoreUnlessSpelledInSource, cxxRecordDecl(decl().bind("record-decl"))), this); + + auto non_capturable_var_decl = varDecl( + hasLocalStorage(), + unless( + anyOf( + // The declaration has an annotation: + // IGNORE_USE_IN_ESCAPING_LAMBDA Foo foo; + hasAnnotation("serenity::ignore_use_in_escaping_lambda"), + // The declaration is a reference: + // Foo& foo_ref = get_foo_ref(); + // Foo* foo_ptr = get_foo_ptr(); + // do_something([&foo_ref, &foo_ptr] { + // foo_ref.foo(); // Fine, foo_ref references the underlying Foo instance + // foo_ptr->foo(); // Bad, foo_ptr references the pointer on the stack above + // }); + hasType(references(TypeMatcher(anything())))))); + + auto bad_lambda_capture = lambdaCapture(anyOf(capturesThis(), capturesVar(non_capturable_var_decl))).bind("lambda-capture"); + + auto lambda_with_bad_capture = lambdaExpr( + anyOf( + // These are both required as they have slightly different behavior. + // + // We need forEachLambdaCapture because we need to go over every explicit capture in the capture list, as + // hasAnyCapture will just take the first capture in the list that matches the criteria (usually the `this` + // capture). Without it, if the first capture in the list was flagged as bad but is actually fine (e.g. the + // `this` capture, or a var capture by value), but there was a second capture in the list that was invalid, + // it would be skipped. + // + // But forEachLambdaCapture doesn't seem to find implicit captures, so we also need hasAnyCapture to handle + // captures that aren't explicitly listed in the capture list, but are still invalid. + forEachLambdaCapture(bad_lambda_capture), + hasAnyCapture(bad_lambda_capture))); + + // Bind this varDecl so we can reference it later to make sure it isn't being called + auto lambda_with_bad_capture_decl = varDecl(hasInitializer(lambda_with_bad_capture)).bind("lambda"); + + m_finder.addMatcher( + traverse( + clang::TK_IgnoreUnlessSpelledInSource, + callExpr( + forEachArgumentWithParam( + anyOf( + // Match a lambda given directly in the function call + lambda_with_bad_capture, + // Matches an expression with a possibly-deeply-nested reference to a variable with a lambda type, e.g: + // auto lambda = [...] { ... }; + // some_func(move(lambda)); + has(declRefExpr( + to(lambda_with_bad_capture_decl), + // Avoid immediately invoked lambdas (i.e. match `move(lambda)` but not `move(lambda())`) + unless(hasParent( + // ::operator()(...) + cxxOperatorCallExpr(has(declRefExpr(to(equalsBoundNode("lambda")))))))))), + parmVarDecl( + allOf( + // It's important that the parameter has a RecordType, as a templated type can never escape its function + hasType(cxxRecordDecl()), + unless(hasAnnotation("serenity::noescape")))) + .bind("lambda-param-ref")))), + this); } bool CollectCellsHandler::handleBeginSource(clang::CompilerInstance& ci) @@ -171,6 +245,12 @@ FieldValidationResult validate_field(clang::FieldDecl const* field_decl) } void CollectCellsHandler::run(clang::ast_matchers::MatchFinder::MatchResult const& result) +{ + check_cells(result); + check_lambda_captures(result); +} + +void CollectCellsHandler::check_cells(clang::ast_matchers::MatchFinder::MatchResult const& result) { using namespace clang::ast_matchers; @@ -285,3 +365,29 @@ void CollectCellsHandler::run(clang::ast_matchers::MatchFinder::MatchResult cons } } } + +void CollectCellsHandler::check_lambda_captures(clang::ast_matchers::MatchFinder::MatchResult const& result) +{ + auto& diag_engine = result.Context->getDiagnostics(); + + if (auto const* capture = result.Nodes.getNodeAs("lambda-capture")) { + if (capture->capturesThis() || capture->getCaptureKind() != clang::LCK_ByRef) + return; + + auto diag_id = diag_engine.getCustomDiagID(clang::DiagnosticsEngine::Warning, "Variable with local storage is captured by reference in a lambda that may be asynchronously executed"); + diag_engine.Report(capture->getLocation(), diag_id); + + clang::SourceLocation captured_var_location; + if (auto const* var_decl = llvm::dyn_cast(capture->getCapturedVar())) { + captured_var_location = var_decl->getTypeSourceInfo()->getTypeLoc().getBeginLoc(); + } else { + captured_var_location = capture->getCapturedVar()->getLocation(); + } + diag_id = diag_engine.getCustomDiagID(clang::DiagnosticsEngine::Note, "Annotate the variable declaration with IGNORE_USE_IN_ESCAPING_LAMBDA if it outlives the lambda"); + diag_engine.Report(captured_var_location, diag_id); + + auto const* param = result.Nodes.getNodeAs("lambda-param-ref"); + diag_id = diag_engine.getCustomDiagID(clang::DiagnosticsEngine::Note, "Annotate the parameter with NOESCAPE if the lambda will not outlive the function call"); + diag_engine.Report(param->getTypeSourceInfo()->getTypeLoc().getBeginLoc(), diag_id); + } +} diff --git a/Meta/Lagom/Tools/LibJSGCVerifier/src/CellsHandler.h b/Meta/Lagom/Tools/LibJSGCVerifier/src/CellsHandler.h index cde264c0e14..55ff18b744c 100644 --- a/Meta/Lagom/Tools/LibJSGCVerifier/src/CellsHandler.h +++ b/Meta/Lagom/Tools/LibJSGCVerifier/src/CellsHandler.h @@ -25,6 +25,9 @@ public: clang::ast_matchers::MatchFinder& finder() { return m_finder; } private: + void check_cells(clang::ast_matchers::MatchFinder::MatchResult const& result); + void check_lambda_captures(clang::ast_matchers::MatchFinder::MatchResult const& result); + std::unordered_set m_visited_classes; clang::ast_matchers::MatchFinder m_finder; };