Home | History | Annotate | Line # | Download | only in Core
      1 //===- ExplodedGraph.cpp - Local, Path-Sens. "Exploded Graph" -------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 //  This file defines the template classes ExplodedNode and ExplodedGraph,
     10 //  which represent a path-sensitive, intra-procedural "exploded graph."
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
     15 #include "clang/AST/Expr.h"
     16 #include "clang/AST/ExprObjC.h"
     17 #include "clang/AST/ParentMap.h"
     18 #include "clang/AST/Stmt.h"
     19 #include "clang/Analysis/CFGStmtMap.h"
     20 #include "clang/Analysis/ProgramPoint.h"
     21 #include "clang/Analysis/Support/BumpVector.h"
     22 #include "clang/Basic/LLVM.h"
     23 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
     24 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
     25 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
     26 #include "llvm/ADT/DenseSet.h"
     27 #include "llvm/ADT/FoldingSet.h"
     28 #include "llvm/ADT/Optional.h"
     29 #include "llvm/ADT/PointerUnion.h"
     30 #include "llvm/ADT/SmallVector.h"
     31 #include "llvm/Support/Casting.h"
     32 #include <cassert>
     33 #include <memory>
     34 
     35 using namespace clang;
     36 using namespace ento;
     37 
     38 //===----------------------------------------------------------------------===//
     39 // Cleanup.
     40 //===----------------------------------------------------------------------===//
     41 
     42 ExplodedGraph::ExplodedGraph() = default;
     43 
     44 ExplodedGraph::~ExplodedGraph() = default;
     45 
     46 //===----------------------------------------------------------------------===//
     47 // Node reclamation.
     48 //===----------------------------------------------------------------------===//
     49 
     50 bool ExplodedGraph::isInterestingLValueExpr(const Expr *Ex) {
     51   if (!Ex->isLValue())
     52     return false;
     53   return isa<DeclRefExpr>(Ex) || isa<MemberExpr>(Ex) ||
     54          isa<ObjCIvarRefExpr>(Ex) || isa<ArraySubscriptExpr>(Ex);
     55 }
     56 
     57 bool ExplodedGraph::shouldCollect(const ExplodedNode *node) {
     58   // First, we only consider nodes for reclamation of the following
     59   // conditions apply:
     60   //
     61   // (1) 1 predecessor (that has one successor)
     62   // (2) 1 successor (that has one predecessor)
     63   //
     64   // If a node has no successor it is on the "frontier", while a node
     65   // with no predecessor is a root.
     66   //
     67   // After these prerequisites, we discard all "filler" nodes that
     68   // are used only for intermediate processing, and are not essential
     69   // for analyzer history:
     70   //
     71   // (a) PreStmtPurgeDeadSymbols
     72   //
     73   // We then discard all other nodes where *all* of the following conditions
     74   // apply:
     75   //
     76   // (3) The ProgramPoint is for a PostStmt, but not a PostStore.
     77   // (4) There is no 'tag' for the ProgramPoint.
     78   // (5) The 'store' is the same as the predecessor.
     79   // (6) The 'GDM' is the same as the predecessor.
     80   // (7) The LocationContext is the same as the predecessor.
     81   // (8) Expressions that are *not* lvalue expressions.
     82   // (9) The PostStmt isn't for a non-consumed Stmt or Expr.
     83   // (10) The successor is neither a CallExpr StmtPoint nor a CallEnter or
     84   //      PreImplicitCall (so that we would be able to find it when retrying a
     85   //      call with no inlining).
     86   // FIXME: It may be safe to reclaim PreCall and PostCall nodes as well.
     87 
     88   // Conditions 1 and 2.
     89   if (node->pred_size() != 1 || node->succ_size() != 1)
     90     return false;
     91 
     92   const ExplodedNode *pred = *(node->pred_begin());
     93   if (pred->succ_size() != 1)
     94     return false;
     95 
     96   const ExplodedNode *succ = *(node->succ_begin());
     97   if (succ->pred_size() != 1)
     98     return false;
     99 
    100   // Now reclaim any nodes that are (by definition) not essential to
    101   // analysis history and are not consulted by any client code.
    102   ProgramPoint progPoint = node->getLocation();
    103   if (progPoint.getAs<PreStmtPurgeDeadSymbols>())
    104     return !progPoint.getTag();
    105 
    106   // Condition 3.
    107   if (!progPoint.getAs<PostStmt>() || progPoint.getAs<PostStore>())
    108     return false;
    109 
    110   // Condition 4.
    111   if (progPoint.getTag())
    112     return false;
    113 
    114   // Conditions 5, 6, and 7.
    115   ProgramStateRef state = node->getState();
    116   ProgramStateRef pred_state = pred->getState();
    117   if (state->store != pred_state->store || state->GDM != pred_state->GDM ||
    118       progPoint.getLocationContext() != pred->getLocationContext())
    119     return false;
    120 
    121   // All further checks require expressions. As per #3, we know that we have
    122   // a PostStmt.
    123   const Expr *Ex = dyn_cast<Expr>(progPoint.castAs<PostStmt>().getStmt());
    124   if (!Ex)
    125     return false;
    126 
    127   // Condition 8.
    128   // Do not collect nodes for "interesting" lvalue expressions since they are
    129   // used extensively for generating path diagnostics.
    130   if (isInterestingLValueExpr(Ex))
    131     return false;
    132 
    133   // Condition 9.
    134   // Do not collect nodes for non-consumed Stmt or Expr to ensure precise
    135   // diagnostic generation; specifically, so that we could anchor arrows
    136   // pointing to the beginning of statements (as written in code).
    137   const ParentMap &PM = progPoint.getLocationContext()->getParentMap();
    138   if (!PM.isConsumedExpr(Ex))
    139     return false;
    140 
    141   // Condition 10.
    142   const ProgramPoint SuccLoc = succ->getLocation();
    143   if (Optional<StmtPoint> SP = SuccLoc.getAs<StmtPoint>())
    144     if (CallEvent::isCallStmt(SP->getStmt()))
    145       return false;
    146 
    147   // Condition 10, continuation.
    148   if (SuccLoc.getAs<CallEnter>() || SuccLoc.getAs<PreImplicitCall>())
    149     return false;
    150 
    151   return true;
    152 }
    153 
    154 void ExplodedGraph::collectNode(ExplodedNode *node) {
    155   // Removing a node means:
    156   // (a) changing the predecessors successor to the successor of this node
    157   // (b) changing the successors predecessor to the predecessor of this node
    158   // (c) Putting 'node' onto freeNodes.
    159   assert(node->pred_size() == 1 || node->succ_size() == 1);
    160   ExplodedNode *pred = *(node->pred_begin());
    161   ExplodedNode *succ = *(node->succ_begin());
    162   pred->replaceSuccessor(succ);
    163   succ->replacePredecessor(pred);
    164   FreeNodes.push_back(node);
    165   Nodes.RemoveNode(node);
    166   --NumNodes;
    167   node->~ExplodedNode();
    168 }
    169 
    170 void ExplodedGraph::reclaimRecentlyAllocatedNodes() {
    171   if (ChangedNodes.empty())
    172     return;
    173 
    174   // Only periodically reclaim nodes so that we can build up a set of
    175   // nodes that meet the reclamation criteria.  Freshly created nodes
    176   // by definition have no successor, and thus cannot be reclaimed (see below).
    177   assert(ReclaimCounter > 0);
    178   if (--ReclaimCounter != 0)
    179     return;
    180   ReclaimCounter = ReclaimNodeInterval;
    181 
    182   for (const auto node : ChangedNodes)
    183     if (shouldCollect(node))
    184       collectNode(node);
    185   ChangedNodes.clear();
    186 }
    187 
    188 //===----------------------------------------------------------------------===//
    189 // ExplodedNode.
    190 //===----------------------------------------------------------------------===//
    191 
    192 // An NodeGroup's storage type is actually very much like a TinyPtrVector:
    193 // it can be either a pointer to a single ExplodedNode, or a pointer to a
    194 // BumpVector allocated with the ExplodedGraph's allocator. This allows the
    195 // common case of single-node NodeGroups to be implemented with no extra memory.
    196 //
    197 // Consequently, each of the NodeGroup methods have up to four cases to handle:
    198 // 1. The flag is set and this group does not actually contain any nodes.
    199 // 2. The group is empty, in which case the storage value is null.
    200 // 3. The group contains a single node.
    201 // 4. The group contains more than one node.
    202 using ExplodedNodeVector = BumpVector<ExplodedNode *>;
    203 using GroupStorage = llvm::PointerUnion<ExplodedNode *, ExplodedNodeVector *>;
    204 
    205 void ExplodedNode::addPredecessor(ExplodedNode *V, ExplodedGraph &G) {
    206   assert(!V->isSink());
    207   Preds.addNode(V, G);
    208   V->Succs.addNode(this, G);
    209 }
    210 
    211 void ExplodedNode::NodeGroup::replaceNode(ExplodedNode *node) {
    212   assert(!getFlag());
    213 
    214   GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
    215   assert(Storage.is<ExplodedNode *>());
    216   Storage = node;
    217   assert(Storage.is<ExplodedNode *>());
    218 }
    219 
    220 void ExplodedNode::NodeGroup::addNode(ExplodedNode *N, ExplodedGraph &G) {
    221   assert(!getFlag());
    222 
    223   GroupStorage &Storage = reinterpret_cast<GroupStorage&>(P);
    224   if (Storage.isNull()) {
    225     Storage = N;
    226     assert(Storage.is<ExplodedNode *>());
    227     return;
    228   }
    229 
    230   ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>();
    231 
    232   if (!V) {
    233     // Switch from single-node to multi-node representation.
    234     ExplodedNode *Old = Storage.get<ExplodedNode *>();
    235 
    236     BumpVectorContext &Ctx = G.getNodeAllocator();
    237     V = G.getAllocator().Allocate<ExplodedNodeVector>();
    238     new (V) ExplodedNodeVector(Ctx, 4);
    239     V->push_back(Old, Ctx);
    240 
    241     Storage = V;
    242     assert(!getFlag());
    243     assert(Storage.is<ExplodedNodeVector *>());
    244   }
    245 
    246   V->push_back(N, G.getNodeAllocator());
    247 }
    248 
    249 unsigned ExplodedNode::NodeGroup::size() const {
    250   if (getFlag())
    251     return 0;
    252 
    253   const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
    254   if (Storage.isNull())
    255     return 0;
    256   if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>())
    257     return V->size();
    258   return 1;
    259 }
    260 
    261 ExplodedNode * const *ExplodedNode::NodeGroup::begin() const {
    262   if (getFlag())
    263     return nullptr;
    264 
    265   const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
    266   if (Storage.isNull())
    267     return nullptr;
    268   if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>())
    269     return V->begin();
    270   return Storage.getAddrOfPtr1();
    271 }
    272 
    273 ExplodedNode * const *ExplodedNode::NodeGroup::end() const {
    274   if (getFlag())
    275     return nullptr;
    276 
    277   const GroupStorage &Storage = reinterpret_cast<const GroupStorage &>(P);
    278   if (Storage.isNull())
    279     return nullptr;
    280   if (ExplodedNodeVector *V = Storage.dyn_cast<ExplodedNodeVector *>())
    281     return V->end();
    282   return Storage.getAddrOfPtr1() + 1;
    283 }
    284 
    285 bool ExplodedNode::isTrivial() const {
    286   return pred_size() == 1 && succ_size() == 1 &&
    287          getFirstPred()->getState()->getID() == getState()->getID() &&
    288          getFirstPred()->succ_size() == 1;
    289 }
    290 
    291 const CFGBlock *ExplodedNode::getCFGBlock() const {
    292   ProgramPoint P = getLocation();
    293   if (auto BEP = P.getAs<BlockEntrance>())
    294     return BEP->getBlock();
    295 
    296   // Find the node's current statement in the CFG.
    297   // FIXME: getStmtForDiagnostics() does nasty things in order to provide
    298   // a valid statement for body farms, do we need this behavior here?
    299   if (const Stmt *S = getStmtForDiagnostics())
    300     return getLocationContext()
    301         ->getAnalysisDeclContext()
    302         ->getCFGStmtMap()
    303         ->getBlock(S);
    304 
    305   return nullptr;
    306 }
    307 
    308 static const LocationContext *
    309 findTopAutosynthesizedParentContext(const LocationContext *LC) {
    310   assert(LC->getAnalysisDeclContext()->isBodyAutosynthesized());
    311   const LocationContext *ParentLC = LC->getParent();
    312   assert(ParentLC && "We don't start analysis from autosynthesized code");
    313   while (ParentLC->getAnalysisDeclContext()->isBodyAutosynthesized()) {
    314     LC = ParentLC;
    315     ParentLC = LC->getParent();
    316     assert(ParentLC && "We don't start analysis from autosynthesized code");
    317   }
    318   return LC;
    319 }
    320 
    321 const Stmt *ExplodedNode::getStmtForDiagnostics() const {
    322   // We cannot place diagnostics on autosynthesized code.
    323   // Put them onto the call site through which we jumped into autosynthesized
    324   // code for the first time.
    325   const LocationContext *LC = getLocationContext();
    326   if (LC->getAnalysisDeclContext()->isBodyAutosynthesized()) {
    327     // It must be a stack frame because we only autosynthesize functions.
    328     return cast<StackFrameContext>(findTopAutosynthesizedParentContext(LC))
    329         ->getCallSite();
    330   }
    331   // Otherwise, see if the node's program point directly points to a statement.
    332   // FIXME: Refactor into a ProgramPoint method?
    333   ProgramPoint P = getLocation();
    334   if (auto SP = P.getAs<StmtPoint>())
    335     return SP->getStmt();
    336   if (auto BE = P.getAs<BlockEdge>())
    337     return BE->getSrc()->getTerminatorStmt();
    338   if (auto CE = P.getAs<CallEnter>())
    339     return CE->getCallExpr();
    340   if (auto CEE = P.getAs<CallExitEnd>())
    341     return CEE->getCalleeContext()->getCallSite();
    342   if (auto PIPP = P.getAs<PostInitializer>())
    343     return PIPP->getInitializer()->getInit();
    344   if (auto CEB = P.getAs<CallExitBegin>())
    345     return CEB->getReturnStmt();
    346   if (auto FEP = P.getAs<FunctionExitPoint>())
    347     return FEP->getStmt();
    348 
    349   return nullptr;
    350 }
    351 
    352 const Stmt *ExplodedNode::getNextStmtForDiagnostics() const {
    353   for (const ExplodedNode *N = getFirstSucc(); N; N = N->getFirstSucc()) {
    354     if (const Stmt *S = N->getStmtForDiagnostics()) {
    355       // Check if the statement is '?' or '&&'/'||'.  These are "merges",
    356       // not actual statement points.
    357       switch (S->getStmtClass()) {
    358         case Stmt::ChooseExprClass:
    359         case Stmt::BinaryConditionalOperatorClass:
    360         case Stmt::ConditionalOperatorClass:
    361           continue;
    362         case Stmt::BinaryOperatorClass: {
    363           BinaryOperatorKind Op = cast<BinaryOperator>(S)->getOpcode();
    364           if (Op == BO_LAnd || Op == BO_LOr)
    365             continue;
    366           break;
    367         }
    368         default:
    369           break;
    370       }
    371       // We found the statement, so return it.
    372       return S;
    373     }
    374   }
    375 
    376   return nullptr;
    377 }
    378 
    379 const Stmt *ExplodedNode::getPreviousStmtForDiagnostics() const {
    380   for (const ExplodedNode *N = getFirstPred(); N; N = N->getFirstPred())
    381     if (const Stmt *S = N->getStmtForDiagnostics())
    382       return S;
    383 
    384   return nullptr;
    385 }
    386 
    387 const Stmt *ExplodedNode::getCurrentOrPreviousStmtForDiagnostics() const {
    388   if (const Stmt *S = getStmtForDiagnostics())
    389     return S;
    390 
    391   return getPreviousStmtForDiagnostics();
    392 }
    393 
    394 ExplodedNode *ExplodedGraph::getNode(const ProgramPoint &L,
    395                                      ProgramStateRef State,
    396                                      bool IsSink,
    397                                      bool* IsNew) {
    398   // Profile 'State' to determine if we already have an existing node.
    399   llvm::FoldingSetNodeID profile;
    400   void *InsertPos = nullptr;
    401 
    402   NodeTy::Profile(profile, L, State, IsSink);
    403   NodeTy* V = Nodes.FindNodeOrInsertPos(profile, InsertPos);
    404 
    405   if (!V) {
    406     if (!FreeNodes.empty()) {
    407       V = FreeNodes.back();
    408       FreeNodes.pop_back();
    409     }
    410     else {
    411       // Allocate a new node.
    412       V = (NodeTy*) getAllocator().Allocate<NodeTy>();
    413     }
    414 
    415     ++NumNodes;
    416     new (V) NodeTy(L, State, NumNodes, IsSink);
    417 
    418     if (ReclaimNodeInterval)
    419       ChangedNodes.push_back(V);
    420 
    421     // Insert the node into the node set and return it.
    422     Nodes.InsertNode(V, InsertPos);
    423 
    424     if (IsNew) *IsNew = true;
    425   }
    426   else
    427     if (IsNew) *IsNew = false;
    428 
    429   return V;
    430 }
    431 
    432 ExplodedNode *ExplodedGraph::createUncachedNode(const ProgramPoint &L,
    433                                                 ProgramStateRef State,
    434                                                 int64_t Id,
    435                                                 bool IsSink) {
    436   NodeTy *V = (NodeTy *) getAllocator().Allocate<NodeTy>();
    437   new (V) NodeTy(L, State, Id, IsSink);
    438   return V;
    439 }
    440 
    441 std::unique_ptr<ExplodedGraph>
    442 ExplodedGraph::trim(ArrayRef<const NodeTy *> Sinks,
    443                     InterExplodedGraphMap *ForwardMap,
    444                     InterExplodedGraphMap *InverseMap) const {
    445   if (Nodes.empty())
    446     return nullptr;
    447 
    448   using Pass1Ty = llvm::DenseSet<const ExplodedNode *>;
    449   Pass1Ty Pass1;
    450 
    451   using Pass2Ty = InterExplodedGraphMap;
    452   InterExplodedGraphMap Pass2Scratch;
    453   Pass2Ty &Pass2 = ForwardMap ? *ForwardMap : Pass2Scratch;
    454 
    455   SmallVector<const ExplodedNode*, 10> WL1, WL2;
    456 
    457   // ===- Pass 1 (reverse DFS) -===
    458   for (const auto Sink : Sinks)
    459     if (Sink)
    460       WL1.push_back(Sink);
    461 
    462   // Process the first worklist until it is empty.
    463   while (!WL1.empty()) {
    464     const ExplodedNode *N = WL1.pop_back_val();
    465 
    466     // Have we already visited this node?  If so, continue to the next one.
    467     if (!Pass1.insert(N).second)
    468       continue;
    469 
    470     // If this is a root enqueue it to the second worklist.
    471     if (N->Preds.empty()) {
    472       WL2.push_back(N);
    473       continue;
    474     }
    475 
    476     // Visit our predecessors and enqueue them.
    477     WL1.append(N->Preds.begin(), N->Preds.end());
    478   }
    479 
    480   // We didn't hit a root? Return with a null pointer for the new graph.
    481   if (WL2.empty())
    482     return nullptr;
    483 
    484   // Create an empty graph.
    485   std::unique_ptr<ExplodedGraph> G = MakeEmptyGraph();
    486 
    487   // ===- Pass 2 (forward DFS to construct the new graph) -===
    488   while (!WL2.empty()) {
    489     const ExplodedNode *N = WL2.pop_back_val();
    490 
    491     // Skip this node if we have already processed it.
    492     if (Pass2.find(N) != Pass2.end())
    493       continue;
    494 
    495     // Create the corresponding node in the new graph and record the mapping
    496     // from the old node to the new node.
    497     ExplodedNode *NewN = G->createUncachedNode(N->getLocation(), N->State,
    498                                                N->getID(), N->isSink());
    499     Pass2[N] = NewN;
    500 
    501     // Also record the reverse mapping from the new node to the old node.
    502     if (InverseMap) (*InverseMap)[NewN] = N;
    503 
    504     // If this node is a root, designate it as such in the graph.
    505     if (N->Preds.empty())
    506       G->addRoot(NewN);
    507 
    508     // In the case that some of the intended predecessors of NewN have already
    509     // been created, we should hook them up as predecessors.
    510 
    511     // Walk through the predecessors of 'N' and hook up their corresponding
    512     // nodes in the new graph (if any) to the freshly created node.
    513     for (ExplodedNode::pred_iterator I = N->Preds.begin(), E = N->Preds.end();
    514          I != E; ++I) {
    515       Pass2Ty::iterator PI = Pass2.find(*I);
    516       if (PI == Pass2.end())
    517         continue;
    518 
    519       NewN->addPredecessor(const_cast<ExplodedNode *>(PI->second), *G);
    520     }
    521 
    522     // In the case that some of the intended successors of NewN have already
    523     // been created, we should hook them up as successors.  Otherwise, enqueue
    524     // the new nodes from the original graph that should have nodes created
    525     // in the new graph.
    526     for (ExplodedNode::succ_iterator I = N->Succs.begin(), E = N->Succs.end();
    527          I != E; ++I) {
    528       Pass2Ty::iterator PI = Pass2.find(*I);
    529       if (PI != Pass2.end()) {
    530         const_cast<ExplodedNode *>(PI->second)->addPredecessor(NewN, *G);
    531         continue;
    532       }
    533 
    534       // Enqueue nodes to the worklist that were marked during pass 1.
    535       if (Pass1.count(*I))
    536         WL2.push_back(*I);
    537     }
    538   }
    539 
    540   return G;
    541 }
    542