Home | History | Annotate | Line # | Download | only in Analysis
      1 //===- CFLSteensAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file implements a CFL-base, summary-based alias analysis algorithm. It
     10 // does not depend on types. The algorithm is a mixture of the one described in
     11 // "Demand-driven alias analysis for C" by Xin Zheng and Radu Rugina, and "Fast
     12 // algorithms for Dyck-CFL-reachability with applications to Alias Analysis" by
     13 // Zhang Q, Lyu M R, Yuan H, and Su Z. -- to summarize the papers, we build a
     14 // graph of the uses of a variable, where each node is a memory location, and
     15 // each edge is an action that happened on that memory location.  The "actions"
     16 // can be one of Dereference, Reference, or Assign. The precision of this
     17 // analysis is roughly the same as that of an one level context-sensitive
     18 // Steensgaard's algorithm.
     19 //
     20 // Two variables are considered as aliasing iff you can reach one value's node
     21 // from the other value's node and the language formed by concatenating all of
     22 // the edge labels (actions) conforms to a context-free grammar.
     23 //
     24 // Because this algorithm requires a graph search on each query, we execute the
     25 // algorithm outlined in "Fast algorithms..." (mentioned above)
     26 // in order to transform the graph into sets of variables that may alias in
     27 // ~nlogn time (n = number of variables), which makes queries take constant
     28 // time.
     29 //===----------------------------------------------------------------------===//
     30 
     31 // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
     32 // CFLSteensAA is interprocedural. This is *technically* A Bad Thing, because
     33 // FunctionPasses are only allowed to inspect the Function that they're being
     34 // run on. Realistically, this likely isn't a problem until we allow
     35 // FunctionPasses to run concurrently.
     36 
     37 #include "llvm/Analysis/CFLSteensAliasAnalysis.h"
     38 #include "AliasAnalysisSummary.h"
     39 #include "CFLGraph.h"
     40 #include "StratifiedSets.h"
     41 #include "llvm/ADT/DenseMap.h"
     42 #include "llvm/ADT/Optional.h"
     43 #include "llvm/ADT/SmallVector.h"
     44 #include "llvm/Analysis/TargetLibraryInfo.h"
     45 #include "llvm/IR/Constants.h"
     46 #include "llvm/IR/Function.h"
     47 #include "llvm/IR/Type.h"
     48 #include "llvm/IR/Value.h"
     49 #include "llvm/InitializePasses.h"
     50 #include "llvm/Pass.h"
     51 #include "llvm/Support/Debug.h"
     52 #include "llvm/Support/raw_ostream.h"
     53 #include <algorithm>
     54 #include <cassert>
     55 #include <limits>
     56 #include <memory>
     57 #include <utility>
     58 
     59 using namespace llvm;
     60 using namespace llvm::cflaa;
     61 
     62 #define DEBUG_TYPE "cfl-steens-aa"
     63 
     64 CFLSteensAAResult::CFLSteensAAResult(
     65     std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
     66     : AAResultBase(), GetTLI(std::move(GetTLI)) {}
     67 CFLSteensAAResult::CFLSteensAAResult(CFLSteensAAResult &&Arg)
     68     : AAResultBase(std::move(Arg)), GetTLI(std::move(Arg.GetTLI)) {}
     69 CFLSteensAAResult::~CFLSteensAAResult() = default;
     70 
     71 /// Information we have about a function and would like to keep around.
     72 class CFLSteensAAResult::FunctionInfo {
     73   StratifiedSets<InstantiatedValue> Sets;
     74   AliasSummary Summary;
     75 
     76 public:
     77   FunctionInfo(Function &Fn, const SmallVectorImpl<Value *> &RetVals,
     78                StratifiedSets<InstantiatedValue> S);
     79 
     80   const StratifiedSets<InstantiatedValue> &getStratifiedSets() const {
     81     return Sets;
     82   }
     83 
     84   const AliasSummary &getAliasSummary() const { return Summary; }
     85 };
     86 
     87 const StratifiedIndex StratifiedLink::SetSentinel =
     88     std::numeric_limits<StratifiedIndex>::max();
     89 
     90 //===----------------------------------------------------------------------===//
     91 // Function declarations that require types defined in the namespace above
     92 //===----------------------------------------------------------------------===//
     93 
     94 /// Determines whether it would be pointless to add the given Value to our sets.
     95 static bool canSkipAddingToSets(Value *Val) {
     96   // Constants can share instances, which may falsely unify multiple
     97   // sets, e.g. in
     98   // store i32* null, i32** %ptr1
     99   // store i32* null, i32** %ptr2
    100   // clearly ptr1 and ptr2 should not be unified into the same set, so
    101   // we should filter out the (potentially shared) instance to
    102   // i32* null.
    103   if (isa<Constant>(Val)) {
    104     // TODO: Because all of these things are constant, we can determine whether
    105     // the data is *actually* mutable at graph building time. This will probably
    106     // come for free/cheap with offset awareness.
    107     bool CanStoreMutableData = isa<GlobalValue>(Val) ||
    108                                isa<ConstantExpr>(Val) ||
    109                                isa<ConstantAggregate>(Val);
    110     return !CanStoreMutableData;
    111   }
    112 
    113   return false;
    114 }
    115 
    116 CFLSteensAAResult::FunctionInfo::FunctionInfo(
    117     Function &Fn, const SmallVectorImpl<Value *> &RetVals,
    118     StratifiedSets<InstantiatedValue> S)
    119     : Sets(std::move(S)) {
    120   // Historically, an arbitrary upper-bound of 50 args was selected. We may want
    121   // to remove this if it doesn't really matter in practice.
    122   if (Fn.arg_size() > MaxSupportedArgsInSummary)
    123     return;
    124 
    125   DenseMap<StratifiedIndex, InterfaceValue> InterfaceMap;
    126 
    127   // Our intention here is to record all InterfaceValues that share the same
    128   // StratifiedIndex in RetParamRelations. For each valid InterfaceValue, we
    129   // have its StratifiedIndex scanned here and check if the index is presented
    130   // in InterfaceMap: if it is not, we add the correspondence to the map;
    131   // otherwise, an aliasing relation is found and we add it to
    132   // RetParamRelations.
    133 
    134   auto AddToRetParamRelations = [&](unsigned InterfaceIndex,
    135                                     StratifiedIndex SetIndex) {
    136     unsigned Level = 0;
    137     while (true) {
    138       InterfaceValue CurrValue{InterfaceIndex, Level};
    139 
    140       auto Itr = InterfaceMap.find(SetIndex);
    141       if (Itr != InterfaceMap.end()) {
    142         if (CurrValue != Itr->second)
    143           Summary.RetParamRelations.push_back(
    144               ExternalRelation{CurrValue, Itr->second, UnknownOffset});
    145         break;
    146       }
    147 
    148       auto &Link = Sets.getLink(SetIndex);
    149       InterfaceMap.insert(std::make_pair(SetIndex, CurrValue));
    150       auto ExternalAttrs = getExternallyVisibleAttrs(Link.Attrs);
    151       if (ExternalAttrs.any())
    152         Summary.RetParamAttributes.push_back(
    153             ExternalAttribute{CurrValue, ExternalAttrs});
    154 
    155       if (!Link.hasBelow())
    156         break;
    157 
    158       ++Level;
    159       SetIndex = Link.Below;
    160     }
    161   };
    162 
    163   // Populate RetParamRelations for return values
    164   for (auto *RetVal : RetVals) {
    165     assert(RetVal != nullptr);
    166     assert(RetVal->getType()->isPointerTy());
    167     auto RetInfo = Sets.find(InstantiatedValue{RetVal, 0});
    168     if (RetInfo.hasValue())
    169       AddToRetParamRelations(0, RetInfo->Index);
    170   }
    171 
    172   // Populate RetParamRelations for parameters
    173   unsigned I = 0;
    174   for (auto &Param : Fn.args()) {
    175     if (Param.getType()->isPointerTy()) {
    176       auto ParamInfo = Sets.find(InstantiatedValue{&Param, 0});
    177       if (ParamInfo.hasValue())
    178         AddToRetParamRelations(I + 1, ParamInfo->Index);
    179     }
    180     ++I;
    181   }
    182 }
    183 
    184 // Builds the graph + StratifiedSets for a function.
    185 CFLSteensAAResult::FunctionInfo CFLSteensAAResult::buildSetsFrom(Function *Fn) {
    186   CFLGraphBuilder<CFLSteensAAResult> GraphBuilder(*this, GetTLI(*Fn), *Fn);
    187   StratifiedSetsBuilder<InstantiatedValue> SetBuilder;
    188 
    189   // Add all CFLGraph nodes and all Dereference edges to StratifiedSets
    190   auto &Graph = GraphBuilder.getCFLGraph();
    191   for (const auto &Mapping : Graph.value_mappings()) {
    192     auto Val = Mapping.first;
    193     if (canSkipAddingToSets(Val))
    194       continue;
    195     auto &ValueInfo = Mapping.second;
    196 
    197     assert(ValueInfo.getNumLevels() > 0);
    198     SetBuilder.add(InstantiatedValue{Val, 0});
    199     SetBuilder.noteAttributes(InstantiatedValue{Val, 0},
    200                               ValueInfo.getNodeInfoAtLevel(0).Attr);
    201     for (unsigned I = 0, E = ValueInfo.getNumLevels() - 1; I < E; ++I) {
    202       SetBuilder.add(InstantiatedValue{Val, I + 1});
    203       SetBuilder.noteAttributes(InstantiatedValue{Val, I + 1},
    204                                 ValueInfo.getNodeInfoAtLevel(I + 1).Attr);
    205       SetBuilder.addBelow(InstantiatedValue{Val, I},
    206                           InstantiatedValue{Val, I + 1});
    207     }
    208   }
    209 
    210   // Add all assign edges to StratifiedSets
    211   for (const auto &Mapping : Graph.value_mappings()) {
    212     auto Val = Mapping.first;
    213     if (canSkipAddingToSets(Val))
    214       continue;
    215     auto &ValueInfo = Mapping.second;
    216 
    217     for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
    218       auto Src = InstantiatedValue{Val, I};
    219       for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges)
    220         SetBuilder.addWith(Src, Edge.Other);
    221     }
    222   }
    223 
    224   return FunctionInfo(*Fn, GraphBuilder.getReturnValues(), SetBuilder.build());
    225 }
    226 
    227 void CFLSteensAAResult::scan(Function *Fn) {
    228   auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));
    229   (void)InsertPair;
    230   assert(InsertPair.second &&
    231          "Trying to scan a function that has already been cached");
    232 
    233   // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call
    234   // may get evaluated after operator[], potentially triggering a DenseMap
    235   // resize and invalidating the reference returned by operator[]
    236   auto FunInfo = buildSetsFrom(Fn);
    237   Cache[Fn] = std::move(FunInfo);
    238 
    239   Handles.emplace_front(Fn, this);
    240 }
    241 
    242 void CFLSteensAAResult::evict(Function *Fn) { Cache.erase(Fn); }
    243 
    244 /// Ensures that the given function is available in the cache, and returns the
    245 /// entry.
    246 const Optional<CFLSteensAAResult::FunctionInfo> &
    247 CFLSteensAAResult::ensureCached(Function *Fn) {
    248   auto Iter = Cache.find(Fn);
    249   if (Iter == Cache.end()) {
    250     scan(Fn);
    251     Iter = Cache.find(Fn);
    252     assert(Iter != Cache.end());
    253     assert(Iter->second.hasValue());
    254   }
    255   return Iter->second;
    256 }
    257 
    258 const AliasSummary *CFLSteensAAResult::getAliasSummary(Function &Fn) {
    259   auto &FunInfo = ensureCached(&Fn);
    260   if (FunInfo.hasValue())
    261     return &FunInfo->getAliasSummary();
    262   else
    263     return nullptr;
    264 }
    265 
    266 AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
    267                                      const MemoryLocation &LocB) {
    268   auto *ValA = const_cast<Value *>(LocA.Ptr);
    269   auto *ValB = const_cast<Value *>(LocB.Ptr);
    270 
    271   if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy())
    272     return AliasResult::NoAlias;
    273 
    274   Function *Fn = nullptr;
    275   Function *MaybeFnA = const_cast<Function *>(parentFunctionOfValue(ValA));
    276   Function *MaybeFnB = const_cast<Function *>(parentFunctionOfValue(ValB));
    277   if (!MaybeFnA && !MaybeFnB) {
    278     // The only times this is known to happen are when globals + InlineAsm are
    279     // involved
    280     LLVM_DEBUG(
    281         dbgs()
    282         << "CFLSteensAA: could not extract parent function information.\n");
    283     return AliasResult::MayAlias;
    284   }
    285 
    286   if (MaybeFnA) {
    287     Fn = MaybeFnA;
    288     assert((!MaybeFnB || MaybeFnB == MaybeFnA) &&
    289            "Interprocedural queries not supported");
    290   } else {
    291     Fn = MaybeFnB;
    292   }
    293 
    294   assert(Fn != nullptr);
    295   auto &MaybeInfo = ensureCached(Fn);
    296   assert(MaybeInfo.hasValue());
    297 
    298   auto &Sets = MaybeInfo->getStratifiedSets();
    299   auto MaybeA = Sets.find(InstantiatedValue{ValA, 0});
    300   if (!MaybeA.hasValue())
    301     return AliasResult::MayAlias;
    302 
    303   auto MaybeB = Sets.find(InstantiatedValue{ValB, 0});
    304   if (!MaybeB.hasValue())
    305     return AliasResult::MayAlias;
    306 
    307   auto SetA = *MaybeA;
    308   auto SetB = *MaybeB;
    309   auto AttrsA = Sets.getLink(SetA.Index).Attrs;
    310   auto AttrsB = Sets.getLink(SetB.Index).Attrs;
    311 
    312   // If both values are local (meaning the corresponding set has attribute
    313   // AttrNone or AttrEscaped), then we know that CFLSteensAA fully models them:
    314   // they may-alias each other if and only if they are in the same set.
    315   // If at least one value is non-local (meaning it either is global/argument or
    316   // it comes from unknown sources like integer cast), the situation becomes a
    317   // bit more interesting. We follow three general rules described below:
    318   // - Non-local values may alias each other
    319   // - AttrNone values do not alias any non-local values
    320   // - AttrEscaped do not alias globals/arguments, but they may alias
    321   // AttrUnknown values
    322   if (SetA.Index == SetB.Index)
    323     return AliasResult::MayAlias;
    324   if (AttrsA.none() || AttrsB.none())
    325     return AliasResult::NoAlias;
    326   if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB))
    327     return AliasResult::MayAlias;
    328   if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB))
    329     return AliasResult::MayAlias;
    330   return AliasResult::NoAlias;
    331 }
    332 
    333 AnalysisKey CFLSteensAA::Key;
    334 
    335 CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) {
    336   auto GetTLI = [&AM](Function &F) -> const TargetLibraryInfo & {
    337     return AM.getResult<TargetLibraryAnalysis>(F);
    338   };
    339   return CFLSteensAAResult(GetTLI);
    340 }
    341 
    342 char CFLSteensAAWrapperPass::ID = 0;
    343 INITIALIZE_PASS(CFLSteensAAWrapperPass, "cfl-steens-aa",
    344                 "Unification-Based CFL Alias Analysis", false, true)
    345 
    346 ImmutablePass *llvm::createCFLSteensAAWrapperPass() {
    347   return new CFLSteensAAWrapperPass();
    348 }
    349 
    350 CFLSteensAAWrapperPass::CFLSteensAAWrapperPass() : ImmutablePass(ID) {
    351   initializeCFLSteensAAWrapperPassPass(*PassRegistry::getPassRegistry());
    352 }
    353 
    354 void CFLSteensAAWrapperPass::initializePass() {
    355   auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
    356     return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    357   };
    358   Result.reset(new CFLSteensAAResult(GetTLI));
    359 }
    360 
    361 void CFLSteensAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
    362   AU.setPreservesAll();
    363   AU.addRequired<TargetLibraryInfoWrapperPass>();
    364 }
    365