Home | History | Annotate | Line # | Download | only in Analysis
      1 //===- CFLAndersAliasAnalysis.cpp - Unification-based Alias Analysis ------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file implements a CFL-based, summary-based alias analysis algorithm. It
     10 // differs from CFLSteensAliasAnalysis in its inclusion-based nature while
     11 // CFLSteensAliasAnalysis is unification-based. This pass has worse performance
     12 // than CFLSteensAliasAnalysis (the worst case complexity of
     13 // CFLAndersAliasAnalysis is cubic, while the worst case complexity of
     14 // CFLSteensAliasAnalysis is almost linear), but it is able to yield more
     15 // precise analysis result. The precision of this analysis is roughly the same
     16 // as that of an one level context-sensitive Andersen's algorithm.
     17 //
     18 // The algorithm used here is based on recursive state machine matching scheme
     19 // proposed in "Demand-driven alias analysis for C" by Xin Zheng and Radu
     20 // Rugina. The general idea is to extend the traditional transitive closure
     21 // algorithm to perform CFL matching along the way: instead of recording
     22 // "whether X is reachable from Y", we keep track of "whether X is reachable
     23 // from Y at state Z", where the "state" field indicates where we are in the CFL
     24 // matching process. To understand the matching better, it is advisable to have
     25 // the state machine shown in Figure 3 of the paper available when reading the
     26 // codes: all we do here is to selectively expand the transitive closure by
     27 // discarding edges that are not recognized by the state machine.
     28 //
     29 // There are two differences between our current implementation and the one
     30 // described in the paper:
     31 // - Our algorithm eagerly computes all alias pairs after the CFLGraph is built,
     32 // while in the paper the authors did the computation in a demand-driven
     33 // fashion. We did not implement the demand-driven algorithm due to the
     34 // additional coding complexity and higher memory profile, but if we found it
     35 // necessary we may switch to it eventually.
     36 // - In the paper the authors use a state machine that does not distinguish
     37 // value reads from value writes. For example, if Y is reachable from X at state
     38 // S3, it may be the case that X is written into Y, or it may be the case that
     39 // there's a third value Z that writes into both X and Y. To make that
     40 // distinction (which is crucial in building function summary as well as
     41 // retrieving mod-ref info), we choose to duplicate some of the states in the
     42 // paper's proposed state machine. The duplication does not change the set the
     43 // machine accepts. Given a pair of reachable values, it only provides more
     44 // detailed information on which value is being written into and which is being
     45 // read from.
     46 //
     47 //===----------------------------------------------------------------------===//
     48 
     49 // N.B. AliasAnalysis as a whole is phrased as a FunctionPass at the moment, and
     50 // CFLAndersAA is interprocedural. This is *technically* A Bad Thing, because
     51 // FunctionPasses are only allowed to inspect the Function that they're being
     52 // run on. Realistically, this likely isn't a problem until we allow
     53 // FunctionPasses to run concurrently.
     54 
     55 #include "llvm/Analysis/CFLAndersAliasAnalysis.h"
     56 #include "AliasAnalysisSummary.h"
     57 #include "CFLGraph.h"
     58 #include "llvm/ADT/DenseMap.h"
     59 #include "llvm/ADT/DenseMapInfo.h"
     60 #include "llvm/ADT/DenseSet.h"
     61 #include "llvm/ADT/None.h"
     62 #include "llvm/ADT/Optional.h"
     63 #include "llvm/ADT/STLExtras.h"
     64 #include "llvm/ADT/SmallVector.h"
     65 #include "llvm/ADT/iterator_range.h"
     66 #include "llvm/Analysis/AliasAnalysis.h"
     67 #include "llvm/Analysis/MemoryLocation.h"
     68 #include "llvm/IR/Argument.h"
     69 #include "llvm/IR/Function.h"
     70 #include "llvm/IR/PassManager.h"
     71 #include "llvm/IR/Type.h"
     72 #include "llvm/InitializePasses.h"
     73 #include "llvm/Pass.h"
     74 #include "llvm/Support/Casting.h"
     75 #include "llvm/Support/Compiler.h"
     76 #include "llvm/Support/Debug.h"
     77 #include "llvm/Support/raw_ostream.h"
     78 #include <algorithm>
     79 #include <bitset>
     80 #include <cassert>
     81 #include <cstddef>
     82 #include <cstdint>
     83 #include <functional>
     84 #include <utility>
     85 #include <vector>
     86 
     87 using namespace llvm;
     88 using namespace llvm::cflaa;
     89 
     90 #define DEBUG_TYPE "cfl-anders-aa"
     91 
     92 CFLAndersAAResult::CFLAndersAAResult(
     93     std::function<const TargetLibraryInfo &(Function &F)> GetTLI)
     94     : GetTLI(std::move(GetTLI)) {}
     95 CFLAndersAAResult::CFLAndersAAResult(CFLAndersAAResult &&RHS)
     96     : AAResultBase(std::move(RHS)), GetTLI(std::move(RHS.GetTLI)) {}
     97 CFLAndersAAResult::~CFLAndersAAResult() = default;
     98 
     99 namespace {
    100 
    101 enum class MatchState : uint8_t {
    102   // The following state represents S1 in the paper.
    103   FlowFromReadOnly = 0,
    104   // The following two states together represent S2 in the paper.
    105   // The 'NoReadWrite' suffix indicates that there exists an alias path that
    106   // does not contain assignment and reverse assignment edges.
    107   // The 'ReadOnly' suffix indicates that there exists an alias path that
    108   // contains reverse assignment edges only.
    109   FlowFromMemAliasNoReadWrite,
    110   FlowFromMemAliasReadOnly,
    111   // The following two states together represent S3 in the paper.
    112   // The 'WriteOnly' suffix indicates that there exists an alias path that
    113   // contains assignment edges only.
    114   // The 'ReadWrite' suffix indicates that there exists an alias path that
    115   // contains both assignment and reverse assignment edges. Note that if X and Y
    116   // are reachable at 'ReadWrite' state, it does NOT mean X is both read from
    117   // and written to Y. Instead, it means that a third value Z is written to both
    118   // X and Y.
    119   FlowToWriteOnly,
    120   FlowToReadWrite,
    121   // The following two states together represent S4 in the paper.
    122   FlowToMemAliasWriteOnly,
    123   FlowToMemAliasReadWrite,
    124 };
    125 
    126 using StateSet = std::bitset<7>;
    127 
    128 const unsigned ReadOnlyStateMask =
    129     (1U << static_cast<uint8_t>(MatchState::FlowFromReadOnly)) |
    130     (1U << static_cast<uint8_t>(MatchState::FlowFromMemAliasReadOnly));
    131 const unsigned WriteOnlyStateMask =
    132     (1U << static_cast<uint8_t>(MatchState::FlowToWriteOnly)) |
    133     (1U << static_cast<uint8_t>(MatchState::FlowToMemAliasWriteOnly));
    134 
    135 // A pair that consists of a value and an offset
    136 struct OffsetValue {
    137   const Value *Val;
    138   int64_t Offset;
    139 };
    140 
    141 bool operator==(OffsetValue LHS, OffsetValue RHS) {
    142   return LHS.Val == RHS.Val && LHS.Offset == RHS.Offset;
    143 }
    144 bool operator<(OffsetValue LHS, OffsetValue RHS) {
    145   return std::less<const Value *>()(LHS.Val, RHS.Val) ||
    146          (LHS.Val == RHS.Val && LHS.Offset < RHS.Offset);
    147 }
    148 
    149 // A pair that consists of an InstantiatedValue and an offset
    150 struct OffsetInstantiatedValue {
    151   InstantiatedValue IVal;
    152   int64_t Offset;
    153 };
    154 
    155 bool operator==(OffsetInstantiatedValue LHS, OffsetInstantiatedValue RHS) {
    156   return LHS.IVal == RHS.IVal && LHS.Offset == RHS.Offset;
    157 }
    158 
    159 // We use ReachabilitySet to keep track of value aliases (The nonterminal "V" in
    160 // the paper) during the analysis.
    161 class ReachabilitySet {
    162   using ValueStateMap = DenseMap<InstantiatedValue, StateSet>;
    163   using ValueReachMap = DenseMap<InstantiatedValue, ValueStateMap>;
    164 
    165   ValueReachMap ReachMap;
    166 
    167 public:
    168   using const_valuestate_iterator = ValueStateMap::const_iterator;
    169   using const_value_iterator = ValueReachMap::const_iterator;
    170 
    171   // Insert edge 'From->To' at state 'State'
    172   bool insert(InstantiatedValue From, InstantiatedValue To, MatchState State) {
    173     assert(From != To);
    174     auto &States = ReachMap[To][From];
    175     auto Idx = static_cast<size_t>(State);
    176     if (!States.test(Idx)) {
    177       States.set(Idx);
    178       return true;
    179     }
    180     return false;
    181   }
    182 
    183   // Return the set of all ('From', 'State') pair for a given node 'To'
    184   iterator_range<const_valuestate_iterator>
    185   reachableValueAliases(InstantiatedValue V) const {
    186     auto Itr = ReachMap.find(V);
    187     if (Itr == ReachMap.end())
    188       return make_range<const_valuestate_iterator>(const_valuestate_iterator(),
    189                                                    const_valuestate_iterator());
    190     return make_range<const_valuestate_iterator>(Itr->second.begin(),
    191                                                  Itr->second.end());
    192   }
    193 
    194   iterator_range<const_value_iterator> value_mappings() const {
    195     return make_range<const_value_iterator>(ReachMap.begin(), ReachMap.end());
    196   }
    197 };
    198 
    199 // We use AliasMemSet to keep track of all memory aliases (the nonterminal "M"
    200 // in the paper) during the analysis.
    201 class AliasMemSet {
    202   using MemSet = DenseSet<InstantiatedValue>;
    203   using MemMapType = DenseMap<InstantiatedValue, MemSet>;
    204 
    205   MemMapType MemMap;
    206 
    207 public:
    208   using const_mem_iterator = MemSet::const_iterator;
    209 
    210   bool insert(InstantiatedValue LHS, InstantiatedValue RHS) {
    211     // Top-level values can never be memory aliases because one cannot take the
    212     // addresses of them
    213     assert(LHS.DerefLevel > 0 && RHS.DerefLevel > 0);
    214     return MemMap[LHS].insert(RHS).second;
    215   }
    216 
    217   const MemSet *getMemoryAliases(InstantiatedValue V) const {
    218     auto Itr = MemMap.find(V);
    219     if (Itr == MemMap.end())
    220       return nullptr;
    221     return &Itr->second;
    222   }
    223 };
    224 
    225 // We use AliasAttrMap to keep track of the AliasAttr of each node.
    226 class AliasAttrMap {
    227   using MapType = DenseMap<InstantiatedValue, AliasAttrs>;
    228 
    229   MapType AttrMap;
    230 
    231 public:
    232   using const_iterator = MapType::const_iterator;
    233 
    234   bool add(InstantiatedValue V, AliasAttrs Attr) {
    235     auto &OldAttr = AttrMap[V];
    236     auto NewAttr = OldAttr | Attr;
    237     if (OldAttr == NewAttr)
    238       return false;
    239     OldAttr = NewAttr;
    240     return true;
    241   }
    242 
    243   AliasAttrs getAttrs(InstantiatedValue V) const {
    244     AliasAttrs Attr;
    245     auto Itr = AttrMap.find(V);
    246     if (Itr != AttrMap.end())
    247       Attr = Itr->second;
    248     return Attr;
    249   }
    250 
    251   iterator_range<const_iterator> mappings() const {
    252     return make_range<const_iterator>(AttrMap.begin(), AttrMap.end());
    253   }
    254 };
    255 
    256 struct WorkListItem {
    257   InstantiatedValue From;
    258   InstantiatedValue To;
    259   MatchState State;
    260 };
    261 
    262 struct ValueSummary {
    263   struct Record {
    264     InterfaceValue IValue;
    265     unsigned DerefLevel;
    266   };
    267   SmallVector<Record, 4> FromRecords, ToRecords;
    268 };
    269 
    270 } // end anonymous namespace
    271 
    272 namespace llvm {
    273 
    274 // Specialize DenseMapInfo for OffsetValue.
    275 template <> struct DenseMapInfo<OffsetValue> {
    276   static OffsetValue getEmptyKey() {
    277     return OffsetValue{DenseMapInfo<const Value *>::getEmptyKey(),
    278                        DenseMapInfo<int64_t>::getEmptyKey()};
    279   }
    280 
    281   static OffsetValue getTombstoneKey() {
    282     return OffsetValue{DenseMapInfo<const Value *>::getTombstoneKey(),
    283                        DenseMapInfo<int64_t>::getEmptyKey()};
    284   }
    285 
    286   static unsigned getHashValue(const OffsetValue &OVal) {
    287     return DenseMapInfo<std::pair<const Value *, int64_t>>::getHashValue(
    288         std::make_pair(OVal.Val, OVal.Offset));
    289   }
    290 
    291   static bool isEqual(const OffsetValue &LHS, const OffsetValue &RHS) {
    292     return LHS == RHS;
    293   }
    294 };
    295 
    296 // Specialize DenseMapInfo for OffsetInstantiatedValue.
    297 template <> struct DenseMapInfo<OffsetInstantiatedValue> {
    298   static OffsetInstantiatedValue getEmptyKey() {
    299     return OffsetInstantiatedValue{
    300         DenseMapInfo<InstantiatedValue>::getEmptyKey(),
    301         DenseMapInfo<int64_t>::getEmptyKey()};
    302   }
    303 
    304   static OffsetInstantiatedValue getTombstoneKey() {
    305     return OffsetInstantiatedValue{
    306         DenseMapInfo<InstantiatedValue>::getTombstoneKey(),
    307         DenseMapInfo<int64_t>::getEmptyKey()};
    308   }
    309 
    310   static unsigned getHashValue(const OffsetInstantiatedValue &OVal) {
    311     return DenseMapInfo<std::pair<InstantiatedValue, int64_t>>::getHashValue(
    312         std::make_pair(OVal.IVal, OVal.Offset));
    313   }
    314 
    315   static bool isEqual(const OffsetInstantiatedValue &LHS,
    316                       const OffsetInstantiatedValue &RHS) {
    317     return LHS == RHS;
    318   }
    319 };
    320 
    321 } // end namespace llvm
    322 
    323 class CFLAndersAAResult::FunctionInfo {
    324   /// Map a value to other values that may alias it
    325   /// Since the alias relation is symmetric, to save some space we assume values
    326   /// are properly ordered: if a and b alias each other, and a < b, then b is in
    327   /// AliasMap[a] but not vice versa.
    328   DenseMap<const Value *, std::vector<OffsetValue>> AliasMap;
    329 
    330   /// Map a value to its corresponding AliasAttrs
    331   DenseMap<const Value *, AliasAttrs> AttrMap;
    332 
    333   /// Summary of externally visible effects.
    334   AliasSummary Summary;
    335 
    336   Optional<AliasAttrs> getAttrs(const Value *) const;
    337 
    338 public:
    339   FunctionInfo(const Function &, const SmallVectorImpl<Value *> &,
    340                const ReachabilitySet &, const AliasAttrMap &);
    341 
    342   bool mayAlias(const Value *, LocationSize, const Value *, LocationSize) const;
    343   const AliasSummary &getAliasSummary() const { return Summary; }
    344 };
    345 
    346 static bool hasReadOnlyState(StateSet Set) {
    347   return (Set & StateSet(ReadOnlyStateMask)).any();
    348 }
    349 
    350 static bool hasWriteOnlyState(StateSet Set) {
    351   return (Set & StateSet(WriteOnlyStateMask)).any();
    352 }
    353 
    354 static Optional<InterfaceValue>
    355 getInterfaceValue(InstantiatedValue IValue,
    356                   const SmallVectorImpl<Value *> &RetVals) {
    357   auto Val = IValue.Val;
    358 
    359   Optional<unsigned> Index;
    360   if (auto Arg = dyn_cast<Argument>(Val))
    361     Index = Arg->getArgNo() + 1;
    362   else if (is_contained(RetVals, Val))
    363     Index = 0;
    364 
    365   if (Index)
    366     return InterfaceValue{*Index, IValue.DerefLevel};
    367   return None;
    368 }
    369 
    370 static void populateAttrMap(DenseMap<const Value *, AliasAttrs> &AttrMap,
    371                             const AliasAttrMap &AMap) {
    372   for (const auto &Mapping : AMap.mappings()) {
    373     auto IVal = Mapping.first;
    374 
    375     // Insert IVal into the map
    376     auto &Attr = AttrMap[IVal.Val];
    377     // AttrMap only cares about top-level values
    378     if (IVal.DerefLevel == 0)
    379       Attr |= Mapping.second;
    380   }
    381 }
    382 
    383 static void
    384 populateAliasMap(DenseMap<const Value *, std::vector<OffsetValue>> &AliasMap,
    385                  const ReachabilitySet &ReachSet) {
    386   for (const auto &OuterMapping : ReachSet.value_mappings()) {
    387     // AliasMap only cares about top-level values
    388     if (OuterMapping.first.DerefLevel > 0)
    389       continue;
    390 
    391     auto Val = OuterMapping.first.Val;
    392     auto &AliasList = AliasMap[Val];
    393     for (const auto &InnerMapping : OuterMapping.second) {
    394       // Again, AliasMap only cares about top-level values
    395       if (InnerMapping.first.DerefLevel == 0)
    396         AliasList.push_back(OffsetValue{InnerMapping.first.Val, UnknownOffset});
    397     }
    398 
    399     // Sort AliasList for faster lookup
    400     llvm::sort(AliasList);
    401   }
    402 }
    403 
    404 static void populateExternalRelations(
    405     SmallVectorImpl<ExternalRelation> &ExtRelations, const Function &Fn,
    406     const SmallVectorImpl<Value *> &RetVals, const ReachabilitySet &ReachSet) {
    407   // If a function only returns one of its argument X, then X will be both an
    408   // argument and a return value at the same time. This is an edge case that
    409   // needs special handling here.
    410   for (const auto &Arg : Fn.args()) {
    411     if (is_contained(RetVals, &Arg)) {
    412       auto ArgVal = InterfaceValue{Arg.getArgNo() + 1, 0};
    413       auto RetVal = InterfaceValue{0, 0};
    414       ExtRelations.push_back(ExternalRelation{ArgVal, RetVal, 0});
    415     }
    416   }
    417 
    418   // Below is the core summary construction logic.
    419   // A naive solution of adding only the value aliases that are parameters or
    420   // return values in ReachSet to the summary won't work: It is possible that a
    421   // parameter P is written into an intermediate value I, and the function
    422   // subsequently returns *I. In that case, *I is does not value alias anything
    423   // in ReachSet, and the naive solution will miss a summary edge from (P, 1) to
    424   // (I, 1).
    425   // To account for the aforementioned case, we need to check each non-parameter
    426   // and non-return value for the possibility of acting as an intermediate.
    427   // 'ValueMap' here records, for each value, which InterfaceValues read from or
    428   // write into it. If both the read list and the write list of a given value
    429   // are non-empty, we know that a particular value is an intermidate and we
    430   // need to add summary edges from the writes to the reads.
    431   DenseMap<Value *, ValueSummary> ValueMap;
    432   for (const auto &OuterMapping : ReachSet.value_mappings()) {
    433     if (auto Dst = getInterfaceValue(OuterMapping.first, RetVals)) {
    434       for (const auto &InnerMapping : OuterMapping.second) {
    435         // If Src is a param/return value, we get a same-level assignment.
    436         if (auto Src = getInterfaceValue(InnerMapping.first, RetVals)) {
    437           // This may happen if both Dst and Src are return values
    438           if (*Dst == *Src)
    439             continue;
    440 
    441           if (hasReadOnlyState(InnerMapping.second))
    442             ExtRelations.push_back(ExternalRelation{*Dst, *Src, UnknownOffset});
    443           // No need to check for WriteOnly state, since ReachSet is symmetric
    444         } else {
    445           // If Src is not a param/return, add it to ValueMap
    446           auto SrcIVal = InnerMapping.first;
    447           if (hasReadOnlyState(InnerMapping.second))
    448             ValueMap[SrcIVal.Val].FromRecords.push_back(
    449                 ValueSummary::Record{*Dst, SrcIVal.DerefLevel});
    450           if (hasWriteOnlyState(InnerMapping.second))
    451             ValueMap[SrcIVal.Val].ToRecords.push_back(
    452                 ValueSummary::Record{*Dst, SrcIVal.DerefLevel});
    453         }
    454       }
    455     }
    456   }
    457 
    458   for (const auto &Mapping : ValueMap) {
    459     for (const auto &FromRecord : Mapping.second.FromRecords) {
    460       for (const auto &ToRecord : Mapping.second.ToRecords) {
    461         auto ToLevel = ToRecord.DerefLevel;
    462         auto FromLevel = FromRecord.DerefLevel;
    463         // Same-level assignments should have already been processed by now
    464         if (ToLevel == FromLevel)
    465           continue;
    466 
    467         auto SrcIndex = FromRecord.IValue.Index;
    468         auto SrcLevel = FromRecord.IValue.DerefLevel;
    469         auto DstIndex = ToRecord.IValue.Index;
    470         auto DstLevel = ToRecord.IValue.DerefLevel;
    471         if (ToLevel > FromLevel)
    472           SrcLevel += ToLevel - FromLevel;
    473         else
    474           DstLevel += FromLevel - ToLevel;
    475 
    476         ExtRelations.push_back(ExternalRelation{
    477             InterfaceValue{SrcIndex, SrcLevel},
    478             InterfaceValue{DstIndex, DstLevel}, UnknownOffset});
    479       }
    480     }
    481   }
    482 
    483   // Remove duplicates in ExtRelations
    484   llvm::sort(ExtRelations);
    485   ExtRelations.erase(std::unique(ExtRelations.begin(), ExtRelations.end()),
    486                      ExtRelations.end());
    487 }
    488 
    489 static void populateExternalAttributes(
    490     SmallVectorImpl<ExternalAttribute> &ExtAttributes, const Function &Fn,
    491     const SmallVectorImpl<Value *> &RetVals, const AliasAttrMap &AMap) {
    492   for (const auto &Mapping : AMap.mappings()) {
    493     if (auto IVal = getInterfaceValue(Mapping.first, RetVals)) {
    494       auto Attr = getExternallyVisibleAttrs(Mapping.second);
    495       if (Attr.any())
    496         ExtAttributes.push_back(ExternalAttribute{*IVal, Attr});
    497     }
    498   }
    499 }
    500 
    501 CFLAndersAAResult::FunctionInfo::FunctionInfo(
    502     const Function &Fn, const SmallVectorImpl<Value *> &RetVals,
    503     const ReachabilitySet &ReachSet, const AliasAttrMap &AMap) {
    504   populateAttrMap(AttrMap, AMap);
    505   populateExternalAttributes(Summary.RetParamAttributes, Fn, RetVals, AMap);
    506   populateAliasMap(AliasMap, ReachSet);
    507   populateExternalRelations(Summary.RetParamRelations, Fn, RetVals, ReachSet);
    508 }
    509 
    510 Optional<AliasAttrs>
    511 CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const {
    512   assert(V != nullptr);
    513 
    514   auto Itr = AttrMap.find(V);
    515   if (Itr != AttrMap.end())
    516     return Itr->second;
    517   return None;
    518 }
    519 
    520 bool CFLAndersAAResult::FunctionInfo::mayAlias(
    521     const Value *LHS, LocationSize MaybeLHSSize, const Value *RHS,
    522     LocationSize MaybeRHSSize) const {
    523   assert(LHS && RHS);
    524 
    525   // Check if we've seen LHS and RHS before. Sometimes LHS or RHS can be created
    526   // after the analysis gets executed, and we want to be conservative in those
    527   // cases.
    528   auto MaybeAttrsA = getAttrs(LHS);
    529   auto MaybeAttrsB = getAttrs(RHS);
    530   if (!MaybeAttrsA || !MaybeAttrsB)
    531     return true;
    532 
    533   // Check AliasAttrs before AliasMap lookup since it's cheaper
    534   auto AttrsA = *MaybeAttrsA;
    535   auto AttrsB = *MaybeAttrsB;
    536   if (hasUnknownOrCallerAttr(AttrsA))
    537     return AttrsB.any();
    538   if (hasUnknownOrCallerAttr(AttrsB))
    539     return AttrsA.any();
    540   if (isGlobalOrArgAttr(AttrsA))
    541     return isGlobalOrArgAttr(AttrsB);
    542   if (isGlobalOrArgAttr(AttrsB))
    543     return isGlobalOrArgAttr(AttrsA);
    544 
    545   // At this point both LHS and RHS should point to locally allocated objects
    546 
    547   auto Itr = AliasMap.find(LHS);
    548   if (Itr != AliasMap.end()) {
    549 
    550     // Find out all (X, Offset) where X == RHS
    551     auto Comparator = [](OffsetValue LHS, OffsetValue RHS) {
    552       return std::less<const Value *>()(LHS.Val, RHS.Val);
    553     };
    554 #ifdef EXPENSIVE_CHECKS
    555     assert(llvm::is_sorted(Itr->second, Comparator));
    556 #endif
    557     auto RangePair = std::equal_range(Itr->second.begin(), Itr->second.end(),
    558                                       OffsetValue{RHS, 0}, Comparator);
    559 
    560     if (RangePair.first != RangePair.second) {
    561       // Be conservative about unknown sizes
    562       if (!MaybeLHSSize.hasValue() || !MaybeRHSSize.hasValue())
    563         return true;
    564 
    565       const uint64_t LHSSize = MaybeLHSSize.getValue();
    566       const uint64_t RHSSize = MaybeRHSSize.getValue();
    567 
    568       for (const auto &OVal : make_range(RangePair)) {
    569         // Be conservative about UnknownOffset
    570         if (OVal.Offset == UnknownOffset)
    571           return true;
    572 
    573         // We know that LHS aliases (RHS + OVal.Offset) if the control flow
    574         // reaches here. The may-alias query essentially becomes integer
    575         // range-overlap queries over two ranges [OVal.Offset, OVal.Offset +
    576         // LHSSize) and [0, RHSSize).
    577 
    578         // Try to be conservative on super large offsets
    579         if (LLVM_UNLIKELY(LHSSize > INT64_MAX || RHSSize > INT64_MAX))
    580           return true;
    581 
    582         auto LHSStart = OVal.Offset;
    583         // FIXME: Do we need to guard against integer overflow?
    584         auto LHSEnd = OVal.Offset + static_cast<int64_t>(LHSSize);
    585         auto RHSStart = 0;
    586         auto RHSEnd = static_cast<int64_t>(RHSSize);
    587         if (LHSEnd > RHSStart && LHSStart < RHSEnd)
    588           return true;
    589       }
    590     }
    591   }
    592 
    593   return false;
    594 }
    595 
    596 static void propagate(InstantiatedValue From, InstantiatedValue To,
    597                       MatchState State, ReachabilitySet &ReachSet,
    598                       std::vector<WorkListItem> &WorkList) {
    599   if (From == To)
    600     return;
    601   if (ReachSet.insert(From, To, State))
    602     WorkList.push_back(WorkListItem{From, To, State});
    603 }
    604 
    605 static void initializeWorkList(std::vector<WorkListItem> &WorkList,
    606                                ReachabilitySet &ReachSet,
    607                                const CFLGraph &Graph) {
    608   for (const auto &Mapping : Graph.value_mappings()) {
    609     auto Val = Mapping.first;
    610     auto &ValueInfo = Mapping.second;
    611     assert(ValueInfo.getNumLevels() > 0);
    612 
    613     // Insert all immediate assignment neighbors to the worklist
    614     for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
    615       auto Src = InstantiatedValue{Val, I};
    616       // If there's an assignment edge from X to Y, it means Y is reachable from
    617       // X at S3 and X is reachable from Y at S1
    618       for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) {
    619         propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet,
    620                   WorkList);
    621         propagate(Src, Edge.Other, MatchState::FlowToWriteOnly, ReachSet,
    622                   WorkList);
    623       }
    624     }
    625   }
    626 }
    627 
    628 static Optional<InstantiatedValue> getNodeBelow(const CFLGraph &Graph,
    629                                                 InstantiatedValue V) {
    630   auto NodeBelow = InstantiatedValue{V.Val, V.DerefLevel + 1};
    631   if (Graph.getNode(NodeBelow))
    632     return NodeBelow;
    633   return None;
    634 }
    635 
    636 static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph,
    637                                 ReachabilitySet &ReachSet, AliasMemSet &MemSet,
    638                                 std::vector<WorkListItem> &WorkList) {
    639   auto FromNode = Item.From;
    640   auto ToNode = Item.To;
    641 
    642   auto NodeInfo = Graph.getNode(ToNode);
    643   assert(NodeInfo != nullptr);
    644 
    645   // TODO: propagate field offsets
    646 
    647   // FIXME: Here is a neat trick we can do: since both ReachSet and MemSet holds
    648   // relations that are symmetric, we could actually cut the storage by half by
    649   // sorting FromNode and ToNode before insertion happens.
    650 
    651   // The newly added value alias pair may potentially generate more memory
    652   // alias pairs. Check for them here.
    653   auto FromNodeBelow = getNodeBelow(Graph, FromNode);
    654   auto ToNodeBelow = getNodeBelow(Graph, ToNode);
    655   if (FromNodeBelow && ToNodeBelow &&
    656       MemSet.insert(*FromNodeBelow, *ToNodeBelow)) {
    657     propagate(*FromNodeBelow, *ToNodeBelow,
    658               MatchState::FlowFromMemAliasNoReadWrite, ReachSet, WorkList);
    659     for (const auto &Mapping : ReachSet.reachableValueAliases(*FromNodeBelow)) {
    660       auto Src = Mapping.first;
    661       auto MemAliasPropagate = [&](MatchState FromState, MatchState ToState) {
    662         if (Mapping.second.test(static_cast<size_t>(FromState)))
    663           propagate(Src, *ToNodeBelow, ToState, ReachSet, WorkList);
    664       };
    665 
    666       MemAliasPropagate(MatchState::FlowFromReadOnly,
    667                         MatchState::FlowFromMemAliasReadOnly);
    668       MemAliasPropagate(MatchState::FlowToWriteOnly,
    669                         MatchState::FlowToMemAliasWriteOnly);
    670       MemAliasPropagate(MatchState::FlowToReadWrite,
    671                         MatchState::FlowToMemAliasReadWrite);
    672     }
    673   }
    674 
    675   // This is the core of the state machine walking algorithm. We expand ReachSet
    676   // based on which state we are at (which in turn dictates what edges we
    677   // should examine)
    678   // From a high-level point of view, the state machine here guarantees two
    679   // properties:
    680   // - If *X and *Y are memory aliases, then X and Y are value aliases
    681   // - If Y is an alias of X, then reverse assignment edges (if there is any)
    682   // should precede any assignment edges on the path from X to Y.
    683   auto NextAssignState = [&](MatchState State) {
    684     for (const auto &AssignEdge : NodeInfo->Edges)
    685       propagate(FromNode, AssignEdge.Other, State, ReachSet, WorkList);
    686   };
    687   auto NextRevAssignState = [&](MatchState State) {
    688     for (const auto &RevAssignEdge : NodeInfo->ReverseEdges)
    689       propagate(FromNode, RevAssignEdge.Other, State, ReachSet, WorkList);
    690   };
    691   auto NextMemState = [&](MatchState State) {
    692     if (auto AliasSet = MemSet.getMemoryAliases(ToNode)) {
    693       for (const auto &MemAlias : *AliasSet)
    694         propagate(FromNode, MemAlias, State, ReachSet, WorkList);
    695     }
    696   };
    697 
    698   switch (Item.State) {
    699   case MatchState::FlowFromReadOnly:
    700     NextRevAssignState(MatchState::FlowFromReadOnly);
    701     NextAssignState(MatchState::FlowToReadWrite);
    702     NextMemState(MatchState::FlowFromMemAliasReadOnly);
    703     break;
    704 
    705   case MatchState::FlowFromMemAliasNoReadWrite:
    706     NextRevAssignState(MatchState::FlowFromReadOnly);
    707     NextAssignState(MatchState::FlowToWriteOnly);
    708     break;
    709 
    710   case MatchState::FlowFromMemAliasReadOnly:
    711     NextRevAssignState(MatchState::FlowFromReadOnly);
    712     NextAssignState(MatchState::FlowToReadWrite);
    713     break;
    714 
    715   case MatchState::FlowToWriteOnly:
    716     NextAssignState(MatchState::FlowToWriteOnly);
    717     NextMemState(MatchState::FlowToMemAliasWriteOnly);
    718     break;
    719 
    720   case MatchState::FlowToReadWrite:
    721     NextAssignState(MatchState::FlowToReadWrite);
    722     NextMemState(MatchState::FlowToMemAliasReadWrite);
    723     break;
    724 
    725   case MatchState::FlowToMemAliasWriteOnly:
    726     NextAssignState(MatchState::FlowToWriteOnly);
    727     break;
    728 
    729   case MatchState::FlowToMemAliasReadWrite:
    730     NextAssignState(MatchState::FlowToReadWrite);
    731     break;
    732   }
    733 }
    734 
    735 static AliasAttrMap buildAttrMap(const CFLGraph &Graph,
    736                                  const ReachabilitySet &ReachSet) {
    737   AliasAttrMap AttrMap;
    738   std::vector<InstantiatedValue> WorkList, NextList;
    739 
    740   // Initialize each node with its original AliasAttrs in CFLGraph
    741   for (const auto &Mapping : Graph.value_mappings()) {
    742     auto Val = Mapping.first;
    743     auto &ValueInfo = Mapping.second;
    744     for (unsigned I = 0, E = ValueInfo.getNumLevels(); I < E; ++I) {
    745       auto Node = InstantiatedValue{Val, I};
    746       AttrMap.add(Node, ValueInfo.getNodeInfoAtLevel(I).Attr);
    747       WorkList.push_back(Node);
    748     }
    749   }
    750 
    751   while (!WorkList.empty()) {
    752     for (const auto &Dst : WorkList) {
    753       auto DstAttr = AttrMap.getAttrs(Dst);
    754       if (DstAttr.none())
    755         continue;
    756 
    757       // Propagate attr on the same level
    758       for (const auto &Mapping : ReachSet.reachableValueAliases(Dst)) {
    759         auto Src = Mapping.first;
    760         if (AttrMap.add(Src, DstAttr))
    761           NextList.push_back(Src);
    762       }
    763 
    764       // Propagate attr to the levels below
    765       auto DstBelow = getNodeBelow(Graph, Dst);
    766       while (DstBelow) {
    767         if (AttrMap.add(*DstBelow, DstAttr)) {
    768           NextList.push_back(*DstBelow);
    769           break;
    770         }
    771         DstBelow = getNodeBelow(Graph, *DstBelow);
    772       }
    773     }
    774     WorkList.swap(NextList);
    775     NextList.clear();
    776   }
    777 
    778   return AttrMap;
    779 }
    780 
    781 CFLAndersAAResult::FunctionInfo
    782 CFLAndersAAResult::buildInfoFrom(const Function &Fn) {
    783   CFLGraphBuilder<CFLAndersAAResult> GraphBuilder(
    784       *this, GetTLI(const_cast<Function &>(Fn)),
    785       // Cast away the constness here due to GraphBuilder's API requirement
    786       const_cast<Function &>(Fn));
    787   auto &Graph = GraphBuilder.getCFLGraph();
    788 
    789   ReachabilitySet ReachSet;
    790   AliasMemSet MemSet;
    791 
    792   std::vector<WorkListItem> WorkList, NextList;
    793   initializeWorkList(WorkList, ReachSet, Graph);
    794   // TODO: make sure we don't stop before the fix point is reached
    795   while (!WorkList.empty()) {
    796     for (const auto &Item : WorkList)
    797       processWorkListItem(Item, Graph, ReachSet, MemSet, NextList);
    798 
    799     NextList.swap(WorkList);
    800     NextList.clear();
    801   }
    802 
    803   // Now that we have all the reachability info, propagate AliasAttrs according
    804   // to it
    805   auto IValueAttrMap = buildAttrMap(Graph, ReachSet);
    806 
    807   return FunctionInfo(Fn, GraphBuilder.getReturnValues(), ReachSet,
    808                       std::move(IValueAttrMap));
    809 }
    810 
    811 void CFLAndersAAResult::scan(const Function &Fn) {
    812   auto InsertPair = Cache.insert(std::make_pair(&Fn, Optional<FunctionInfo>()));
    813   (void)InsertPair;
    814   assert(InsertPair.second &&
    815          "Trying to scan a function that has already been cached");
    816 
    817   // Note that we can't do Cache[Fn] = buildSetsFrom(Fn) here: the function call
    818   // may get evaluated after operator[], potentially triggering a DenseMap
    819   // resize and invalidating the reference returned by operator[]
    820   auto FunInfo = buildInfoFrom(Fn);
    821   Cache[&Fn] = std::move(FunInfo);
    822   Handles.emplace_front(const_cast<Function *>(&Fn), this);
    823 }
    824 
    825 void CFLAndersAAResult::evict(const Function *Fn) { Cache.erase(Fn); }
    826 
    827 const Optional<CFLAndersAAResult::FunctionInfo> &
    828 CFLAndersAAResult::ensureCached(const Function &Fn) {
    829   auto Iter = Cache.find(&Fn);
    830   if (Iter == Cache.end()) {
    831     scan(Fn);
    832     Iter = Cache.find(&Fn);
    833     assert(Iter != Cache.end());
    834     assert(Iter->second.hasValue());
    835   }
    836   return Iter->second;
    837 }
    838 
    839 const AliasSummary *CFLAndersAAResult::getAliasSummary(const Function &Fn) {
    840   auto &FunInfo = ensureCached(Fn);
    841   if (FunInfo.hasValue())
    842     return &FunInfo->getAliasSummary();
    843   else
    844     return nullptr;
    845 }
    846 
    847 AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA,
    848                                      const MemoryLocation &LocB) {
    849   auto *ValA = LocA.Ptr;
    850   auto *ValB = LocB.Ptr;
    851 
    852   if (!ValA->getType()->isPointerTy() || !ValB->getType()->isPointerTy())
    853     return AliasResult::NoAlias;
    854 
    855   auto *Fn = parentFunctionOfValue(ValA);
    856   if (!Fn) {
    857     Fn = parentFunctionOfValue(ValB);
    858     if (!Fn) {
    859       // The only times this is known to happen are when globals + InlineAsm are
    860       // involved
    861       LLVM_DEBUG(
    862           dbgs()
    863           << "CFLAndersAA: could not extract parent function information.\n");
    864       return AliasResult::MayAlias;
    865     }
    866   } else {
    867     assert(!parentFunctionOfValue(ValB) || parentFunctionOfValue(ValB) == Fn);
    868   }
    869 
    870   assert(Fn != nullptr);
    871   auto &FunInfo = ensureCached(*Fn);
    872 
    873   // AliasMap lookup
    874   if (FunInfo->mayAlias(ValA, LocA.Size, ValB, LocB.Size))
    875     return AliasResult::MayAlias;
    876   return AliasResult::NoAlias;
    877 }
    878 
    879 AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA,
    880                                      const MemoryLocation &LocB,
    881                                      AAQueryInfo &AAQI) {
    882   if (LocA.Ptr == LocB.Ptr)
    883     return AliasResult::MustAlias;
    884 
    885   // Comparisons between global variables and other constants should be
    886   // handled by BasicAA.
    887   // CFLAndersAA may report NoAlias when comparing a GlobalValue and
    888   // ConstantExpr, but every query needs to have at least one Value tied to a
    889   // Function, and neither GlobalValues nor ConstantExprs are.
    890   if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr))
    891     return AAResultBase::alias(LocA, LocB, AAQI);
    892 
    893   AliasResult QueryResult = query(LocA, LocB);
    894   if (QueryResult == AliasResult::MayAlias)
    895     return AAResultBase::alias(LocA, LocB, AAQI);
    896 
    897   return QueryResult;
    898 }
    899 
    900 AnalysisKey CFLAndersAA::Key;
    901 
    902 CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) {
    903   auto GetTLI = [&AM](Function &F) -> TargetLibraryInfo & {
    904     return AM.getResult<TargetLibraryAnalysis>(F);
    905   };
    906   return CFLAndersAAResult(GetTLI);
    907 }
    908 
    909 char CFLAndersAAWrapperPass::ID = 0;
    910 INITIALIZE_PASS(CFLAndersAAWrapperPass, "cfl-anders-aa",
    911                 "Inclusion-Based CFL Alias Analysis", false, true)
    912 
    913 ImmutablePass *llvm::createCFLAndersAAWrapperPass() {
    914   return new CFLAndersAAWrapperPass();
    915 }
    916 
    917 CFLAndersAAWrapperPass::CFLAndersAAWrapperPass() : ImmutablePass(ID) {
    918   initializeCFLAndersAAWrapperPassPass(*PassRegistry::getPassRegistry());
    919 }
    920 
    921 void CFLAndersAAWrapperPass::initializePass() {
    922   auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
    923     return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    924   };
    925   Result.reset(new CFLAndersAAResult(GetTLI));
    926 }
    927 
    928 void CFLAndersAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
    929   AU.setPreservesAll();
    930   AU.addRequired<TargetLibraryInfoWrapperPass>();
    931 }
    932