Home | History | Annotate | Line # | Download | only in OpenMP
      1 //===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file defines the OpenMPIRBuilder class and helpers used as a convenient
     10 // way to create LLVM instructions for OpenMP directives.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
     15 #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
     16 
     17 #include "llvm/Frontend/OpenMP/OMPConstants.h"
     18 #include "llvm/IR/DebugLoc.h"
     19 #include "llvm/IR/IRBuilder.h"
     20 #include "llvm/Support/Allocator.h"
     21 #include <forward_list>
        #include <utility>
     22 
     23 namespace llvm {
     24 class CanonicalLoopInfo;
     25 
     26 /// An interface to create LLVM-IR for OpenMP directives.
     27 ///
     28 /// Each OpenMP directive has a corresponding public generator method.
     29 class OpenMPIRBuilder {
     30 public:
     31   /// Create a new OpenMPIRBuilder operating on the given module \p M.
     32   /// Construction alone does not modify \p M; see initialize().
     33   OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {}
     34   ~OpenMPIRBuilder();
     35 
     36   /// Initialize the internal state; this will put structure types and
     37   /// potentially other helpers into the underlying module. Must be called
     38   /// exactly once, before any other method.
     39   void initialize();
     40 
     41   /// Finalize the underlying module, e.g., by outlining regions.
     42   /// \param Fn                    The function to be finalized. If null (the
     43   ///                              default), all functions are finalized.
     44   /// \param AllowExtractorSinking Flag to include sinking instructions,
     45   ///                              emitted by CodeExtractor, in the
     46   ///                              outlined region. Default is false.
     47   void finalize(Function *Fn = nullptr, bool AllowExtractorSinking = false);
     48 
     49   /// Add the attributes known for the runtime function \p FnID to \p Fn.
     50   void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
     51 
     52   /// Insertion point type used throughout this interface.
     53   using InsertPointTy = IRBuilder<>::InsertPoint;
     54 
     55   /// Callback type for variable finalization (think destructors).
     56   ///
     57   /// \param CodeGenIP is the insertion point at which the finalization code
     58   ///                  should be placed.
     59   ///
     60   /// A finalize callback knows about all objects that need finalization, e.g.
     61   /// destruction, when the scope of the construct currently being generated
     62   /// is left at the time and location at which the callback is invoked.
     63   using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
     64 
     65   struct FinalizationInfo {
     66     /// The finalization callback provided by the last in-flight invocation of
     67     /// createXXXX for the directive of kind \p DK.
     68     FinalizeCallbackTy FiniCB;
     69 
     70     /// The directive kind of the innermost directive that has an associated
     71     /// region which might require finalization when it is left.
     72     omp::Directive DK;
     73 
     74     /// Flag to indicate if the directive is cancellable (no default value).
     75     bool IsCancellable;
     76   };
     77 
     78   /// Push a copy of the finalization info \p FI on the finalization stack.
     79   ///
     80   /// NOTE: Temporary solution until Clang CG is gone.
     81   void pushFinalizationCB(const FinalizationInfo &FI) {
     82     FinalizationStack.push_back(FI);
     83   }
     84 
     85   /// Pop the most recently pushed entry off the finalization stack.
     86   ///
     87   /// NOTE: Temporary solution until Clang CG is gone.
     88   void popFinalizationCB() { FinalizationStack.pop_back(); }
     89 
     90   /// Callback type for body (=inner region) code generation.
     91   ///
     92   /// The callback takes code locations as arguments, each describing a
     93   /// location at which code might need to be generated or a location that is
     94   /// the target of control transfer.
     95   ///
     96   /// \param AllocaIP is the insertion point at which new alloca instructions
     97   ///                 should be placed.
     98   /// \param CodeGenIP is the insertion point at which the body code should be
     99   ///                  placed.
    100   /// \param ContinuationBB is the basic block to branch to to leave the body.
    101   ///
    102   /// Note that all blocks pointed to by the arguments have terminators.
    103   using BodyGenCallbackTy =
    104       function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
    105                         BasicBlock &ContinuationBB)>;
    106 
    107   /// Storable variant of BodyGenCallbackTy. Created primarily for the
    108   /// sections construct, as llvm::function_ref (BodyGenCallbackTy) is not
    109   /// storable: as described in the comments of the function_ref class, it
    110   /// only contains a non-owning reference to the callable.
    111   using StorableBodyGenCallbackTy =
    112       std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
    113                          BasicBlock &ContinuationBB)>;
    114 
    115   /// Callback type for loop body code generation.
    116   ///
    117   /// \param CodeGenIP is the insertion point where the loop's body code must be
    118   ///                  placed. This will be a dedicated BasicBlock reached by a
    119   ///                  conditional branch from the loop condition check and
    120   ///                  terminated with an unconditional branch to the loop
    121   ///                  latch.
    122   /// \param IndVar    is the induction variable usable at the insertion point.
    123   using LoopBodyGenCallbackTy =
    124       function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
    125 
    126   /// Callback type for variable privatization (think copy & default
    127   /// constructor).
    128   ///
    129   /// \param AllocaIP is the insertion point at which new alloca instructions
    130   ///                 should be placed.
    131   /// \param CodeGenIP is the insertion point at which the privatization code
    132   ///                  should be placed.
    133   /// \param Original The value being copied/created, should not be used in the
    134   ///                 generated IR.
    135   /// \param Inner The equivalent of \p Original that should be used in the
    136   ///              generated IR; this is equal to \p Original if the value is
    137   ///              a pointer and can thus be passed directly, otherwise it is
    138   ///              an equivalent but different value.
    139   /// \param ReplVal The replacement value, i.e. a copy or newly created version
    140   ///                of \p Inner.
    141   ///
    142   /// \returns The new insertion point where code generation continues and
    143   ///          \p ReplVal the replacement value.
    144   using PrivatizeCallbackTy = function_ref<InsertPointTy(
    145       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
    146       Value &Inner, Value *&ReplVal)>;
    147 
    148   /// Description of an LLVM-IR insertion point (IP) and a debug/source location
    149   /// (filename, line, column, ...).
    150   struct LocationDescription {
            /// Capture the saved insert point and current debug location of \p IRB.
    151     template <typename T, typename U>
    152     LocationDescription(const IRBuilder<T, U> &IRB)
    153         : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
            /// Use \p IP and leave the debug location default-constructed.
    154     LocationDescription(const InsertPointTy &IP) : IP(IP) {}
            /// Use the explicitly given insert point \p IP and debug location \p DL.
    155     LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
    156         : IP(IP), DL(DL) {}
    157     InsertPointTy IP;
    158     DebugLoc DL;
    159   };
    160 
    161   /// Emitter methods for OpenMP directives.
    162   ///
    163   ///{
    164 
    165   /// Generator for '#omp barrier'.
    166   ///
    167   /// \param Loc The location where the barrier directive was encountered.
    168   /// \param DK The kind of directive that caused the barrier.
    169   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
    170   /// \param CheckCancelFlag Flag to indicate that a cancel barrier's return
    171   ///                        value should be checked and acted upon.
    172   ///
    173   /// \returns The insertion point after the barrier.
    174   InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
    175                               bool ForceSimpleCall = false,
    176                               bool CheckCancelFlag = true);
    177 
    178   /// Generator for '#omp cancel'.
    179   ///
    180   /// \param Loc The location where the directive was encountered.
    181   /// \param IfCondition The evaluated 'if' clause expression, if any.
    182   /// \param CanceledDirective The kind of directive that is canceled.
    183   ///
    184   /// \returns The insertion point after the cancel directive.
    185   InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
    186                              omp::Directive CanceledDirective);
    187 
    188   /// Generator for '#omp parallel'.
    189   ///
    190   /// \param Loc The insert and source location description.
    191   /// \param AllocaIP The insertion point to be used for alloca instructions.
    192   /// \param BodyGenCB Callback that will generate the region code.
    193   /// \param PrivCB Callback to copy a given variable (think copy constructor).
    194   /// \param FiniCB Callback to finalize variable copies.
    195   /// \param IfCondition The evaluated 'if' clause expression, if any.
    196   /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
    197   /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
    198   /// \param IsCancellable Flag to indicate a cancellable parallel region.
    199   ///
    200   /// \returns The insertion position *after* the parallel region.
    201   IRBuilder<>::InsertPoint
    202   createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
    203                  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
    204                  FinalizeCallbackTy FiniCB, Value *IfCondition,
    205                  Value *NumThreads, omp::ProcBindKind ProcBind,
    206                  bool IsCancellable);
    207 
    208   /// Generator for the control flow structure of an OpenMP canonical loop.
    209   ///
    210   /// This generator operates on the logical iteration space of the loop, i.e.
    211   /// the caller only has to provide the trip count of the loop as defined by
    212   /// base language semantics. The trip count is interpreted as an unsigned
    213   /// integer. The induction variable passed to \p BodyGenCB will be of the same
    214   /// type and run from 0 to \p TripCount - 1. It is up to the callback to
    215   /// convert the logical iteration variable to the loop counter variable in the
    216   /// loop body.
    217   ///
    218   /// \param Loc       The insert and source location description. The insert
    219   ///                  location can be between two instructions or the end of a
    220   ///                  degenerate block (e.g. a BB under construction).
    221   /// \param BodyGenCB Callback that will generate the loop body code.
    222   /// \param TripCount Number of iterations the loop body is executed.
    223   /// \param Name      Base name used to derive BB and instruction names.
    224   ///
    225   /// \returns An object representing the created control flow structure which
    226   ///          can be used for loop-associated directives.
    227   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
    228                                          LoopBodyGenCallbackTy BodyGenCB,
    229                                          Value *TripCount,
    230                                          const Twine &Name = "loop");
    231 
    232   /// Generator for the control flow structure of an OpenMP canonical loop.
    233   ///
    234   /// Instead of a logical iteration space, this allows specifying user-defined
    235   /// loop counter values using increment, upper- and lower bounds. To
    236   /// disambiguate the terminology when counting downwards, instead of lower
    237   /// bounds we use \p Start for the loop counter value in the first body
    238   /// iteration.
    239   ///
    240   /// Consider the following limitations:
    241   ///
    242   ///  * A loop counter space over all integer values of its bit-width cannot be
    243   ///    represented. E.g. using uint8_t, the loop trip count of 256 cannot be
    244   ///    stored into an 8-bit integer:
    245   ///
    246   ///      DO I = 0, 255, 1
    247   ///
    248   ///  * Unsigned wrapping is only supported when wrapping only "once"; E.g.
    249   ///    effectively counting downwards:
    250   ///
    251   ///      for (uint8_t i = 100u; i > 0; i += 127u)
    252   ///
    253   ///
    254   /// TODO: May need to add additional parameters to represent:
    255   ///
    256   ///  * Allow representing downcounting with unsigned integers.
    257   ///
    258   ///  * Sign of the step and the comparison operator might disagree:
    259   ///
    260   ///      for (int i = 0; i < 42; --i)
    261   ///
    262   ///
    263   /// \param Loc       The insert and source location description.
    264   /// \param BodyGenCB Callback that will generate the loop body code.
    265   /// \param Start     Value of the loop counter for the first iteration.
    266   /// \param Stop      Loop counter values past this will stop the
    267   ///                  iterations.
    268   /// \param Step      Loop counter increment after each iteration; negative
    269   ///                  means counting down.
    270   /// \param IsSigned  Whether Start, Stop and Step are signed integers.
    271   /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
    272   ///                      counter.
    273   /// \param ComputeIP Insertion point for instructions computing the trip
    274   ///                  count. Can be used to ensure the trip count is available
    275   ///                  at the outermost loop of a loop nest. If not set,
    276   ///                  defaults to the preheader of the generated loop.
    277   /// \param Name      Base name used to derive BB and instruction names.
    278   ///
    279   /// \returns An object representing the created control flow structure which
    280   ///          can be used for loop-associated directives.
    281   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
    282                                          LoopBodyGenCallbackTy BodyGenCB,
    283                                          Value *Start, Value *Stop, Value *Step,
    284                                          bool IsSigned, bool InclusiveStop,
    285                                          InsertPointTy ComputeIP = {},
    286                                          const Twine &Name = "loop");
    287 
    288   /// Collapse a loop nest into a single loop.
    289   ///
    290   /// Merges loops of a loop nest into a single CanonicalLoopInfo representation
    291   /// that has the same number of innermost loop iterations as the original loop
    292   /// nest. The induction variables of the input loops are derived from the
    293   /// collapsed loop's induction variable. This is intended to be used to
    294   /// implement OpenMP's collapse clause. Before applying a directive,
    295   /// collapseLoops normalizes a loop nest to contain only a single loop and the
    296   /// directive's implementation does not need to handle multiple loops itself.
    297   /// This does not remove the need for directives to handle loop nests in all
    298   /// cases, such as for the ordered(<n>) clause or the simd schedule-clause
    299   /// modifier of the worksharing-loop directive.
    300   ///
    301   /// Example:
    302   /// \code
    303   ///   for (int i = 0; i < 7; ++i) // Canonical loop "i"
    304   ///     for (int j = 0; j < 9; ++j) // Canonical loop "j"
    305   ///       body(i, j);
    306   /// \endcode
    307   ///
    308   /// After collapsing with Loops={i,j}, the loop is changed to
    309   /// \code
    310   ///   for (int ij = 0; ij < 63; ++ij) {
    311   ///     int i = ij / 9;
    312   ///     int j = ij % 9;
    313   ///     body(i, j);
    314   ///   }
    315   /// \endcode
    316   ///
    317   /// In the current implementation, the following limitations apply:
    318   ///
    319   ///  * All input loops have an induction variable of the same type.
    320   ///
    321   ///  * The collapsed loop will have the same trip count integer type as the
    322   ///    input loops. Therefore it is possible that the collapsed loop cannot
    323   ///    represent all iterations of the input loops. For instance, assuming a
    324   ///    32 bit integer type, and two input loops both iterating 2^16 times, the
    325   ///    theoretical trip count of the collapsed loop would be 2^32 iterations,
    326   ///    which cannot be represented in a 32-bit integer. Behavior is undefined
    327   ///    in this case.
    328   ///
    329   ///  * The trip counts of every input loop must be available at \p ComputeIP.
    330   ///    Non-rectangular loops are not yet supported.
    331   ///
    332   ///  * At each nest level, code between a surrounding loop and its nested loop
    333   ///    is hoisted into the loop body, and such code will be executed more
    334   ///    often than before collapsing (or not at all if any inner loop iteration
    335   ///    has a trip count of 0). This is permitted by the OpenMP specification.
    336   ///
    337   /// \param DL        Debug location for instructions added for collapsing,
    338   ///                  such as instructions that derive the input loops'
    339   ///                  induction variables.
    340   /// \param Loops     Loops in the loop nest to collapse. Loops are specified
    341   ///                  from outermost-to-innermost and every control flow of a
    342   ///                  loop's body must pass through its directly nested loop.
    343   /// \param ComputeIP Where to insert the additional instructions that compute
    344   ///                  the collapsed trip count. If not set, defaults to before
    345   ///                  the generated loop.
    346   ///
    347   /// \returns The CanonicalLoopInfo object representing the collapsed loop.
    348   CanonicalLoopInfo *collapseLoops(DebugLoc DL,
    349                                    ArrayRef<CanonicalLoopInfo *> Loops,
    350                                    InsertPointTy ComputeIP);
    351 
    352   /// Modifies the canonical loop to be a statically-scheduled workshare loop.
    353   ///
    354   /// This takes a \p CLI representing a canonical loop, such as the one
    355   /// created by \p createCanonicalLoop and emits additional instructions to
    356   /// turn it into a workshare loop. In particular, it emits a call to an OpenMP
    357   /// runtime function in the preheader to obtain the loop bounds to be used in
    358   /// the current thread, updates the relevant instructions in the canonical
    359   /// loop and emits a call to an OpenMP runtime finalization function after it.
    360   ///
    361   /// \param Loc      The source location description, the insertion location
    362   ///                 is not used.
    363   /// \param CLI      A descriptor of the canonical loop to workshare.
    364   /// \param AllocaIP An insertion point for Alloca instructions usable in the
    365   ///                 preheader of the loop.
    366   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
    367   ///                     the loop.
    368   /// \param Chunk    The size of loop chunk considered as a unit when
    369   ///                 scheduling. If \p nullptr, defaults to 1.
    370   ///
    371   /// \returns Updated CanonicalLoopInfo.
    372   CanonicalLoopInfo *createStaticWorkshareLoop(const LocationDescription &Loc,
    373                                                CanonicalLoopInfo *CLI,
    374                                                InsertPointTy AllocaIP,
    375                                                bool NeedsBarrier,
    376                                                Value *Chunk = nullptr);
    377 
    378   /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
    379   ///
    380   /// This takes a \p CLI representing a canonical loop, such as the one
    381   /// created by \p createCanonicalLoop and emits additional instructions to
    382   /// turn it into a workshare loop. In particular, it emits calls to an OpenMP
    383   /// runtime function in the preheader to obtain, and then in each iteration
    384   /// to update, the loop counter.
    385   /// \param Loc      The source location description, the insertion location
    386   ///                 is not used.
    387   /// \param CLI      A descriptor of the canonical loop to workshare.
    388   /// \param AllocaIP An insertion point for Alloca instructions usable in the
    389   ///                 preheader of the loop.
    390   /// \param SchedType Type of scheduling to be passed to the init function.
    391   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
    392   ///                     the loop.
    393   /// \param Chunk    The size of loop chunk considered as a unit when
    394   ///                 scheduling. If \p nullptr, defaults to 1.
    395   ///
    396   /// \returns Point where to insert code after the loop.
    397   InsertPointTy createDynamicWorkshareLoop(const LocationDescription &Loc,
    398                                            CanonicalLoopInfo *CLI,
    399                                            InsertPointTy AllocaIP,
    400                                            omp::OMPScheduleType SchedType,
    401                                            bool NeedsBarrier,
    402                                            Value *Chunk = nullptr);
    403 
    404   /// Modifies the canonical loop to be a workshare loop.
    405   ///
    406   /// This takes a \p CLI representing a canonical loop, such as the one
    407   /// created by \p createCanonicalLoop and emits additional instructions to
    408   /// turn it into a workshare loop. In particular, it emits a call to an OpenMP
    409   /// runtime function in the preheader to obtain the loop bounds to be used in
    410   /// the current thread, updates the relevant instructions in the canonical
    411   /// loop and emits a call to an OpenMP runtime finalization function after it.
    412   ///
    413   /// \param Loc      The source location description, the insertion location
    414   ///                 is not used.
    415   /// \param CLI      A descriptor of the canonical loop to workshare.
    416   /// \param AllocaIP An insertion point for Alloca instructions usable in the
    417   ///                 preheader of the loop.
    418   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
    419   ///                     the loop.
    420   ///
    421   /// \returns Updated CanonicalLoopInfo.
    422   CanonicalLoopInfo *createWorkshareLoop(const LocationDescription &Loc,
    423                                          CanonicalLoopInfo *CLI,
    424                                          InsertPointTy AllocaIP,
    425                                          bool NeedsBarrier);
    426 
    427   /// Tile a loop nest.
    428   ///
    429   /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
    430   /// \p Loops must be perfectly nested, from outermost to innermost loop
    431   /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
    432   /// of every loop and every tile size must be usable in the outermost
    433   /// loop's preheader. This implies that the loop nest is rectangular.
    434   ///
    435   /// Example:
    436   /// \code
    437   ///   for (int i = 0; i < 15; ++i) // Canonical loop "i"
    438   ///     for (int j = 0; j < 14; ++j) // Canonical loop "j"
    439   ///         body(i, j);
    440   /// \endcode
    441   ///
    442   /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
    443   /// \code
    444   ///   for (int i1 = 0; i1 < 3; ++i1)
    445   ///     for (int j1 = 0; j1 < 2; ++j1)
    446   ///       for (int i2 = 0; i2 < 5; ++i2)
    447   ///         for (int j2 = 0; j2 < 7; ++j2)
    448   ///           body(i1*5+i2, j1*7+j2);
    449   /// \endcode
    450   ///
    451   /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
    452   /// are referred to as the floor loops, and the loops i2 and j2 as the tile
    453   /// loops. Tiling also handles non-constant trip counts, non-constant tile
    454   /// sizes and trip counts that are not multiples of the tile size. In the
    455   /// latter case the tile loop of the last floor-loop iteration will have fewer
    456   /// iterations than specified as its tile size.
    457   ///
    458   ///
    459   /// \param DL        Debug location for instructions added by tiling, for
    460   ///                  instance the floor- and tile trip count computation.
    461   /// \param Loops     Loops to tile. The CanonicalLoopInfo objects are
    462   ///                  invalidated by this method, i.e. should not be used
    463   ///                  after tiling.
    464   /// \param TileSizes For each loop in \p Loops, the tile size for that
    465   ///                  dimension.
    466   ///
    467   /// \returns A list of generated loops. Contains twice as many loops as the
    468   ///          input loop nest; the first half are the floor loops and the
    469   ///          second half are the tile loops.
    470   std::vector<CanonicalLoopInfo *>
    471   tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
    472             ArrayRef<Value *> TileSizes);
    473 
    474   /// Generator for '#omp flush'.
    475   ///
    476   /// \param Loc The location where the flush directive was encountered.
    477   void createFlush(const LocationDescription &Loc);
    478 
    479   /// Generator for '#omp taskwait'.
    480   ///
    481   /// \param Loc The location where the taskwait directive was encountered.
    482   void createTaskwait(const LocationDescription &Loc);
    483 
    484   /// Generator for '#omp taskyield'.
    485   ///
    486   /// \param Loc The location where the taskyield directive was encountered.
    487   void createTaskyield(const LocationDescription &Loc);
    488 
    489   ///}
    490 
    491   /// Return the current insertion point of the underlying IRBuilder.
    492   InsertPointTy getInsertionPoint() { return Builder.saveIP(); }
    493 
    494   /// Point the builder at \p Loc; returns whether \p Loc has an insert block.
    495   bool updateToLocation(const LocationDescription &Loc) {
            // NOTE(review): keep this order. restoreIP presumably may touch the
            // builder's debug location, which is then set explicitly - confirm.
    496     Builder.restoreIP(Loc.IP);
    497     Builder.SetCurrentDebugLocation(Loc.DL);
    498     return Loc.IP.getBlock() != nullptr;
    499   }
    500 
    501   /// Return the function declaration for the runtime function with \p FnID.
          /// NOTE: the parameter \p M shadows the member of the same name.
    502   FunctionCallee getOrCreateRuntimeFunction(Module &M,
    503                                             omp::RuntimeFunction FnID);
    504 
          /// NOTE(review): presumably the Function* counterpart of
          /// getOrCreateRuntimeFunction for \p FnID - confirm in the implementation.
    505   Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
    506 
    507   /// Return the (LLVM-IR) string describing the source location \p LocStr.
    508   Constant *getOrCreateSrcLocStr(StringRef LocStr);
    509 
    510   /// Return the (LLVM-IR) string describing the default source location.
    511   Constant *getOrCreateDefaultSrcLocStr();
    512 
    513   /// Return the (LLVM-IR) string describing the source location identified by
    514   /// \p FunctionName, \p FileName, \p Line and \p Column.
    515   Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
    516                                  unsigned Line, unsigned Column);
    517 
    518   /// Return the (LLVM-IR) string describing the source location \p Loc.
    519   Constant *getOrCreateSrcLocStr(const LocationDescription &Loc);
    520 
    521   /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
    522   /// TODO: Create an enum class for the Reserve2Flags.
    523   Value *getOrCreateIdent(Constant *SrcLocStr,
    524                           omp::IdentFlag Flags = omp::IdentFlag(0),
    525                           unsigned Reserve2Flags = 0);
    526 
    527   /// Get the type corresponding to __kmpc_impl_lanemask_t from the deviceRTL.
    528   Type *getLanemaskType();
    529 
    530   /// Generate control flow and cleanup for cancellation.
    531   ///
    532   /// \param CancelFlag Flag indicating if the cancellation is performed.
    533   /// \param CanceledDirective The kind of directive that is canceled.
    534   void emitCancelationCheckImpl(Value *CancelFlag,
    535                                 omp::Directive CanceledDirective);
    536 
    537   /// Generate a barrier runtime call.
    538   ///
    539   /// \param Loc The location at which the request originated and is fulfilled.
    540   /// \param DK The directive which caused the barrier.
    541   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
    542   /// \param CheckCancelFlag Flag to indicate that a cancel barrier's return
    543   ///                        value should be checked and acted upon.
    544   ///
    545   /// \returns The insertion point after the barrier.
    546   InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
    547                                 omp::Directive DK, bool ForceSimpleCall,
    548                                 bool CheckCancelFlag);
    549 
    550   /// Generate a flush runtime call.
    551   ///
    552   /// \param Loc The location at which the request originated and is fulfilled.
    553   void emitFlush(const LocationDescription &Loc);
    554 
    555   /// The finalization stack made up of finalize callbacks currently in-flight,
    556   /// wrapped into FinalizationInfo objects that also record the kind of the
    557   /// associated directive and whether it is cancellable.
    558   SmallVector<FinalizationInfo, 8> FinalizationStack;
    559 
    560   /// Return true if the last entry in the finalization stack is of kind \p DK
    561   /// and cancellable.
    562   bool isLastFinalizationInfoCancellable(omp::Directive DK) {
    563     return !FinalizationStack.empty() &&
    564            FinalizationStack.back().IsCancellable &&
    565            FinalizationStack.back().DK == DK;
    566   }
    567 
    568   /// Generate a taskwait runtime call.
    569   ///
    570   /// \param Loc The location at which the request originated and is fulfilled.
    571   void emitTaskwaitImpl(const LocationDescription &Loc);
    572 
    573   /// Generate a taskyield runtime call.
    574   ///
    575   /// \param Loc The location at which the request originated and is fulfilled.
    576   void emitTaskyieldImpl(const LocationDescription &Loc);
    577 
    578   /// Return a value holding the current thread ID.
    579   ///
    580   /// \param Ident The ident (ident_t*) describing the query origin.
    581   Value *getOrCreateThreadID(Value *Ident);
    582 
    583   /// The underlying LLVM-IR module
    584   Module &M;
    585 
    586   /// The LLVM-IR Builder used to create IR.
    587   IRBuilder<> Builder;
    588 
    589   /// Map to remember source location strings
    590   StringMap<Constant *> SrcLocStrMap;
    591 
    592   /// Map to remember existing ident_t*.
    593   DenseMap<std::pair<Constant *, uint64_t>, Value *> IdentMap;
    594 
    595   /// Helper that contains information about regions we need to outline
    596   /// during finalization.
    597   struct OutlineInfo {
    598     using PostOutlineCBTy = std::function<void(Function &)>;
    599     PostOutlineCBTy PostOutlineCB;
    600     BasicBlock *EntryBB, *ExitBB;
    601 
    602     /// Collect all blocks in between EntryBB and ExitBB in both the given
    603     /// vector and set.
    604     void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
    605                        SmallVectorImpl<BasicBlock *> &BlockVector);
    606 
    607     /// Return the function that contains the region to be outlined.
    608     Function *getFunction() const { return EntryBB->getParent(); }
    609   };
    610 
    611   /// Collection of regions that need to be outlined during finalization.
    612   SmallVector<OutlineInfo, 16> OutlineInfos;
    613 
    614   /// Collection of owned canonical loop objects that eventually need to be
    615   /// free'd.
    616   std::forward_list<CanonicalLoopInfo> LoopInfos;
    617 
    618   /// Add a new region that will be outlined later.
    619   void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
    620 
    621   /// A map from unique names to the auto-generated variables they refer to.
    622   /// It stores variables with the following names: 1) ".gomp_critical_user_" +
    623   /// <critical_section_name> + ".var" for "omp critical" directives; 2)
    624   /// <mangled_name_for_global_var> + ".cache." for the cache of threadprivate
    625   /// variables.
    626   StringMap<AssertingVH<Constant>, BumpPtrAllocator> InternalVars;
    627 
    628   /// Create the global variable holding the offload mappings information.
    629   GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
    630                                         std::string VarName);
    631 
    632   /// Create the global variable holding the offload names information.
    633   GlobalVariable *
    634   createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
    635                         std::string VarName);
    636 
    637 public:
    638   /// Generator for __kmpc_copyprivate
    639   ///
    640   /// \param Loc The source location description.
    641   /// \param BufSize Number of elements in the buffer.
    642   /// \param CpyBuf List of pointers to data to be copied.
    643   /// \param CpyFn function to call for copying data.
    644   /// \param DidIt flag variable; 1 for 'single' thread, 0 otherwise.
    645   ///
    646   /// \return The insertion position *after* the CopyPrivate call.
    647 
    648   InsertPointTy createCopyPrivate(const LocationDescription &Loc,
    649                                   llvm::Value *BufSize, llvm::Value *CpyBuf,
    650                                   llvm::Value *CpyFn, llvm::Value *DidIt);
    651 
    652   /// Generator for '#omp single'
    653   ///
    654   /// \param Loc The source location description.
    655   /// \param BodyGenCB Callback that will generate the region code.
    656   /// \param FiniCB Callback to finalize variable copies.
    657   /// \param DidIt Local variable used as a flag to indicate 'single' thread
    658   ///
    659   /// \returns The insertion position *after* the single call.
    660   InsertPointTy createSingle(const LocationDescription &Loc,
    661                              BodyGenCallbackTy BodyGenCB,
    662                              FinalizeCallbackTy FiniCB, llvm::Value *DidIt);
    663 
    664   /// Generator for '#omp master'
    665   ///
    666   /// \param Loc The insert and source location description.
    667   /// \param BodyGenCB Callback that will generate the region code.
    668   /// \param FiniCB Callback to finalize variable copies.
    669   ///
    670   /// \returns The insertion position *after* the master.
    671   InsertPointTy createMaster(const LocationDescription &Loc,
    672                              BodyGenCallbackTy BodyGenCB,
    673                              FinalizeCallbackTy FiniCB);
    674 
    675   /// Generator for '#omp masked'
    676   ///
    677   /// \param Loc The insert and source location description.
    678   /// \param BodyGenCB Callback that will generate the region code.
    679   /// \param FiniCB Callback to finalize variable copies.
    680   /// \param Filter Value of the 'filter' clause (presumably the thread id).
    681   /// \returns The insertion position *after* the masked region.
    682   InsertPointTy createMasked(const LocationDescription &Loc,
    683                              BodyGenCallbackTy BodyGenCB,
    684                              FinalizeCallbackTy FiniCB, Value *Filter);
    685 
    686   /// Generator for '#omp critical'
    687   ///
    688   /// \param Loc The insert and source location description.
    689   /// \param BodyGenCB Callback that will generate the region body code.
    690   /// \param FiniCB Callback to finalize variable copies.
    691   /// \param CriticalName Name of the lock used by the critical directive.
    692   /// \param HintInst Hint Instruction for hint clause associated with critical
    693   ///
    694   /// \returns The insertion position *after* the critical region.
    695   InsertPointTy createCritical(const LocationDescription &Loc,
    696                                BodyGenCallbackTy BodyGenCB,
    697                                FinalizeCallbackTy FiniCB,
    698                                StringRef CriticalName, Value *HintInst);
    699 
    700   /// Generator for '#omp sections'
    701   ///
    702   /// \param Loc The insert and source location description.
    703   /// \param AllocaIP The insertion point to be used for alloca instructions.
    704   /// \param SectionCBs Callbacks that will generate the body of each section.
    705   /// \param PrivCB Callback to copy a given variable (think copy constructor).
    706   /// \param FiniCB Callback to finalize variable copies.
    707   /// \param IsCancellable Flag to indicate a cancellable region.
    708   /// \param IsNowait If true, the barrier that ensures all sections are
    709   /// executed before moving forward will not be generated.
    710   /// \returns The insertion position *after* the sections.
    711   InsertPointTy createSections(const LocationDescription &Loc,
    712                                InsertPointTy AllocaIP,
    713                                ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
    714                                PrivatizeCallbackTy PrivCB,
    715                                FinalizeCallbackTy FiniCB, bool IsCancellable,
    716                                bool IsNowait);
    717 
    718   /// Generator for '#omp section'
    719   ///
    720   /// \param Loc The insert and source location description.
    721   /// \param BodyGenCB Callback that will generate the region body code.
    722   /// \param FiniCB Callback to finalize variable copies.
    723   /// \returns The insertion position *after* the section.
    724   InsertPointTy createSection(const LocationDescription &Loc,
    725                               BodyGenCallbackTy BodyGenCB,
    726                               FinalizeCallbackTy FiniCB);
    727 
    728   /// Generate conditional branch and relevant BasicBlocks through which private
    729   /// threads copy the 'copyin' variables from Master copy to threadprivate
    730   /// copies.
    731   ///
    732   /// \param IP insertion block for copyin conditional
    733   /// \param MasterAddr a pointer to the master variable
    734   /// \param PrivateAddr a pointer to the threadprivate variable
    735   /// \param IntPtrTy Pointer size type
    736   /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
    737   ///                    and copy.in.end block
    738   ///
    739   /// \returns The insertion point where the copying operation is to be emitted.
    740   InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
    741                                          Value *PrivateAddr,
    742                                          llvm::IntegerType *IntPtrTy,
    743                                          bool BranchtoEnd = true);
    744 
    745   /// Create a runtime call for kmpc_Alloc
    746   ///
    747   /// \param Loc The insert and source location description.
    748   /// \param Size Size of allocated memory space
    749   /// \param Allocator Allocator information instruction
    750   /// \param Name Name of call Instruction for OMP_alloc
    751   ///
    752   /// \returns CallInst to the OMP_Alloc call
    753   CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
    754                            Value *Allocator, std::string Name = "");
    755 
    756   /// Create a runtime call for kmpc_free
    757   ///
    758   /// \param Loc The insert and source location description.
    759   /// \param Addr Address of memory space to be freed
    760   /// \param Allocator Allocator information instruction
    761   /// \param Name Name of call Instruction for OMP_Free
    762   ///
    763   /// \returns CallInst to the OMP_Free call
    764   CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
    765                           Value *Allocator, std::string Name = "");
    766 
    767   /// Create a runtime call for kmpc_threadprivate_cached
    768   ///
    769   /// \param Loc The insert and source location description.
    770   /// \param Pointer pointer to data to be cached
    771   /// \param Size size of data to be cached
    772   /// \param Name Name of call Instruction for callinst
    773   ///
    774   /// \returns CallInst to the thread private cache call.
    775   CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
    776                                       llvm::Value *Pointer,
    777                                       llvm::ConstantInt *Size,
    778                                       const llvm::Twine &Name = Twine(""));
    779 
    780   /// Declarations for LLVM-IR types (simple, array, function and structure) are
    781   /// generated below. Their names are defined and used in OpenMPKinds.def. Here
    782   /// we provide the declarations, the initializeTypes function will provide the
    783   /// values.
    784   ///
    785   ///{
    786 #define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
    787 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
    788   ArrayType *VarName##Ty = nullptr;                                            \
    789   PointerType *VarName##PtrTy = nullptr;
    790 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
    791   FunctionType *VarName = nullptr;                                             \
    792   PointerType *VarName##Ptr = nullptr;
    793 #define OMP_STRUCT_TYPE(VarName, StrName, ...)                                 \
    794   StructType *VarName = nullptr;                                               \
    795   PointerType *VarName##Ptr = nullptr;
    796 #include "llvm/Frontend/OpenMP/OMPKinds.def"
    797 
    798   ///}
    799 
    800 private:
    801   /// Create all simple and struct types exposed by the runtime and remember
    802   /// the llvm::PointerTypes of them for easy access later.
    803   void initializeTypes(Module &M);
    804 
    805   /// Common interface for generating entry calls for OMP Directives.
    806   /// if the directive has a region/body, It will set the insertion
    807   /// point to the body
    808   ///
    809   /// \param OMPD Directive to generate entry blocks for
    810   /// \param EntryCall Call to the entry OMP Runtime Function
    811   /// \param ExitBB block where the region ends.
    812   /// \param Conditional indicate if the entry call result will be used
    813   ///        to evaluate a conditional of whether a thread will execute
    814   ///        body code or not.
    815   ///
    816   /// \return The insertion position in exit block
    817   InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
    818                                          BasicBlock *ExitBB,
    819                                          bool Conditional = false);
    820 
    821   /// Common interface to finalize the region
    822   ///
    823   /// \param OMPD Directive to generate exiting code for
    824   /// \param FinIP Insertion point for emitting Finalization code and exit call
    825   /// \param ExitCall Call to the ending OMP Runtime Function
    826   /// \param HasFinalize indicate if the directive will require finalization
    827   ///         and has a finalization callback in the stack that
    828   ///        should be called.
    829   ///
    830   /// \return The insertion position in exit block
    831   InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
    832                                         InsertPointTy FinIP,
    833                                         Instruction *ExitCall,
    834                                         bool HasFinalize = true);
    835 
    836   /// Common Interface to generate OMP inlined regions
    837   ///
    838   /// \param OMPD Directive to generate inlined region for
    839   /// \param EntryCall Call to the entry OMP Runtime Function
    840   /// \param ExitCall Call to the ending OMP Runtime Function
    841   /// \param BodyGenCB Body code generation callback.
    842   /// \param FiniCB Finalization Callback. Will be called when finalizing region
    843   /// \param Conditional indicate if the entry call result will be used
    844   ///        to evaluate a conditional of whether a thread will execute
    845   ///        body code or not.
    846   /// \param HasFinalize indicate if the directive will require finalization
    847   ///        and has a finalization callback in the stack that
    848   ///        should be called.
    849   /// \param IsCancellable if HasFinalize is set to true, indicate if the
    850   ///        directive should be cancellable.
    851   /// \return The insertion point after the region
    852 
    853   InsertPointTy
    854   EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
    855                        Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
    856                        FinalizeCallbackTy FiniCB, bool Conditional = false,
    857                        bool HasFinalize = true, bool IsCancellable = false);
    858 
    859   /// Get the platform-specific name separator.
    860   /// \param Parts different parts of the final name that needs separation
    861   /// \param FirstSeparator First separator used between the initial two
    862   ///        parts of the name.
    863   /// \param Separator separator used between all of the rest consecutive
    864   ///        parts of the name
    865   static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
    866                                            StringRef FirstSeparator,
    867                                            StringRef Separator);
    868 
    869   /// Gets (if a variable with the given name already exists) or creates an
    870   /// internal global variable with the specified Name. The created variable has
    871   /// linkage CommonLinkage by default and is initialized by null value.
    872   /// \param Ty Type of the variable; must match if the variable already exists.
    873   /// \param Name Name of the variable.
    874   /// \param AddressSpace Address space to use for the variable; defaults to 0.
    875   Constant *getOrCreateOMPInternalVariable(Type *Ty, const Twine &Name,
    876                                            unsigned AddressSpace = 0);
    877 
    878   /// Returns corresponding lock object for the specified critical region
    879   /// name. If the lock object does not exist it is created, otherwise the
    880   /// reference to the existing copy is returned.
    881   /// \param CriticalName Name of the critical region.
    882   ///
    883   Value *getOMPCriticalRegionLock(StringRef CriticalName);
    884 
    885   /// Callback type for Atomic Expression update
    886   /// ex:
    887   /// \code{.cpp}
    888   /// unsigned x = 0;
    889   /// #pragma omp atomic update
    890   /// x = Expr(x_old);  //Expr() is any legal operation
    891   /// \endcode
    892   ///
    893   /// \param XOld the value of the atomic memory address to use for update
    894   /// \param IRB reference to the IRBuilder to use
    895   ///
    896   /// \returns Value to update X to.
    897   using AtomicUpdateCallbackTy =
    898       const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
    899 
    900 private:
    901   enum AtomicKind { Read, Write, Update, Capture };
    902 
    903   /// Determine whether to emit a flush or not.
    904   ///
    905   /// \param Loc    The insert and source location description.
    906   /// \param AO     The required atomic ordering
    907   /// \param AK     The OpenMP atomic operation kind used.
    908   ///
    909   /// \returns Whether a flush was emitted or not.
    910   bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
    911                                     AtomicOrdering AO, AtomicKind AK);
    912 
    913   /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X
    914   /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
    915   /// Only Scalar data types.
    916   ///
    917   /// \param AllocIP  Instruction to create AllocaInst before.
    918   /// \param X        The target atomic pointer to be updated
    919   /// \param Expr     The value to update X with.
    920   /// \param AO       Atomic ordering of the generated atomic
    921   ///                 instructions.
    922   /// \param RMWOp    The binary operation used for update. If the
    923   ///                 operation is not supported by atomicRMW,
    924   ///                 or belongs to {FADD, FSUB, BAD_BINOP}, then
    925   ///                 a `cmpExch` based atomic will be generated.
    926   /// \param UpdateOp Code generator for complex expressions that cannot be
    927   ///                 expressed through atomicrmw instruction.
    928   /// \param VolatileX       true if \a X is volatile.
    929   /// \param IsXLHSInRHSPart true if \a X is Left H.S. in Right H.S. part of
    930   ///                        the update expression, false otherwise.
    931   ///                        (e.g. true for X = X BinOp Expr)
    932   ///
    933   /// \returns A pair of the old value of X before the update, and the value
    934   ///          used for the update.
    935   std::pair<Value *, Value *> emitAtomicUpdate(Instruction *AllocIP, Value *X,
    936                                                Value *Expr, AtomicOrdering AO,
    937                                                AtomicRMWInst::BinOp RMWOp,
    938                                                AtomicUpdateCallbackTy &UpdateOp,
    939                                                bool VolatileX,
    940                                                bool IsXLHSInRHSPart);
    941 
    942   /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2.
    943   ///
    944   /// \returns The instruction, as a Value.
    945   Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
    946                                 AtomicRMWInst::BinOp RMWOp);
    947 
    948 public:
    949   /// a struct to pack relevant information while generating atomic Ops
    950   struct AtomicOpValue {
    951     Value *Var = nullptr;
    952     bool IsSigned = false;
    953     bool IsVolatile = false;
    954   };
    955 
    956   /// Emit atomic Read for : V = X --- Only Scalar data types.
    957   ///
    958   /// \param Loc    The insert and source location description.
    959   /// \param X      The target pointer to be atomically read
    960   /// \param V      Memory address where to store the atomically read
    961   ///               value
    962   /// \param AO     Atomic ordering of the generated atomic
    963   ///               instructions.
    964   ///
    965   /// \return Insertion point after generated atomic read IR.
    966   InsertPointTy createAtomicRead(const LocationDescription &Loc,
    967                                  AtomicOpValue &X, AtomicOpValue &V,
    968                                  AtomicOrdering AO);
    969 
    970   /// Emit atomic write for : X = Expr --- Only Scalar data types.
    971   ///
    972   /// \param Loc    The insert and source location description.
    973   /// \param X      The target pointer to be atomically written to
    974   /// \param Expr   The value to store.
    975   /// \param AO     Atomic ordering of the generated atomic
    976   ///               instructions.
    977   ///
    978   /// \return Insertion point after generated atomic Write IR.
    979   InsertPointTy createAtomicWrite(const LocationDescription &Loc,
    980                                   AtomicOpValue &X, Value *Expr,
    981                                   AtomicOrdering AO);
    982 
    983   /// Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X
    984   /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
    985   /// Only Scalar data types.
    986   ///
    987   /// \param Loc      The insert and source location description.
    988   /// \param AllocIP  Instruction to create AllocaInst before.
    989   /// \param X        The target atomic pointer to be updated
    990   /// \param Expr     The value to update X with.
    991   /// \param AO       Atomic ordering of the generated atomic instructions.
    992   /// \param RMWOp    The binary operation used for update. If the operation
    993   ///                 is not supported by atomicRMW, or belongs to
    994   ///                 {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
    995   ///                 atomic will be generated.
    996   /// \param UpdateOp Code generator for complex expressions that cannot be
    997   ///                 expressed through atomicrmw instruction.
    998   /// \param IsXLHSInRHSPart true if \a X is Left H.S. in Right H.S. part of
    999   ///                        the update expression, false otherwise.
   1000   ///                        (e.g. true for X = X BinOp Expr)
   1001   ///
   1002   /// \return Insertion point after generated atomic update IR.
   1003   InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
   1004                                    Instruction *AllocIP, AtomicOpValue &X,
   1005                                    Value *Expr, AtomicOrdering AO,
   1006                                    AtomicRMWInst::BinOp RMWOp,
   1007                                    AtomicUpdateCallbackTy &UpdateOp,
   1008                                    bool IsXLHSInRHSPart);
   1009 
   1010   /// Emit atomic capture for constructs: --- Only Scalar data types
   1011   /// V = X; X = X BinOp Expr ,
   1012   /// X = X BinOp Expr; V = X,
   1013   /// V = X; X = Expr BinOp X,
   1014   /// X = Expr BinOp X; V = X,
   1015   /// V = X; X = UpdateOp(X),
   1016   /// X = UpdateOp(X); V = X,
   1017   ///
   1018   /// \param Loc        The insert and source location description.
   1019   /// \param AllocIP    Instruction to create AllocaInst before.
   1020   /// \param X          The target atomic pointer to be updated
   1021   /// \param V          Memory address where to store captured value
   1022   /// \param Expr       The value to update X with.
   1023   /// \param AO         Atomic ordering of the generated atomic instructions
   1024   /// \param RMWOp      The binary operation used for update. If the
   1025   ///                   operation is not supported by atomicRMW, or belongs to
   1026   ///                   {FADD, FSUB, BAD_BINOP}, then a cmpExch based
   1027   ///                   atomic will be generated.
   1028   /// \param UpdateOp   Code generator for complex expressions that cannot be
   1029   ///                   expressed through atomicrmw instruction.
   1030   /// \param UpdateExpr true if X is an in place update of the form
   1031   ///                   X = X BinOp Expr or X = Expr BinOp X
   1032   /// \param IsPostfixUpdate true if original value of 'x' must be stored in
   1033   ///                        'v', not an updated one.
   1034   /// \param IsXLHSInRHSPart true if X is Left H.S. in Right H.S. part of the
   1035   ///                        update expression, false otherwise.
   1036   ///                        (e.g. true for X = X BinOp Expr)
   1037   ///
   1038   /// \return Insertion point after generated atomic capture IR.
   1039   InsertPointTy
   1040   createAtomicCapture(const LocationDescription &Loc, Instruction *AllocIP,
   1041                       AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
   1042                       AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
   1043                       AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
   1044                       bool IsPostfixUpdate, bool IsXLHSInRHSPart);
   1045 
   1046   /// Create the control flow structure of a canonical OpenMP loop.
   1047   ///
   1048   /// The emitted loop will be disconnected, i.e. no edge to the loop's
   1049   /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
   1050   /// IRBuilder location is not preserved.
   1051   ///
   1052   /// \param DL        DebugLoc used for the instructions in the skeleton.
   1053   /// \param TripCount Value to be used for the trip count.
   1054   /// \param F         Function in which to insert the BasicBlocks.
   1055   /// \param PreInsertBefore  Where to insert BBs that execute before the body,
   1056   ///                         typically the body itself.
   1057   /// \param PostInsertBefore Where to insert BBs that execute after the body.
   1058   /// \param Name      Base name used to derive BB
   1059   ///                  and instruction names.
   1060   ///
   1061   /// \returns The CanonicalLoopInfo that represents the emitted loop.
   1062   CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
   1063                                         Function *F,
   1064                                         BasicBlock *PreInsertBefore,
   1065                                         BasicBlock *PostInsertBefore,
   1066                                         const Twine &Name = {});
   1067 };
   1068 
   1069 /// Class to represented the control flow structure of an OpenMP canonical loop.
   1070 ///
   1071 /// The control-flow structure is standardized for easy consumption by
   1072 /// directives associated with loops. For instance, the worksharing-loop
   1073 /// construct may change this control flow such that each loop iteration is
   1074 /// executed on only one thread.
   1075 ///
   1076 /// The control flow can be described as follows:
   1077 ///
   1078 ///     Preheader
   1079 ///        |
   1080 ///  /-> Header
   1081 ///  |     |
   1082 ///  |    Cond---\
   1083 ///  |     |     |
   1084 ///  |    Body   |
   1085 ///  |    | |    |
   1086 ///  |   <...>   |
   1087 ///  |    | |    |
   1088 ///   \--Latch   |
   1089 ///              |
   1090 ///             Exit
   1091 ///              |
   1092 ///            After
   1093 ///
   1094 /// Code in the header, condition block, latch and exit block must not have any
   1095 /// side-effect. The body block is the single entry point into the loop body,
   1096 /// which may contain arbitrary control flow as long as all control paths
   1097 /// eventually branch to the latch block.
   1098 ///
   1099 /// Defined outside OpenMPIRBuilder because one cannot forward-declare nested
   1100 /// classes.
   1101 class CanonicalLoopInfo {
   1102   friend class OpenMPIRBuilder;
   1103 
   1104 private:
   1105   /// Whether this object currently represents a loop.
   1106   bool IsValid = false;
   1107 
   1108   BasicBlock *Preheader;
   1109   BasicBlock *Header;
   1110   BasicBlock *Cond;
   1111   BasicBlock *Body;
   1112   BasicBlock *Latch;
   1113   BasicBlock *Exit;
   1114   BasicBlock *After;
   1115 
   1116   /// Add the control blocks of this loop to \p BBs.
   1117   ///
   1118   /// This does not include any block from the body, including the one returned
   1119   /// by getBody().
   1120   void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
   1121 
   1122 public:
   1123   /// The preheader ensures that there is only a single edge entering the loop.
   1124   /// Code that must be execute before any loop iteration can be emitted here,
   1125   /// such as computing the loop trip count and begin lifetime markers. Code in
   1126   /// the preheader is not considered part of the canonical loop.
   1127   BasicBlock *getPreheader() const { return Preheader; }
   1128 
   1129   /// The header is the entry for each iteration. In the canonical control flow,
   1130   /// it only contains the PHINode for the induction variable.
   1131   BasicBlock *getHeader() const { return Header; }
   1132 
   1133   /// The condition block computes whether there is another loop iteration. If
   1134   /// yes, branches to the body; otherwise to the exit block.
   1135   BasicBlock *getCond() const { return Cond; }
   1136 
   1137   /// The body block is the single entry for a loop iteration and not controlled
   1138   /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
   1139   /// eventually branch to the \p Latch block.
   1140   BasicBlock *getBody() const { return Body; }
   1141 
   1142   /// Reaching the latch indicates the end of the loop body code. In the
   1143   /// canonical control flow, it only contains the increment of the induction
   1144   /// variable.
   1145   BasicBlock *getLatch() const { return Latch; }
   1146 
   1147   /// Reaching the exit indicates no more iterations are being executed.
   1148   BasicBlock *getExit() const { return Exit; }
   1149 
   1150   /// The after block is intended for clean-up code such as lifetime end
   1151   /// markers. It is separate from the exit block to ensure, analogous to the
   1152   /// preheader, it having just a single entry edge and being free from PHI
   1153   /// nodes should there be multiple loop exits (such as from break
   1154   /// statements/cancellations).
   1155   BasicBlock *getAfter() const { return After; }
   1156 
   1157   /// Returns the llvm::Value containing the number of loop iterations. It must
   1158   /// be valid in the preheader and always interpreted as an unsigned integer of
   1159   /// any bit-width.
   1160   Value *getTripCount() const {
   1161     Instruction *CmpI = &Cond->front();
   1162     assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
   1163     return CmpI->getOperand(1);
   1164   }
   1165 
   1166   /// Returns the instruction representing the current logical induction
   1167   /// variable. Always unsigned, always starting at 0 with an increment of one.
   1168   Instruction *getIndVar() const {
   1169     Instruction *IndVarPHI = &Header->front();
   1170     assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
   1171     return IndVarPHI;
   1172   }
   1173 
   1174   /// Return the type of the induction variable (and the trip count).
   1175   Type *getIndVarType() const { return getIndVar()->getType(); }
   1176 
   1177   /// Return the insertion point for user code before the loop.
   1178   OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
   1179     return {Preheader, std::prev(Preheader->end())};
   1180   };
   1181 
   1182   /// Return the insertion point for user code in the body.
   1183   OpenMPIRBuilder::InsertPointTy getBodyIP() const {
   1184     return {Body, Body->begin()};
   1185   };
   1186 
   1187   /// Return the insertion point for user code after the loop.
   1188   OpenMPIRBuilder::InsertPointTy getAfterIP() const {
   1189     return {After, After->begin()};
   1190   };
   1191 
   1192   Function *getFunction() const { return Header->getParent(); }
   1193 
   1194   /// Consistency self-check.
   1195   void assertOK() const;
   1196 };
   1197 
   1198 } // end namespace llvm
   1199 
   1200 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
   1201