Home | History | Annotate | Line # | Download | only in TableGen
      1 //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This tablegen backend is responsible for emitting arm_neon.h, which includes
     10 // a declaration and definition of each function specified by the ARM NEON
     11 // compiler interface.  See ARM document DUI0348B.
     12 //
     13 // Each NEON instruction is implemented in terms of 1 or more functions which
     14 // are suffixed with the element type of the input vectors.  Functions may be
     15 // implemented in terms of generic vector operations such as +, *, -, etc. or
     16 // by calling a __builtin_-prefixed function which will be handled by clang's
     17 // CodeGen library.
     18 //
     19 // Additional validation code can be generated by this file when runHeader() is
     20 // called, rather than the normal run() entry point.
     21 //
     22 // See also the documentation in include/clang/Basic/arm_neon.td.
     23 //
     24 //===----------------------------------------------------------------------===//
     25 
     26 #include "TableGenBackends.h"
     27 #include "llvm/ADT/ArrayRef.h"
     28 #include "llvm/ADT/DenseMap.h"
     29 #include "llvm/ADT/None.h"
     30 #include "llvm/ADT/Optional.h"
     31 #include "llvm/ADT/STLExtras.h"
     32 #include "llvm/ADT/SmallVector.h"
     33 #include "llvm/ADT/StringExtras.h"
     34 #include "llvm/ADT/StringRef.h"
     35 #include "llvm/Support/Casting.h"
     36 #include "llvm/Support/ErrorHandling.h"
     37 #include "llvm/Support/raw_ostream.h"
     38 #include "llvm/TableGen/Error.h"
     39 #include "llvm/TableGen/Record.h"
     40 #include "llvm/TableGen/SetTheory.h"
     41 #include <algorithm>
     42 #include <cassert>
     43 #include <cctype>
     44 #include <cstddef>
     45 #include <cstdint>
     46 #include <deque>
     47 #include <map>
     48 #include <set>
     49 #include <sstream>
     50 #include <string>
     51 #include <utility>
     52 #include <vector>
     53 
     54 using namespace llvm;
     55 
     56 namespace {
     57 
     58 // While globals are generally bad, this one allows us to perform assertions
     59 // liberally and somehow still trace them back to the def they indirectly
     60 // came from.
     61 static Record *CurrentRecord = nullptr;
     62 static void assert_with_loc(bool Assertion, const std::string &Str) {
     63   if (!Assertion) {
     64     if (CurrentRecord)
     65       PrintFatalError(CurrentRecord->getLoc(), Str);
     66     else
     67       PrintFatalError(Str);
     68   }
     69 }
     70 
/// The kind of type suffix an instruction class uses.
enum ClassKind {
  ClassNone,
  /// Generic integer instruction, e.g., "i8" suffix.
  ClassI,
  /// Signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix.
  ClassS,
  /// Width-specific instruction, e.g., "8" suffix.
  ClassW,
  /// Bitcast arguments with enum argument to specify type.
  ClassB,
  /// Logical instructions which are op instructions but for which we must
  /// not emit any suffix in our tests.
  ClassL,
  /// Instructions which we do not test since they are not TRUE instructions.
  ClassNoTest
};
     83 
/// NeonTypeFlags - Flags that identify the types of overloaded Neon builtins.
/// These must be kept in sync with the flags in
/// include/clang/Basic/TargetBuiltins.h.
namespace NeonTypeFlags {

/// Bit layout of an encoded type: the low four bits select the element type,
/// the next two bits are the unsigned and quad flags.
enum {
  EltTypeMask = 0xf,
  UnsignedFlag = 0x10,
  QuadFlag = 0x20
};

/// Element type codes. The numeric values are spelled out because they are
/// shared with the header named above.
enum EltType {
  Int8 = 0,
  Int16 = 1,
  Int32 = 2,
  Int64 = 3,
  Poly8 = 4,
  Poly16 = 5,
  Poly64 = 6,
  Poly128 = 7,
  Float16 = 8,
  Float32 = 9,
  Float64 = 10,
  BFloat16 = 11
};

} // end namespace NeonTypeFlags
    107 
    108 class NeonEmitter;
    109 
    110 //===----------------------------------------------------------------------===//
    111 // TypeSpec
    112 //===----------------------------------------------------------------------===//
    113 
    114 /// A TypeSpec is just a simple wrapper around a string, but gets its own type
    115 /// for strong typing purposes.
    116 ///
    117 /// A TypeSpec can be used to create a type.
    118 class TypeSpec : public std::string {
    119 public:
    120   static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
    121     std::vector<TypeSpec> Ret;
    122     TypeSpec Acc;
    123     for (char I : Str.str()) {
    124       if (islower(I)) {
    125         Acc.push_back(I);
    126         Ret.push_back(TypeSpec(Acc));
    127         Acc.clear();
    128       } else {
    129         Acc.push_back(I);
    130       }
    131     }
    132     return Ret;
    133   }
    134 };
    135 
    136 //===----------------------------------------------------------------------===//
    137 // Type
    138 //===----------------------------------------------------------------------===//
    139 
    140 /// A Type. Not much more to say here.
    141 class Type {
    142 private:
    143   TypeSpec TS;
    144 
    145   enum TypeKind {
    146     Void,
    147     Float,
    148     SInt,
    149     UInt,
    150     Poly,
    151     BFloat16,
    152   };
    153   TypeKind Kind;
    154   bool Immediate, Constant, Pointer;
    155   // ScalarForMangling and NoManglingQ are really not suited to live here as
    156   // they are not related to the type. But they live in the TypeSpec (not the
    157   // prototype), so this is really the only place to store them.
    158   bool ScalarForMangling, NoManglingQ;
    159   unsigned Bitwidth, ElementBitwidth, NumVectors;
    160 
    161 public:
    162   Type()
    163       : Kind(Void), Immediate(false), Constant(false),
    164         Pointer(false), ScalarForMangling(false), NoManglingQ(false),
    165         Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}
    166 
    167   Type(TypeSpec TS, StringRef CharMods)
    168       : TS(std::move(TS)), Kind(Void), Immediate(false),
    169         Constant(false), Pointer(false), ScalarForMangling(false),
    170         NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {
    171     applyModifiers(CharMods);
    172   }
    173 
    174   /// Returns a type representing "void".
    175   static Type getVoid() { return Type(); }
    176 
    177   bool operator==(const Type &Other) const { return str() == Other.str(); }
    178   bool operator!=(const Type &Other) const { return !operator==(Other); }
    179 
    180   //
    181   // Query functions
    182   //
    183   bool isScalarForMangling() const { return ScalarForMangling; }
    184   bool noManglingQ() const { return NoManglingQ; }
    185 
    186   bool isPointer() const { return Pointer; }
    187   bool isValue() const { return !isVoid() && !isPointer(); }
    188   bool isScalar() const { return isValue() && NumVectors == 0; }
    189   bool isVector() const { return isValue() && NumVectors > 0; }
    190   bool isConstPointer() const { return Constant; }
    191   bool isFloating() const { return Kind == Float; }
    192   bool isInteger() const { return Kind == SInt || Kind == UInt; }
    193   bool isPoly() const { return Kind == Poly; }
    194   bool isSigned() const { return Kind == SInt; }
    195   bool isImmediate() const { return Immediate; }
    196   bool isFloat() const { return isFloating() && ElementBitwidth == 32; }
    197   bool isDouble() const { return isFloating() && ElementBitwidth == 64; }
    198   bool isHalf() const { return isFloating() && ElementBitwidth == 16; }
    199   bool isChar() const { return ElementBitwidth == 8; }
    200   bool isShort() const { return isInteger() && ElementBitwidth == 16; }
    201   bool isInt() const { return isInteger() && ElementBitwidth == 32; }
    202   bool isLong() const { return isInteger() && ElementBitwidth == 64; }
    203   bool isVoid() const { return Kind == Void; }
    204   bool isBFloat16() const { return Kind == BFloat16; }
    205   unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
    206   unsigned getSizeInBits() const { return Bitwidth; }
    207   unsigned getElementSizeInBits() const { return ElementBitwidth; }
    208   unsigned getNumVectors() const { return NumVectors; }
    209 
    210   //
    211   // Mutator functions
    212   //
    213   void makeUnsigned() {
    214     assert(!isVoid() && "not a potentially signed type");
    215     Kind = UInt;
    216   }
    217   void makeSigned() {
    218     assert(!isVoid() && "not a potentially signed type");
    219     Kind = SInt;
    220   }
    221 
    222   void makeInteger(unsigned ElemWidth, bool Sign) {
    223     assert(!isVoid() && "converting void to int probably not useful");
    224     Kind = Sign ? SInt : UInt;
    225     Immediate = false;
    226     ElementBitwidth = ElemWidth;
    227   }
    228 
    229   void makeImmediate(unsigned ElemWidth) {
    230     Kind = SInt;
    231     Immediate = true;
    232     ElementBitwidth = ElemWidth;
    233   }
    234 
    235   void makeScalar() {
    236     Bitwidth = ElementBitwidth;
    237     NumVectors = 0;
    238   }
    239 
    240   void makeOneVector() {
    241     assert(isVector());
    242     NumVectors = 1;
    243   }
    244 
    245   void make32BitElement() {
    246     assert_with_loc(Bitwidth > 32, "Not enough bits to make it 32!");
    247     ElementBitwidth = 32;
    248   }
    249 
    250   void doubleLanes() {
    251     assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
    252     Bitwidth = 128;
    253   }
    254 
    255   void halveLanes() {
    256     assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");
    257     Bitwidth = 64;
    258   }
    259 
    260   /// Return the C string representation of a type, which is the typename
    261   /// defined in stdint.h or arm_neon.h.
    262   std::string str() const;
    263 
    264   /// Return the string representation of a type, which is an encoded
    265   /// string for passing to the BUILTIN() macro in Builtins.def.
    266   std::string builtin_str() const;
    267 
    268   /// Return the value in NeonTypeFlags for this type.
    269   unsigned getNeonEnum() const;
    270 
    271   /// Parse a type from a stdint.h or arm_neon.h typedef name,
    272   /// for example uint32x2_t or int64_t.
    273   static Type fromTypedefName(StringRef Name);
    274 
    275 private:
    276   /// Creates the type based on the typespec string in TS.
    277   /// Sets "Quad" to true if the "Q" or "H" modifiers were
    278   /// seen. This is needed by applyModifier as some modifiers
    279   /// only take effect if the type size was changed by "Q" or "H".
    280   void applyTypespec(bool &Quad);
    281   /// Applies prototype modifiers to the type.
    282   void applyModifiers(StringRef Mods);
    283 };
    284 
    285 //===----------------------------------------------------------------------===//
    286 // Variable
    287 //===----------------------------------------------------------------------===//
    288 
    289 /// A variable is a simple class that just has a type and a name.
    290 class Variable {
    291   Type T;
    292   std::string N;
    293 
    294 public:
    295   Variable() : T(Type::getVoid()), N("") {}
    296   Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {}
    297 
    298   Type getType() const { return T; }
    299   std::string getName() const { return "__" + N; }
    300 };
    301 
    302 //===----------------------------------------------------------------------===//
    303 // Intrinsic
    304 //===----------------------------------------------------------------------===//
    305 
    306 /// The main grunt class. This represents an instantiation of an intrinsic with
    307 /// a particular typespec and prototype.
    308 class Intrinsic {
    309   /// The Record this intrinsic was created from.
    310   Record *R;
    311   /// The unmangled name.
    312   std::string Name;
    313   /// The input and output typespecs. InTS == OutTS except when
    314   /// CartesianProductWith is non-empty - this is the case for vreinterpret.
    315   TypeSpec OutTS, InTS;
    316   /// The base class kind. Most intrinsics use ClassS, which has full type
    317   /// info for integers (s32/u32). Some use ClassI, which doesn't care about
    318   /// signedness (i32), while some (ClassB) have no type at all, only a width
    319   /// (32).
    320   ClassKind CK;
    321   /// The list of DAGs for the body. May be empty, in which case we should
    322   /// emit a builtin call.
    323   ListInit *Body;
    324   /// The architectural #ifdef guard.
    325   std::string Guard;
    326   /// Set if the Unavailable bit is 1. This means we don't generate a body,
    327   /// just an "unavailable" attribute on a declaration.
    328   bool IsUnavailable;
    329   /// Is this intrinsic safe for big-endian? or does it need its arguments
    330   /// reversing?
    331   bool BigEndianSafe;
    332 
    333   /// The types of return value [0] and parameters [1..].
    334   std::vector<Type> Types;
    335   /// The index of the key type passed to CGBuiltin.cpp for polymorphic calls.
    336   int PolymorphicKeyType;
    337   /// The local variables defined.
    338   std::map<std::string, Variable> Variables;
    339   /// NeededEarly - set if any other intrinsic depends on this intrinsic.
    340   bool NeededEarly;
    341   /// UseMacro - set if we should implement using a macro or unset for a
    342   ///            function.
    343   bool UseMacro;
    344   /// The set of intrinsics that this intrinsic uses/requires.
    345   std::set<Intrinsic *> Dependencies;
    346   /// The "base type", which is Type('d', OutTS). InBaseType is only
    347   /// different if CartesianProductWith is non-empty (for vreinterpret).
    348   Type BaseType, InBaseType;
    349   /// The return variable.
    350   Variable RetVar;
    351   /// A postfix to apply to every variable. Defaults to "".
    352   std::string VariablePostfix;
    353 
    354   NeonEmitter &Emitter;
    355   std::stringstream OS;
    356 
    357   bool isBigEndianSafe() const {
    358     if (BigEndianSafe)
    359       return true;
    360 
    361     for (const auto &T : Types){
    362       if (T.isVector() && T.getNumElements() > 1)
    363         return false;
    364     }
    365     return true;
    366   }
    367 
    368 public:
    369   Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
    370             TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
    371             StringRef Guard, bool IsUnavailable, bool BigEndianSafe)
    372       : R(R), Name(Name.str()), OutTS(OutTS), InTS(InTS), CK(CK), Body(Body),
    373         Guard(Guard.str()), IsUnavailable(IsUnavailable),
    374         BigEndianSafe(BigEndianSafe), PolymorphicKeyType(0), NeededEarly(false),
    375         UseMacro(false), BaseType(OutTS, "."), InBaseType(InTS, "."),
    376         Emitter(Emitter) {
    377     // Modify the TypeSpec per-argument to get a concrete Type, and create
    378     // known variables for each.
    379     // Types[0] is the return value.
    380     unsigned Pos = 0;
    381     Types.emplace_back(OutTS, getNextModifiers(Proto, Pos));
    382     StringRef Mods = getNextModifiers(Proto, Pos);
    383     while (!Mods.empty()) {
    384       Types.emplace_back(InTS, Mods);
    385       if (Mods.find('!') != StringRef::npos)
    386         PolymorphicKeyType = Types.size() - 1;
    387 
    388       Mods = getNextModifiers(Proto, Pos);
    389     }
    390 
    391     for (auto Type : Types) {
    392       // If this builtin takes an immediate argument, we need to #define it rather
    393       // than use a standard declaration, so that SemaChecking can range check
    394       // the immediate passed by the user.
    395 
    396       // Pointer arguments need to use macros to avoid hiding aligned attributes
    397       // from the pointer type.
    398 
    399       // It is not permitted to pass or return an __fp16 by value, so intrinsics
    400       // taking a scalar float16_t must be implemented as macros.
    401       if (Type.isImmediate() || Type.isPointer() ||
    402           (Type.isScalar() && Type.isHalf()))
    403         UseMacro = true;
    404     }
    405   }
    406 
    407   /// Get the Record that this intrinsic is based off.
    408   Record *getRecord() const { return R; }
    409   /// Get the set of Intrinsics that this intrinsic calls.
    410   /// this is the set of immediate dependencies, NOT the
    411   /// transitive closure.
    412   const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
    413   /// Get the architectural guard string (#ifdef).
    414   std::string getGuard() const { return Guard; }
    415   /// Get the non-mangled name.
    416   std::string getName() const { return Name; }
    417 
    418   /// Return true if the intrinsic takes an immediate operand.
    419   bool hasImmediate() const {
    420     return std::any_of(Types.begin(), Types.end(),
    421                        [](const Type &T) { return T.isImmediate(); });
    422   }
    423 
    424   /// Return the parameter index of the immediate operand.
    425   unsigned getImmediateIdx() const {
    426     for (unsigned Idx = 0; Idx < Types.size(); ++Idx)
    427       if (Types[Idx].isImmediate())
    428         return Idx - 1;
    429     llvm_unreachable("Intrinsic has no immediate");
    430   }
    431 
    432 
    433   unsigned getNumParams() const { return Types.size() - 1; }
    434   Type getReturnType() const { return Types[0]; }
    435   Type getParamType(unsigned I) const { return Types[I + 1]; }
    436   Type getBaseType() const { return BaseType; }
    437   Type getPolymorphicKeyType() const { return Types[PolymorphicKeyType]; }
    438 
    439   /// Return true if the prototype has a scalar argument.
    440   bool protoHasScalar() const;
    441 
    442   /// Return the index that parameter PIndex will sit at
    443   /// in a generated function call. This is often just PIndex,
    444   /// but may not be as things such as multiple-vector operands
    445   /// and sret parameters need to be taken into accont.
    446   unsigned getGeneratedParamIdx(unsigned PIndex) {
    447     unsigned Idx = 0;
    448     if (getReturnType().getNumVectors() > 1)
    449       // Multiple vectors are passed as sret.
    450       ++Idx;
    451 
    452     for (unsigned I = 0; I < PIndex; ++I)
    453       Idx += std::max(1U, getParamType(I).getNumVectors());
    454 
    455     return Idx;
    456   }
    457 
    458   bool hasBody() const { return Body && !Body->getValues().empty(); }
    459 
    460   void setNeededEarly() { NeededEarly = true; }
    461 
    462   bool operator<(const Intrinsic &Other) const {
    463     // Sort lexicographically on a two-tuple (Guard, Name)
    464     if (Guard != Other.Guard)
    465       return Guard < Other.Guard;
    466     return Name < Other.Name;
    467   }
    468 
    469   ClassKind getClassKind(bool UseClassBIfScalar = false) {
    470     if (UseClassBIfScalar && !protoHasScalar())
    471       return ClassB;
    472     return CK;
    473   }
    474 
    475   /// Return the name, mangled with type information.
    476   /// If ForceClassS is true, use ClassS (u32/s32) instead
    477   /// of the intrinsic's own type class.
    478   std::string getMangledName(bool ForceClassS = false) const;
    479   /// Return the type code for a builtin function call.
    480   std::string getInstTypeCode(Type T, ClassKind CK) const;
    481   /// Return the type string for a BUILTIN() macro in Builtins.def.
    482   std::string getBuiltinTypeStr();
    483 
    484   /// Generate the intrinsic, returning code.
    485   std::string generate();
    486   /// Perform type checking and populate the dependency graph, but
    487   /// don't generate code yet.
    488   void indexBody();
    489 
    490 private:
    491   StringRef getNextModifiers(StringRef Proto, unsigned &Pos) const;
    492 
    493   std::string mangleName(std::string Name, ClassKind CK) const;
    494 
    495   void initVariables();
    496   std::string replaceParamsIn(std::string S);
    497 
    498   void emitBodyAsBuiltinCall();
    499 
    500   void generateImpl(bool ReverseArguments,
    501                     StringRef NamePrefix, StringRef CallPrefix);
    502   void emitReturn();
    503   void emitBody(StringRef CallPrefix);
    504   void emitShadowedArgs();
    505   void emitArgumentReversal();
    506   void emitReturnReversal();
    507   void emitReverseVariable(Variable &Dest, Variable &Src);
    508   void emitNewLine();
    509   void emitClosingBrace();
    510   void emitOpeningBrace();
    511   void emitPrototype(StringRef NamePrefix);
    512 
    513   class DagEmitter {
    514     Intrinsic &Intr;
    515     StringRef CallPrefix;
    516 
    517   public:
    518     DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
    519       Intr(Intr), CallPrefix(CallPrefix) {
    520     }
    521     std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName);
    522     std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI);
    523     std::pair<Type, std::string> emitDagSplat(DagInit *DI);
    524     std::pair<Type, std::string> emitDagDup(DagInit *DI);
    525     std::pair<Type, std::string> emitDagDupTyped(DagInit *DI);
    526     std::pair<Type, std::string> emitDagShuffle(DagInit *DI);
    527     std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast);
    528     std::pair<Type, std::string> emitDagCall(DagInit *DI,
    529                                              bool MatchMangledName);
    530     std::pair<Type, std::string> emitDagNameReplace(DagInit *DI);
    531     std::pair<Type, std::string> emitDagLiteral(DagInit *DI);
    532     std::pair<Type, std::string> emitDagOp(DagInit *DI);
    533     std::pair<Type, std::string> emitDag(DagInit *DI);
    534   };
    535 };
    536 
    537 //===----------------------------------------------------------------------===//
    538 // NeonEmitter
    539 //===----------------------------------------------------------------------===//
    540 
    541 class NeonEmitter {
    542   RecordKeeper &Records;
    543   DenseMap<Record *, ClassKind> ClassMap;
    544   std::map<std::string, std::deque<Intrinsic>> IntrinsicMap;
    545   unsigned UniqueNumber;
    546 
    547   void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out);
    548   void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
    549   void genOverloadTypeCheckCode(raw_ostream &OS,
    550                                 SmallVectorImpl<Intrinsic *> &Defs);
    551   void genIntrinsicRangeCheckCode(raw_ostream &OS,
    552                                   SmallVectorImpl<Intrinsic *> &Defs);
    553 
    554 public:
    555   /// Called by Intrinsic - this attempts to get an intrinsic that takes
    556   /// the given types as arguments.
    557   Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types,
    558                           Optional<std::string> MangledName);
    559 
    560   /// Called by Intrinsic - returns a globally-unique number.
    561   unsigned getUniqueNumber() { return UniqueNumber++; }
    562 
    563   NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) {
    564     Record *SI = R.getClass("SInst");
    565     Record *II = R.getClass("IInst");
    566     Record *WI = R.getClass("WInst");
    567     Record *SOpI = R.getClass("SOpInst");
    568     Record *IOpI = R.getClass("IOpInst");
    569     Record *WOpI = R.getClass("WOpInst");
    570     Record *LOpI = R.getClass("LOpInst");
    571     Record *NoTestOpI = R.getClass("NoTestOpInst");
    572 
    573     ClassMap[SI] = ClassS;
    574     ClassMap[II] = ClassI;
    575     ClassMap[WI] = ClassW;
    576     ClassMap[SOpI] = ClassS;
    577     ClassMap[IOpI] = ClassI;
    578     ClassMap[WOpI] = ClassW;
    579     ClassMap[LOpI] = ClassL;
    580     ClassMap[NoTestOpI] = ClassNoTest;
    581   }
    582 
    583   // Emit arm_neon.h.inc
    584   void run(raw_ostream &o);
    585 
    586   // Emit arm_fp16.h.inc
    587   void runFP16(raw_ostream &o);
    588 
    589   // Emit arm_bf16.h.inc
    590   void runBF16(raw_ostream &o);
    591 
    592   // Emit all the __builtin prototypes used in arm_neon.h, arm_fp16.h and
    593   // arm_bf16.h
    594   void runHeader(raw_ostream &o);
    595 };
    596 
    597 } // end anonymous namespace
    598 
    599 //===----------------------------------------------------------------------===//
    600 // Type implementation
    601 //===----------------------------------------------------------------------===//
    602 
    603 std::string Type::str() const {
    604   if (isVoid())
    605     return "void";
    606   std::string S;
    607 
    608   if (isInteger() && !isSigned())
    609     S += "u";
    610 
    611   if (isPoly())
    612     S += "poly";
    613   else if (isFloating())
    614     S += "float";
    615   else if (isBFloat16())
    616     S += "bfloat";
    617   else
    618     S += "int";
    619 
    620   S += utostr(ElementBitwidth);
    621   if (isVector())
    622     S += "x" + utostr(getNumElements());
    623   if (NumVectors > 1)
    624     S += "x" + utostr(NumVectors);
    625   S += "_t";
    626 
    627   if (Constant)
    628     S += " const";
    629   if (Pointer)
    630     S += " *";
    631 
    632   return S;
    633 }
    634 
    635 std::string Type::builtin_str() const {
    636   std::string S;
    637   if (isVoid())
    638     return "v";
    639 
    640   if (isPointer()) {
    641     // All pointers are void pointers.
    642     S = "v";
    643     if (isConstPointer())
    644       S += "C";
    645     S += "*";
    646     return S;
    647   } else if (isInteger())
    648     switch (ElementBitwidth) {
    649     case 8: S += "c"; break;
    650     case 16: S += "s"; break;
    651     case 32: S += "i"; break;
    652     case 64: S += "Wi"; break;
    653     case 128: S += "LLLi"; break;
    654     default: llvm_unreachable("Unhandled case!");
    655     }
    656   else if (isBFloat16()) {
    657     assert(ElementBitwidth == 16 && "BFloat16 can only be 16 bits");
    658     S += "y";
    659   } else
    660     switch (ElementBitwidth) {
    661     case 16: S += "h"; break;
    662     case 32: S += "f"; break;
    663     case 64: S += "d"; break;
    664     default: llvm_unreachable("Unhandled case!");
    665     }
    666 
    667   // FIXME: NECESSARY???????????????????????????????????????????????????????????????????????
    668   if (isChar() && !isPointer() && isSigned())
    669     // Make chars explicitly signed.
    670     S = "S" + S;
    671   else if (isInteger() && !isSigned())
    672     S = "U" + S;
    673 
    674   // Constant indices are "int", but have the "constant expression" modifier.
    675   if (isImmediate()) {
    676     assert(isInteger() && isSigned());
    677     S = "I" + S;
    678   }
    679 
    680   if (isScalar())
    681     return S;
    682 
    683   std::string Ret;
    684   for (unsigned I = 0; I < NumVectors; ++I)
    685     Ret += "V" + utostr(getNumElements()) + S;
    686 
    687   return Ret;
    688 }
    689 
    690 unsigned Type::getNeonEnum() const {
    691   unsigned Addend;
    692   switch (ElementBitwidth) {
    693   case 8: Addend = 0; break;
    694   case 16: Addend = 1; break;
    695   case 32: Addend = 2; break;
    696   case 64: Addend = 3; break;
    697   case 128: Addend = 4; break;
    698   default: llvm_unreachable("Unhandled element bitwidth!");
    699   }
    700 
    701   unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;
    702   if (isPoly()) {
    703     // Adjustment needed because Poly32 doesn't exist.
    704     if (Addend >= 2)
    705       --Addend;
    706     Base = (unsigned)NeonTypeFlags::Poly8 + Addend;
    707   }
    708   if (isFloating()) {
    709     assert(Addend != 0 && "Float8 doesn't exist!");
    710     Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
    711   }
    712 
    713   if (isBFloat16()) {
    714     assert(Addend == 1 && "BFloat16 is only 16 bit");
    715     Base = (unsigned)NeonTypeFlags::BFloat16;
    716   }
    717 
    718   if (Bitwidth == 128)
    719     Base |= (unsigned)NeonTypeFlags::QuadFlag;
    720   if (isInteger() && !isSigned())
    721     Base |= (unsigned)NeonTypeFlags::UnsignedFlag;
    722 
    723   return Base;
    724 }
    725 
    726 Type Type::fromTypedefName(StringRef Name) {
    727   Type T;
    728   T.Kind = SInt;
    729 
    730   if (Name.front() == 'u') {
    731     T.Kind = UInt;
    732     Name = Name.drop_front();
    733   }
    734 
    735   if (Name.startswith("float")) {
    736     T.Kind = Float;
    737     Name = Name.drop_front(5);
    738   } else if (Name.startswith("poly")) {
    739     T.Kind = Poly;
    740     Name = Name.drop_front(4);
    741   } else if (Name.startswith("bfloat")) {
    742     T.Kind = BFloat16;
    743     Name = Name.drop_front(6);
    744   } else {
    745     assert(Name.startswith("int"));
    746     Name = Name.drop_front(3);
    747   }
    748 
    749   unsigned I = 0;
    750   for (I = 0; I < Name.size(); ++I) {
    751     if (!isdigit(Name[I]))
    752       break;
    753   }
    754   Name.substr(0, I).getAsInteger(10, T.ElementBitwidth);
    755   Name = Name.drop_front(I);
    756 
    757   T.Bitwidth = T.ElementBitwidth;
    758   T.NumVectors = 1;
    759 
    760   if (Name.front() == 'x') {
    761     Name = Name.drop_front();
    762     unsigned I = 0;
    763     for (I = 0; I < Name.size(); ++I) {
    764       if (!isdigit(Name[I]))
    765         break;
    766     }
    767     unsigned NumLanes;
    768     Name.substr(0, I).getAsInteger(10, NumLanes);
    769     Name = Name.drop_front(I);
    770     T.Bitwidth = T.ElementBitwidth * NumLanes;
    771   } else {
    772     // Was scalar.
    773     T.NumVectors = 0;
    774   }
    775   if (Name.front() == 'x') {
    776     Name = Name.drop_front();
    777     unsigned I = 0;
    778     for (I = 0; I < Name.size(); ++I) {
    779       if (!isdigit(Name[I]))
    780         break;
    781     }
    782     Name.substr(0, I).getAsInteger(10, T.NumVectors);
    783     Name = Name.drop_front(I);
    784   }
    785 
    786   assert(Name.startswith("_t") && "Malformed typedef!");
    787   return T;
    788 }
    789 
    790 void Type::applyTypespec(bool &Quad) {
    791   std::string S = TS;
    792   ScalarForMangling = false;
    793   Kind = SInt;
    794   ElementBitwidth = ~0U;
    795   NumVectors = 1;
    796 
    797   for (char I : S) {
    798     switch (I) {
    799     case 'S':
    800       ScalarForMangling = true;
    801       break;
    802     case 'H':
    803       NoManglingQ = true;
    804       Quad = true;
    805       break;
    806     case 'Q':
    807       Quad = true;
    808       break;
    809     case 'P':
    810       Kind = Poly;
    811       break;
    812     case 'U':
    813       Kind = UInt;
    814       break;
    815     case 'c':
    816       ElementBitwidth = 8;
    817       break;
    818     case 'h':
    819       Kind = Float;
    820       LLVM_FALLTHROUGH;
    821     case 's':
    822       ElementBitwidth = 16;
    823       break;
    824     case 'f':
    825       Kind = Float;
    826       LLVM_FALLTHROUGH;
    827     case 'i':
    828       ElementBitwidth = 32;
    829       break;
    830     case 'd':
    831       Kind = Float;
    832       LLVM_FALLTHROUGH;
    833     case 'l':
    834       ElementBitwidth = 64;
    835       break;
    836     case 'k':
    837       ElementBitwidth = 128;
    838       // Poly doesn't have a 128x1 type.
    839       if (isPoly())
    840         NumVectors = 0;
    841       break;
    842     case 'b':
    843       Kind = BFloat16;
    844       ElementBitwidth = 16;
    845       break;
    846     default:
    847       llvm_unreachable("Unhandled type code!");
    848     }
    849   }
    850   assert(ElementBitwidth != ~0U && "Bad element bitwidth!");
    851 
    852   Bitwidth = Quad ? 128 : 64;
    853 }
    854 
    855 void Type::applyModifiers(StringRef Mods) {
    856   bool AppliedQuad = false;
    857   applyTypespec(AppliedQuad);
    858 
    859   for (char Mod : Mods) {
    860     switch (Mod) {
    861     case '.':
    862       break;
    863     case 'v':
    864       Kind = Void;
    865       break;
    866     case 'S':
    867       Kind = SInt;
    868       break;
    869     case 'U':
    870       Kind = UInt;
    871       break;
    872     case 'B':
    873       Kind = BFloat16;
    874       ElementBitwidth = 16;
    875       break;
    876     case 'F':
    877       Kind = Float;
    878       break;
    879     case 'P':
    880       Kind = Poly;
    881       break;
    882     case '>':
    883       assert(ElementBitwidth < 128);
    884       ElementBitwidth *= 2;
    885       break;
    886     case '<':
    887       assert(ElementBitwidth > 8);
    888       ElementBitwidth /= 2;
    889       break;
    890     case '1':
    891       NumVectors = 0;
    892       break;
    893     case '2':
    894       NumVectors = 2;
    895       break;
    896     case '3':
    897       NumVectors = 3;
    898       break;
    899     case '4':
    900       NumVectors = 4;
    901       break;
    902     case '*':
    903       Pointer = true;
    904       break;
    905     case 'c':
    906       Constant = true;
    907       break;
    908     case 'Q':
    909       Bitwidth = 128;
    910       break;
    911     case 'q':
    912       Bitwidth = 64;
    913       break;
    914     case 'I':
    915       Kind = SInt;
    916       ElementBitwidth = Bitwidth = 32;
    917       NumVectors = 0;
    918       Immediate = true;
    919       break;
    920     case 'p':
    921       if (isPoly())
    922         Kind = UInt;
    923       break;
    924     case '!':
    925       // Key type, handled elsewhere.
    926       break;
    927     default:
    928       llvm_unreachable("Unhandled character!");
    929     }
    930   }
    931 }
    932 
    933 //===----------------------------------------------------------------------===//
    934 // Intrinsic implementation
    935 //===----------------------------------------------------------------------===//
    936 
    937 StringRef Intrinsic::getNextModifiers(StringRef Proto, unsigned &Pos) const {
    938   if (Proto.size() == Pos)
    939     return StringRef();
    940   else if (Proto[Pos] != '(')
    941     return Proto.substr(Pos++, 1);
    942 
    943   size_t Start = Pos + 1;
    944   size_t End = Proto.find(')', Start);
    945   assert_with_loc(End != StringRef::npos, "unmatched modifier group paren");
    946   Pos = End + 1;
    947   return Proto.slice(Start, End);
    948 }
    949 
    950 std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
    951   char typeCode = '\0';
    952   bool printNumber = true;
    953 
    954   if (CK == ClassB)
    955     return "";
    956 
    957   if (T.isBFloat16())
    958     return "bf16";
    959 
    960   if (T.isPoly())
    961     typeCode = 'p';
    962   else if (T.isInteger())
    963     typeCode = T.isSigned() ? 's' : 'u';
    964   else
    965     typeCode = 'f';
    966 
    967   if (CK == ClassI) {
    968     switch (typeCode) {
    969     default:
    970       break;
    971     case 's':
    972     case 'u':
    973     case 'p':
    974       typeCode = 'i';
    975       break;
    976     }
    977   }
    978   if (CK == ClassB) {
    979     typeCode = '\0';
    980   }
    981 
    982   std::string S;
    983   if (typeCode != '\0')
    984     S.push_back(typeCode);
    985   if (printNumber)
    986     S += utostr(T.getElementSizeInBits());
    987 
    988   return S;
    989 }
    990 
    991 std::string Intrinsic::getBuiltinTypeStr() {
    992   ClassKind LocalCK = getClassKind(true);
    993   std::string S;
    994 
    995   Type RetT = getReturnType();
    996   if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
    997       !RetT.isFloating() && !RetT.isBFloat16())
    998     RetT.makeInteger(RetT.getElementSizeInBits(), false);
    999 
   1000   // Since the return value must be one type, return a vector type of the
   1001   // appropriate width which we will bitcast.  An exception is made for
   1002   // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
   1003   // fashion, storing them to a pointer arg.
   1004   if (RetT.getNumVectors() > 1) {
   1005     S += "vv*"; // void result with void* first argument
   1006   } else {
   1007     if (RetT.isPoly())
   1008       RetT.makeInteger(RetT.getElementSizeInBits(), false);
   1009     if (!RetT.isScalar() && RetT.isInteger() && !RetT.isSigned())
   1010       RetT.makeSigned();
   1011 
   1012     if (LocalCK == ClassB && RetT.isValue() && !RetT.isScalar())
   1013       // Cast to vector of 8-bit elements.
   1014       RetT.makeInteger(8, true);
   1015 
   1016     S += RetT.builtin_str();
   1017   }
   1018 
   1019   for (unsigned I = 0; I < getNumParams(); ++I) {
   1020     Type T = getParamType(I);
   1021     if (T.isPoly())
   1022       T.makeInteger(T.getElementSizeInBits(), false);
   1023 
   1024     if (LocalCK == ClassB && !T.isScalar())
   1025       T.makeInteger(8, true);
   1026     // Halves always get converted to 8-bit elements.
   1027     if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
   1028       T.makeInteger(8, true);
   1029 
   1030     if (LocalCK == ClassI && T.isInteger())
   1031       T.makeSigned();
   1032 
   1033     if (hasImmediate() && getImmediateIdx() == I)
   1034       T.makeImmediate(32);
   1035 
   1036     S += T.builtin_str();
   1037   }
   1038 
   1039   // Extra constant integer to hold type class enum for this function, e.g. s8
   1040   if (LocalCK == ClassB)
   1041     S += "i";
   1042 
   1043   return S;
   1044 }
   1045 
   1046 std::string Intrinsic::getMangledName(bool ForceClassS) const {
   1047   // Check if the prototype has a scalar operand with the type of the vector
   1048   // elements.  If not, bitcasting the args will take care of arg checking.
   1049   // The actual signedness etc. will be taken care of with special enums.
   1050   ClassKind LocalCK = CK;
   1051   if (!protoHasScalar())
   1052     LocalCK = ClassB;
   1053 
   1054   return mangleName(Name, ForceClassS ? ClassS : LocalCK);
   1055 }
   1056 
   1057 std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {
   1058   std::string typeCode = getInstTypeCode(BaseType, LocalCK);
   1059   std::string S = Name;
   1060 
   1061   if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" ||
   1062       Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32" ||
   1063       Name == "vcvt_f32_bf16")
   1064     return Name;
   1065 
   1066   if (!typeCode.empty()) {
   1067     // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
   1068     if (Name.size() >= 3 && isdigit(Name.back()) &&
   1069         Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')
   1070       S.insert(S.length() - 3, "_" + typeCode);
   1071     else
   1072       S += "_" + typeCode;
   1073   }
   1074 
   1075   if (BaseType != InBaseType) {
   1076     // A reinterpret - out the input base type at the end.
   1077     S += "_" + getInstTypeCode(InBaseType, LocalCK);
   1078   }
   1079 
   1080   if (LocalCK == ClassB)
   1081     S += "_v";
   1082 
   1083   // Insert a 'q' before the first '_' character so that it ends up before
   1084   // _lane or _n on vector-scalar operations.
   1085   if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) {
   1086     size_t Pos = S.find('_');
   1087     S.insert(Pos, "q");
   1088   }
   1089 
   1090   char Suffix = '\0';
   1091   if (BaseType.isScalarForMangling()) {
   1092     switch (BaseType.getElementSizeInBits()) {
   1093     case 8: Suffix = 'b'; break;
   1094     case 16: Suffix = 'h'; break;
   1095     case 32: Suffix = 's'; break;
   1096     case 64: Suffix = 'd'; break;
   1097     default: llvm_unreachable("Bad suffix!");
   1098     }
   1099   }
   1100   if (Suffix != '\0') {
   1101     size_t Pos = S.find('_');
   1102     S.insert(Pos, &Suffix, 1);
   1103   }
   1104 
   1105   return S;
   1106 }
   1107 
   1108 std::string Intrinsic::replaceParamsIn(std::string S) {
   1109   while (S.find('$') != std::string::npos) {
   1110     size_t Pos = S.find('$');
   1111     size_t End = Pos + 1;
   1112     while (isalpha(S[End]))
   1113       ++End;
   1114 
   1115     std::string VarName = S.substr(Pos + 1, End - Pos - 1);
   1116     assert_with_loc(Variables.find(VarName) != Variables.end(),
   1117                     "Variable not defined!");
   1118     S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName());
   1119   }
   1120 
   1121   return S;
   1122 }
   1123 
   1124 void Intrinsic::initVariables() {
   1125   Variables.clear();
   1126 
   1127   // Modify the TypeSpec per-argument to get a concrete Type, and create
   1128   // known variables for each.
   1129   for (unsigned I = 1; I < Types.size(); ++I) {
   1130     char NameC = '0' + (I - 1);
   1131     std::string Name = "p";
   1132     Name.push_back(NameC);
   1133 
   1134     Variables[Name] = Variable(Types[I], Name + VariablePostfix);
   1135   }
   1136   RetVar = Variable(Types[0], "ret" + VariablePostfix);
   1137 }
   1138 
   1139 void Intrinsic::emitPrototype(StringRef NamePrefix) {
   1140   if (UseMacro)
   1141     OS << "#define ";
   1142   else
   1143     OS << "__ai " << Types[0].str() << " ";
   1144 
   1145   OS << NamePrefix.str() << mangleName(Name, ClassS) << "(";
   1146 
   1147   for (unsigned I = 0; I < getNumParams(); ++I) {
   1148     if (I != 0)
   1149       OS << ", ";
   1150 
   1151     char NameC = '0' + I;
   1152     std::string Name = "p";
   1153     Name.push_back(NameC);
   1154     assert(Variables.find(Name) != Variables.end());
   1155     Variable &V = Variables[Name];
   1156 
   1157     if (!UseMacro)
   1158       OS << V.getType().str() << " ";
   1159     OS << V.getName();
   1160   }
   1161 
   1162   OS << ")";
   1163 }
   1164 
   1165 void Intrinsic::emitOpeningBrace() {
   1166   if (UseMacro)
   1167     OS << " __extension__ ({";
   1168   else
   1169     OS << " {";
   1170   emitNewLine();
   1171 }
   1172 
   1173 void Intrinsic::emitClosingBrace() {
   1174   if (UseMacro)
   1175     OS << "})";
   1176   else
   1177     OS << "}";
   1178 }
   1179 
   1180 void Intrinsic::emitNewLine() {
   1181   if (UseMacro)
   1182     OS << " \\\n";
   1183   else
   1184     OS << "\n";
   1185 }
   1186 
   1187 void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {
   1188   if (Dest.getType().getNumVectors() > 1) {
   1189     emitNewLine();
   1190 
   1191     for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) {
   1192       OS << "  " << Dest.getName() << ".val[" << K << "] = "
   1193          << "__builtin_shufflevector("
   1194          << Src.getName() << ".val[" << K << "], "
   1195          << Src.getName() << ".val[" << K << "]";
   1196       for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)
   1197         OS << ", " << J;
   1198       OS << ");";
   1199       emitNewLine();
   1200     }
   1201   } else {
   1202     OS << "  " << Dest.getName()
   1203        << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName();
   1204     for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)
   1205       OS << ", " << J;
   1206     OS << ");";
   1207     emitNewLine();
   1208   }
   1209 }
   1210 
   1211 void Intrinsic::emitArgumentReversal() {
   1212   if (isBigEndianSafe())
   1213     return;
   1214 
   1215   // Reverse all vector arguments.
   1216   for (unsigned I = 0; I < getNumParams(); ++I) {
   1217     std::string Name = "p" + utostr(I);
   1218     std::string NewName = "rev" + utostr(I);
   1219 
   1220     Variable &V = Variables[Name];
   1221     Variable NewV(V.getType(), NewName + VariablePostfix);
   1222 
   1223     if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1)
   1224       continue;
   1225 
   1226     OS << "  " << NewV.getType().str() << " " << NewV.getName() << ";";
   1227     emitReverseVariable(NewV, V);
   1228     V = NewV;
   1229   }
   1230 }
   1231 
   1232 void Intrinsic::emitReturnReversal() {
   1233   if (isBigEndianSafe())
   1234     return;
   1235   if (!getReturnType().isVector() || getReturnType().isVoid() ||
   1236       getReturnType().getNumElements() == 1)
   1237     return;
   1238   emitReverseVariable(RetVar, RetVar);
   1239 }
   1240 
   1241 void Intrinsic::emitShadowedArgs() {
   1242   // Macro arguments are not type-checked like inline function arguments,
   1243   // so assign them to local temporaries to get the right type checking.
   1244   if (!UseMacro)
   1245     return;
   1246 
   1247   for (unsigned I = 0; I < getNumParams(); ++I) {
   1248     // Do not create a temporary for an immediate argument.
   1249     // That would defeat the whole point of using a macro!
   1250     if (getParamType(I).isImmediate())
   1251       continue;
   1252     // Do not create a temporary for pointer arguments. The input
   1253     // pointer may have an alignment hint.
   1254     if (getParamType(I).isPointer())
   1255       continue;
   1256 
   1257     std::string Name = "p" + utostr(I);
   1258 
   1259     assert(Variables.find(Name) != Variables.end());
   1260     Variable &V = Variables[Name];
   1261 
   1262     std::string NewName = "s" + utostr(I);
   1263     Variable V2(V.getType(), NewName + VariablePostfix);
   1264 
   1265     OS << "  " << V2.getType().str() << " " << V2.getName() << " = "
   1266        << V.getName() << ";";
   1267     emitNewLine();
   1268 
   1269     V = V2;
   1270   }
   1271 }
   1272 
   1273 bool Intrinsic::protoHasScalar() const {
   1274   return std::any_of(Types.begin(), Types.end(), [](const Type &T) {
   1275     return T.isScalar() && !T.isImmediate();
   1276   });
   1277 }
   1278 
   1279 void Intrinsic::emitBodyAsBuiltinCall() {
   1280   std::string S;
   1281 
   1282   // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
   1283   // sret-like argument.
   1284   bool SRet = getReturnType().getNumVectors() >= 2;
   1285 
   1286   StringRef N = Name;
   1287   ClassKind LocalCK = CK;
   1288   if (!protoHasScalar())
   1289     LocalCK = ClassB;
   1290 
   1291   if (!getReturnType().isVoid() && !SRet)
   1292     S += "(" + RetVar.getType().str() + ") ";
   1293 
   1294   S += "__builtin_neon_" + mangleName(std::string(N), LocalCK) + "(";
   1295 
   1296   if (SRet)
   1297     S += "&" + RetVar.getName() + ", ";
   1298 
   1299   for (unsigned I = 0; I < getNumParams(); ++I) {
   1300     Variable &V = Variables["p" + utostr(I)];
   1301     Type T = V.getType();
   1302 
   1303     // Handle multiple-vector values specially, emitting each subvector as an
   1304     // argument to the builtin.
   1305     if (T.getNumVectors() > 1) {
   1306       // Check if an explicit cast is needed.
   1307       std::string Cast;
   1308       if (LocalCK == ClassB) {
   1309         Type T2 = T;
   1310         T2.makeOneVector();
   1311         T2.makeInteger(8, /*Signed=*/true);
   1312         Cast = "(" + T2.str() + ")";
   1313       }
   1314 
   1315       for (unsigned J = 0; J < T.getNumVectors(); ++J)
   1316         S += Cast + V.getName() + ".val[" + utostr(J) + "], ";
   1317       continue;
   1318     }
   1319 
   1320     std::string Arg = V.getName();
   1321     Type CastToType = T;
   1322 
   1323     // Check if an explicit cast is needed.
   1324     if (CastToType.isVector() &&
   1325         (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling()))) {
   1326       CastToType.makeInteger(8, true);
   1327       Arg = "(" + CastToType.str() + ")" + Arg;
   1328     } else if (CastToType.isVector() && LocalCK == ClassI) {
   1329       if (CastToType.isInteger())
   1330         CastToType.makeSigned();
   1331       Arg = "(" + CastToType.str() + ")" + Arg;
   1332     }
   1333 
   1334     S += Arg + ", ";
   1335   }
   1336 
   1337   // Extra constant integer to hold type class enum for this function, e.g. s8
   1338   if (getClassKind(true) == ClassB) {
   1339     S += utostr(getPolymorphicKeyType().getNeonEnum());
   1340   } else {
   1341     // Remove extraneous ", ".
   1342     S.pop_back();
   1343     S.pop_back();
   1344   }
   1345   S += ");";
   1346 
   1347   std::string RetExpr;
   1348   if (!SRet && !RetVar.getType().isVoid())
   1349     RetExpr = RetVar.getName() + " = ";
   1350 
   1351   OS << "  " << RetExpr << S;
   1352   emitNewLine();
   1353 }
   1354 
   1355 void Intrinsic::emitBody(StringRef CallPrefix) {
   1356   std::vector<std::string> Lines;
   1357 
   1358   assert(RetVar.getType() == Types[0]);
   1359   // Create a return variable, if we're not void.
   1360   if (!RetVar.getType().isVoid()) {
   1361     OS << "  " << RetVar.getType().str() << " " << RetVar.getName() << ";";
   1362     emitNewLine();
   1363   }
   1364 
   1365   if (!Body || Body->getValues().empty()) {
   1366     // Nothing specific to output - must output a builtin.
   1367     emitBodyAsBuiltinCall();
   1368     return;
   1369   }
   1370 
   1371   // We have a list of "things to output". The last should be returned.
   1372   for (auto *I : Body->getValues()) {
   1373     if (StringInit *SI = dyn_cast<StringInit>(I)) {
   1374       Lines.push_back(replaceParamsIn(SI->getAsString()));
   1375     } else if (DagInit *DI = dyn_cast<DagInit>(I)) {
   1376       DagEmitter DE(*this, CallPrefix);
   1377       Lines.push_back(DE.emitDag(DI).second + ";");
   1378     }
   1379   }
   1380 
   1381   assert(!Lines.empty() && "Empty def?");
   1382   if (!RetVar.getType().isVoid())
   1383     Lines.back().insert(0, RetVar.getName() + " = ");
   1384 
   1385   for (auto &L : Lines) {
   1386     OS << "  " << L;
   1387     emitNewLine();
   1388   }
   1389 }
   1390 
   1391 void Intrinsic::emitReturn() {
   1392   if (RetVar.getType().isVoid())
   1393     return;
   1394   if (UseMacro)
   1395     OS << "  " << RetVar.getName() << ";";
   1396   else
   1397     OS << "  return " << RetVar.getName() << ";";
   1398   emitNewLine();
   1399 }
   1400 
   1401 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) {
   1402   // At this point we should only be seeing a def.
   1403   DefInit *DefI = cast<DefInit>(DI->getOperator());
   1404   std::string Op = DefI->getAsString();
   1405 
   1406   if (Op == "cast" || Op == "bitcast")
   1407     return emitDagCast(DI, Op == "bitcast");
   1408   if (Op == "shuffle")
   1409     return emitDagShuffle(DI);
   1410   if (Op == "dup")
   1411     return emitDagDup(DI);
   1412   if (Op == "dup_typed")
   1413     return emitDagDupTyped(DI);
   1414   if (Op == "splat")
   1415     return emitDagSplat(DI);
   1416   if (Op == "save_temp")
   1417     return emitDagSaveTemp(DI);
   1418   if (Op == "op")
   1419     return emitDagOp(DI);
   1420   if (Op == "call" || Op == "call_mangled")
   1421     return emitDagCall(DI, Op == "call_mangled");
   1422   if (Op == "name_replace")
   1423     return emitDagNameReplace(DI);
   1424   if (Op == "literal")
   1425     return emitDagLiteral(DI);
   1426   assert_with_loc(false, "Unknown operation!");
   1427   return std::make_pair(Type::getVoid(), "");
   1428 }
   1429 
   1430 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) {
   1431   std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
   1432   if (DI->getNumArgs() == 2) {
   1433     // Unary op.
   1434     std::pair<Type, std::string> R =
   1435         emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
   1436     return std::make_pair(R.first, Op + R.second);
   1437   } else {
   1438     assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
   1439     std::pair<Type, std::string> R1 =
   1440         emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
   1441     std::pair<Type, std::string> R2 =
   1442         emitDagArg(DI->getArg(2), std::string(DI->getArgNameStr(2)));
   1443     assert_with_loc(R1.first == R2.first, "Argument type mismatch!");
   1444     return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second);
   1445   }
   1446 }
   1447 
   1448 std::pair<Type, std::string>
   1449 Intrinsic::DagEmitter::emitDagCall(DagInit *DI, bool MatchMangledName) {
   1450   std::vector<Type> Types;
   1451   std::vector<std::string> Values;
   1452   for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
   1453     std::pair<Type, std::string> R =
   1454         emitDagArg(DI->getArg(I + 1), std::string(DI->getArgNameStr(I + 1)));
   1455     Types.push_back(R.first);
   1456     Values.push_back(R.second);
   1457   }
   1458 
   1459   // Look up the called intrinsic.
   1460   std::string N;
   1461   if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0)))
   1462     N = SI->getAsUnquotedString();
   1463   else
   1464     N = emitDagArg(DI->getArg(0), "").second;
   1465   Optional<std::string> MangledName;
   1466   if (MatchMangledName) {
   1467     if (Intr.getRecord()->getValueAsBit("isLaneQ"))
   1468       N += "q";
   1469     MangledName = Intr.mangleName(N, ClassS);
   1470   }
   1471   Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types, MangledName);
   1472 
   1473   // Make sure the callee is known as an early def.
   1474   Callee.setNeededEarly();
   1475   Intr.Dependencies.insert(&Callee);
   1476 
   1477   // Now create the call itself.
   1478   std::string S = "";
   1479   if (!Callee.isBigEndianSafe())
   1480     S += CallPrefix.str();
   1481   S += Callee.getMangledName(true) + "(";
   1482   for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
   1483     if (I != 0)
   1484       S += ", ";
   1485     S += Values[I];
   1486   }
   1487   S += ")";
   1488 
   1489   return std::make_pair(Callee.getReturnType(), S);
   1490 }
   1491 
   1492 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
   1493                                                                 bool IsBitCast){
   1494   // (cast MOD* VAL) -> cast VAL to type given by MOD.
   1495   std::pair<Type, std::string> R =
   1496       emitDagArg(DI->getArg(DI->getNumArgs() - 1),
   1497                  std::string(DI->getArgNameStr(DI->getNumArgs() - 1)));
   1498   Type castToType = R.first;
   1499   for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {
   1500 
   1501     // MOD can take several forms:
   1502     //   1. $X - take the type of parameter / variable X.
   1503     //   2. The value "R" - take the type of the return type.
   1504     //   3. a type string
   1505     //   4. The value "U" or "S" to switch the signedness.
   1506     //   5. The value "H" or "D" to half or double the bitwidth.
   1507     //   6. The value "8" to convert to 8-bit (signed) integer lanes.
   1508     if (!DI->getArgNameStr(ArgIdx).empty()) {
   1509       assert_with_loc(Intr.Variables.find(std::string(
   1510                           DI->getArgNameStr(ArgIdx))) != Intr.Variables.end(),
   1511                       "Variable not found");
   1512       castToType =
   1513           Intr.Variables[std::string(DI->getArgNameStr(ArgIdx))].getType();
   1514     } else {
   1515       StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx));
   1516       assert_with_loc(SI, "Expected string type or $Name for cast type");
   1517 
   1518       if (SI->getAsUnquotedString() == "R") {
   1519         castToType = Intr.getReturnType();
   1520       } else if (SI->getAsUnquotedString() == "U") {
   1521         castToType.makeUnsigned();
   1522       } else if (SI->getAsUnquotedString() == "S") {
   1523         castToType.makeSigned();
   1524       } else if (SI->getAsUnquotedString() == "H") {
   1525         castToType.halveLanes();
   1526       } else if (SI->getAsUnquotedString() == "D") {
   1527         castToType.doubleLanes();
   1528       } else if (SI->getAsUnquotedString() == "8") {
   1529         castToType.makeInteger(8, true);
   1530       } else if (SI->getAsUnquotedString() == "32") {
   1531         castToType.make32BitElement();
   1532       } else {
   1533         castToType = Type::fromTypedefName(SI->getAsUnquotedString());
   1534         assert_with_loc(!castToType.isVoid(), "Unknown typedef");
   1535       }
   1536     }
   1537   }
   1538 
   1539   std::string S;
   1540   if (IsBitCast) {
   1541     // Emit a reinterpret cast. The second operand must be an lvalue, so create
   1542     // a temporary.
   1543     std::string N = "reint";
   1544     unsigned I = 0;
   1545     while (Intr.Variables.find(N) != Intr.Variables.end())
   1546       N = "reint" + utostr(++I);
   1547     Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix);
   1548 
   1549     Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = "
   1550             << R.second << ";";
   1551     Intr.emitNewLine();
   1552 
   1553     S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + "";
   1554   } else {
   1555     // Emit a normal (static) cast.
   1556     S = "(" + castToType.str() + ")(" + R.second + ")";
   1557   }
   1558 
   1559   return std::make_pair(castToType, S);
   1560 }
   1561 
   1562 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){
   1563   // See the documentation in arm_neon.td for a description of these operators.
   1564   class LowHalf : public SetTheory::Operator {
   1565   public:
   1566     void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
   1567                ArrayRef<SMLoc> Loc) override {
   1568       SetTheory::RecSet Elts2;
   1569       ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
   1570       Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2));
   1571     }
   1572   };
   1573 
   1574   class HighHalf : public SetTheory::Operator {
   1575   public:
   1576     void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
   1577                ArrayRef<SMLoc> Loc) override {
   1578       SetTheory::RecSet Elts2;
   1579       ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
   1580       Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end());
   1581     }
   1582   };
   1583 
   1584   class Rev : public SetTheory::Operator {
   1585     unsigned ElementSize;
   1586 
   1587   public:
   1588     Rev(unsigned ElementSize) : ElementSize(ElementSize) {}
   1589 
   1590     void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
   1591                ArrayRef<SMLoc> Loc) override {
   1592       SetTheory::RecSet Elts2;
   1593       ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc);
   1594 
   1595       int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue();
   1596       VectorSize /= ElementSize;
   1597 
   1598       std::vector<Record *> Revved;
   1599       for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
   1600         for (int LI = VectorSize - 1; LI >= 0; --LI) {
   1601           Revved.push_back(Elts2[VI + LI]);
   1602         }
   1603       }
   1604 
   1605       Elts.insert(Revved.begin(), Revved.end());
   1606     }
   1607   };
   1608 
   1609   class MaskExpander : public SetTheory::Expander {
   1610     unsigned N;
   1611 
   1612   public:
   1613     MaskExpander(unsigned N) : N(N) {}
   1614 
   1615     void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override {
   1616       unsigned Addend = 0;
   1617       if (R->getName() == "mask0")
   1618         Addend = 0;
   1619       else if (R->getName() == "mask1")
   1620         Addend = N;
   1621       else
   1622         return;
   1623       for (unsigned I = 0; I < N; ++I)
   1624         Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend)));
   1625     }
   1626   };
   1627 
   1628   // (shuffle arg1, arg2, sequence)
   1629   std::pair<Type, std::string> Arg1 =
   1630       emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
   1631   std::pair<Type, std::string> Arg2 =
   1632       emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
   1633   assert_with_loc(Arg1.first == Arg2.first,
   1634                   "Different types in arguments to shuffle!");
   1635 
   1636   SetTheory ST;
   1637   SetTheory::RecSet Elts;
   1638   ST.addOperator("lowhalf", std::make_unique<LowHalf>());
   1639   ST.addOperator("highhalf", std::make_unique<HighHalf>());
   1640   ST.addOperator("rev",
   1641                  std::make_unique<Rev>(Arg1.first.getElementSizeInBits()));
   1642   ST.addExpander("MaskExpand",
   1643                  std::make_unique<MaskExpander>(Arg1.first.getNumElements()));
   1644   ST.evaluate(DI->getArg(2), Elts, None);
   1645 
   1646   std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
   1647   for (auto &E : Elts) {
   1648     StringRef Name = E->getName();
   1649     assert_with_loc(Name.startswith("sv"),
   1650                     "Incorrect element kind in shuffle mask!");
   1651     S += ", " + Name.drop_front(2).str();
   1652   }
   1653   S += ")";
   1654 
   1655   // Recalculate the return type - the shuffle may have halved or doubled it.
   1656   Type T(Arg1.first);
   1657   if (Elts.size() > T.getNumElements()) {
   1658     assert_with_loc(
   1659         Elts.size() == T.getNumElements() * 2,
   1660         "Can only double or half the number of elements in a shuffle!");
   1661     T.doubleLanes();
   1662   } else if (Elts.size() < T.getNumElements()) {
   1663     assert_with_loc(
   1664         Elts.size() == T.getNumElements() / 2,
   1665         "Can only double or half the number of elements in a shuffle!");
   1666     T.halveLanes();
   1667   }
   1668 
   1669   return std::make_pair(T, S);
   1670 }
   1671 
   1672 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) {
   1673   assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument");
   1674   std::pair<Type, std::string> A =
   1675       emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
   1676   assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument");
   1677 
   1678   Type T = Intr.getBaseType();
   1679   assert_with_loc(T.isVector(), "dup() used but default type is scalar!");
   1680   std::string S = "(" + T.str() + ") {";
   1681   for (unsigned I = 0; I < T.getNumElements(); ++I) {
   1682     if (I != 0)
   1683       S += ", ";
   1684     S += A.second;
   1685   }
   1686   S += "}";
   1687 
   1688   return std::make_pair(T, S);
   1689 }
   1690 
   1691 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDupTyped(DagInit *DI) {
   1692   assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments");
   1693   std::pair<Type, std::string> B =
   1694       emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
   1695   assert_with_loc(B.first.isScalar(),
   1696                   "dup_typed() requires a scalar as the second argument");
   1697   Type T;
   1698   // If the type argument is a constant string, construct the type directly.
   1699   if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0))) {
   1700     T = Type::fromTypedefName(SI->getAsUnquotedString());
   1701     assert_with_loc(!T.isVoid(), "Unknown typedef");
   1702   } else
   1703     T = emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0))).first;
   1704 
   1705   assert_with_loc(T.isVector(), "dup_typed() used but target type is scalar!");
   1706   std::string S = "(" + T.str() + ") {";
   1707   for (unsigned I = 0; I < T.getNumElements(); ++I) {
   1708     if (I != 0)
   1709       S += ", ";
   1710     S += B.second;
   1711   }
   1712   S += "}";
   1713 
   1714   return std::make_pair(T, S);
   1715 }
   1716 
   1717 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) {
   1718   assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments");
   1719   std::pair<Type, std::string> A =
   1720       emitDagArg(DI->getArg(0), std::string(DI->getArgNameStr(0)));
   1721   std::pair<Type, std::string> B =
   1722       emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
   1723 
   1724   assert_with_loc(B.first.isScalar(),
   1725                   "splat() requires a scalar int as the second argument");
   1726 
   1727   std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second;
   1728   for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) {
   1729     S += ", " + B.second;
   1730   }
   1731   S += ")";
   1732 
   1733   return std::make_pair(Intr.getBaseType(), S);
   1734 }
   1735 
   1736 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) {
   1737   assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments");
   1738   std::pair<Type, std::string> A =
   1739       emitDagArg(DI->getArg(1), std::string(DI->getArgNameStr(1)));
   1740 
   1741   assert_with_loc(!A.first.isVoid(),
   1742                   "Argument to save_temp() must have non-void type!");
   1743 
   1744   std::string N = std::string(DI->getArgNameStr(0));
   1745   assert_with_loc(!N.empty(),
   1746                   "save_temp() expects a name as the first argument");
   1747 
   1748   assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(),
   1749                   "Variable already defined!");
   1750   Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix);
   1751 
   1752   std::string S =
   1753       A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second;
   1754 
   1755   return std::make_pair(Type::getVoid(), S);
   1756 }
   1757 
   1758 std::pair<Type, std::string>
   1759 Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) {
   1760   std::string S = Intr.Name;
   1761 
   1762   assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!");
   1763   std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
   1764   std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
   1765 
   1766   size_t Idx = S.find(ToReplace);
   1767 
   1768   assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!");
   1769   S.replace(Idx, ToReplace.size(), ReplaceWith);
   1770 
   1771   return std::make_pair(Type::getVoid(), S);
   1772 }
   1773 
   1774 std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){
   1775   std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
   1776   std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
   1777   return std::make_pair(Type::fromTypedefName(Ty), Value);
   1778 }
   1779 
   1780 std::pair<Type, std::string>
   1781 Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) {
   1782   if (!ArgName.empty()) {
   1783     assert_with_loc(!Arg->isComplete(),
   1784                     "Arguments must either be DAGs or names, not both!");
   1785     assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(),
   1786                     "Variable not defined!");
   1787     Variable &V = Intr.Variables[ArgName];
   1788     return std::make_pair(V.getType(), V.getName());
   1789   }
   1790 
   1791   assert(Arg && "Neither ArgName nor Arg?!");
   1792   DagInit *DI = dyn_cast<DagInit>(Arg);
   1793   assert_with_loc(DI, "Arguments must either be DAGs or names!");
   1794 
   1795   return emitDag(DI);
   1796 }
   1797 
   1798 std::string Intrinsic::generate() {
   1799   // Avoid duplicated code for big and little endian
   1800   if (isBigEndianSafe()) {
   1801     generateImpl(false, "", "");
   1802     return OS.str();
   1803   }
   1804   // Little endian intrinsics are simple and don't require any argument
   1805   // swapping.
   1806   OS << "#ifdef __LITTLE_ENDIAN__\n";
   1807 
   1808   generateImpl(false, "", "");
   1809 
   1810   OS << "#else\n";
   1811 
   1812   // Big endian intrinsics are more complex. The user intended these
   1813   // intrinsics to operate on a vector "as-if" loaded by (V)LDR,
   1814   // but we load as-if (V)LD1. So we should swap all arguments and
   1815   // swap the return value too.
   1816   //
   1817   // If we call sub-intrinsics, we should call a version that does
   1818   // not re-swap the arguments!
   1819   generateImpl(true, "", "__noswap_");
   1820 
   1821   // If we're needed early, create a non-swapping variant for
   1822   // big-endian.
   1823   if (NeededEarly) {
   1824     generateImpl(false, "__noswap_", "__noswap_");
   1825   }
   1826   OS << "#endif\n\n";
   1827 
   1828   return OS.str();
   1829 }
   1830 
   1831 void Intrinsic::generateImpl(bool ReverseArguments,
   1832                              StringRef NamePrefix, StringRef CallPrefix) {
   1833   CurrentRecord = R;
   1834 
   1835   // If we call a macro, our local variables may be corrupted due to
   1836   // lack of proper lexical scoping. So, add a globally unique postfix
   1837   // to every variable.
   1838   //
   1839   // indexBody() should have set up the Dependencies set by now.
   1840   for (auto *I : Dependencies)
   1841     if (I->UseMacro) {
   1842       VariablePostfix = "_" + utostr(Emitter.getUniqueNumber());
   1843       break;
   1844     }
   1845 
   1846   initVariables();
   1847 
   1848   emitPrototype(NamePrefix);
   1849 
   1850   if (IsUnavailable) {
   1851     OS << " __attribute__((unavailable));";
   1852   } else {
   1853     emitOpeningBrace();
   1854     emitShadowedArgs();
   1855     if (ReverseArguments)
   1856       emitArgumentReversal();
   1857     emitBody(CallPrefix);
   1858     if (ReverseArguments)
   1859       emitReturnReversal();
   1860     emitReturn();
   1861     emitClosingBrace();
   1862   }
   1863   OS << "\n";
   1864 
   1865   CurrentRecord = nullptr;
   1866 }
   1867 
   1868 void Intrinsic::indexBody() {
   1869   CurrentRecord = R;
   1870 
   1871   initVariables();
   1872   emitBody("");
   1873   OS.str("");
   1874 
   1875   CurrentRecord = nullptr;
   1876 }
   1877 
   1878 //===----------------------------------------------------------------------===//
   1879 // NeonEmitter implementation
   1880 //===----------------------------------------------------------------------===//
   1881 
   1882 Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types,
   1883                                      Optional<std::string> MangledName) {
   1884   // First, look up the name in the intrinsic map.
   1885   assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(),
   1886                   ("Intrinsic '" + Name + "' not found!").str());
   1887   auto &V = IntrinsicMap.find(Name.str())->second;
   1888   std::vector<Intrinsic *> GoodVec;
   1889 
   1890   // Create a string to print if we end up failing.
   1891   std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
   1892   for (unsigned I = 0; I < Types.size(); ++I) {
   1893     if (I != 0)
   1894       ErrMsg += ", ";
   1895     ErrMsg += Types[I].str();
   1896   }
   1897   ErrMsg += ")'\n";
   1898   ErrMsg += "Available overloads:\n";
   1899 
   1900   // Now, look through each intrinsic implementation and see if the types are
   1901   // compatible.
   1902   for (auto &I : V) {
   1903     ErrMsg += "  - " + I.getReturnType().str() + " " + I.getMangledName();
   1904     ErrMsg += "(";
   1905     for (unsigned A = 0; A < I.getNumParams(); ++A) {
   1906       if (A != 0)
   1907         ErrMsg += ", ";
   1908       ErrMsg += I.getParamType(A).str();
   1909     }
   1910     ErrMsg += ")\n";
   1911 
   1912     if (MangledName && MangledName != I.getMangledName(true))
   1913       continue;
   1914 
   1915     if (I.getNumParams() != Types.size())
   1916       continue;
   1917 
   1918     unsigned ArgNum = 0;
   1919     bool MatchingArgumentTypes =
   1920         std::all_of(Types.begin(), Types.end(), [&](const auto &Type) {
   1921           return Type == I.getParamType(ArgNum++);
   1922         });
   1923 
   1924     if (MatchingArgumentTypes)
   1925       GoodVec.push_back(&I);
   1926   }
   1927 
   1928   assert_with_loc(!GoodVec.empty(),
   1929                   "No compatible intrinsic found - " + ErrMsg);
   1930   assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg);
   1931 
   1932   return *GoodVec.front();
   1933 }
   1934 
   1935 void NeonEmitter::createIntrinsic(Record *R,
   1936                                   SmallVectorImpl<Intrinsic *> &Out) {
   1937   std::string Name = std::string(R->getValueAsString("Name"));
   1938   std::string Proto = std::string(R->getValueAsString("Prototype"));
   1939   std::string Types = std::string(R->getValueAsString("Types"));
   1940   Record *OperationRec = R->getValueAsDef("Operation");
   1941   bool BigEndianSafe  = R->getValueAsBit("BigEndianSafe");
   1942   std::string Guard = std::string(R->getValueAsString("ArchGuard"));
   1943   bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
   1944   std::string CartesianProductWith = std::string(R->getValueAsString("CartesianProductWith"));
   1945 
   1946   // Set the global current record. This allows assert_with_loc to produce
   1947   // decent location information even when highly nested.
   1948   CurrentRecord = R;
   1949 
   1950   ListInit *Body = OperationRec->getValueAsListInit("Ops");
   1951 
   1952   std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types);
   1953 
   1954   ClassKind CK = ClassNone;
   1955   if (R->getSuperClasses().size() >= 2)
   1956     CK = ClassMap[R->getSuperClasses()[1].first];
   1957 
   1958   std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
   1959   if (!CartesianProductWith.empty()) {
   1960     std::vector<TypeSpec> ProductTypeSpecs = TypeSpec::fromTypeSpecs(CartesianProductWith);
   1961     for (auto TS : TypeSpecs) {
   1962       Type DefaultT(TS, ".");
   1963       for (auto SrcTS : ProductTypeSpecs) {
   1964         Type DefaultSrcT(SrcTS, ".");
   1965         if (TS == SrcTS ||
   1966             DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
   1967           continue;
   1968         NewTypeSpecs.push_back(std::make_pair(TS, SrcTS));
   1969       }
   1970     }
   1971   } else {
   1972     for (auto TS : TypeSpecs) {
   1973       NewTypeSpecs.push_back(std::make_pair(TS, TS));
   1974     }
   1975   }
   1976 
   1977   llvm::sort(NewTypeSpecs);
   1978   NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()),
   1979 		     NewTypeSpecs.end());
   1980   auto &Entry = IntrinsicMap[Name];
   1981 
   1982   for (auto &I : NewTypeSpecs) {
   1983     Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this,
   1984                        Guard, IsUnavailable, BigEndianSafe);
   1985     Out.push_back(&Entry.back());
   1986   }
   1987 
   1988   CurrentRecord = nullptr;
   1989 }
   1990 
   1991 /// genBuiltinsDef: Generate the BuiltinsARM.def and  BuiltinsAArch64.def
   1992 /// declaration of builtins, checking for unique builtin declarations.
   1993 void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
   1994                                  SmallVectorImpl<Intrinsic *> &Defs) {
   1995   OS << "#ifdef GET_NEON_BUILTINS\n";
   1996 
   1997   // We only want to emit a builtin once, and we want to emit them in
   1998   // alphabetical order, so use a std::set.
   1999   std::set<std::string> Builtins;
   2000 
   2001   for (auto *Def : Defs) {
   2002     if (Def->hasBody())
   2003       continue;
   2004 
   2005     std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \"";
   2006 
   2007     S += Def->getBuiltinTypeStr();
   2008     S += "\", \"n\")";
   2009 
   2010     Builtins.insert(S);
   2011   }
   2012 
   2013   for (auto &S : Builtins)
   2014     OS << S << "\n";
   2015   OS << "#endif\n\n";
   2016 }
   2017 
   2018 /// Generate the ARM and AArch64 overloaded type checking code for
   2019 /// SemaChecking.cpp, checking for unique builtin declarations.
   2020 void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
   2021                                            SmallVectorImpl<Intrinsic *> &Defs) {
   2022   OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
   2023 
   2024   // We record each overload check line before emitting because subsequent Inst
   2025   // definitions may extend the number of permitted types (i.e. augment the
   2026   // Mask). Use std::map to avoid sorting the table by hash number.
   2027   struct OverloadInfo {
   2028     uint64_t Mask;
   2029     int PtrArgNum;
   2030     bool HasConstPtr;
   2031     OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {}
   2032   };
   2033   std::map<std::string, OverloadInfo> OverloadMap;
   2034 
   2035   for (auto *Def : Defs) {
   2036     // If the def has a body (that is, it has Operation DAGs), it won't call
   2037     // __builtin_neon_* so we don't need to generate a definition for it.
   2038     if (Def->hasBody())
   2039       continue;
   2040     // Functions which have a scalar argument cannot be overloaded, no need to
   2041     // check them if we are emitting the type checking code.
   2042     if (Def->protoHasScalar())
   2043       continue;
   2044 
   2045     uint64_t Mask = 0ULL;
   2046     Mask |= 1ULL << Def->getPolymorphicKeyType().getNeonEnum();
   2047 
   2048     // Check if the function has a pointer or const pointer argument.
   2049     int PtrArgNum = -1;
   2050     bool HasConstPtr = false;
   2051     for (unsigned I = 0; I < Def->getNumParams(); ++I) {
   2052       const auto &Type = Def->getParamType(I);
   2053       if (Type.isPointer()) {
   2054         PtrArgNum = I;
   2055         HasConstPtr = Type.isConstPointer();
   2056       }
   2057     }
   2058 
   2059     // For sret builtins, adjust the pointer argument index.
   2060     if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
   2061       PtrArgNum += 1;
   2062 
   2063     std::string Name = Def->getName();
   2064     // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
   2065     // and vst1_lane intrinsics.  Using a pointer to the vector element
   2066     // type with one of those operations causes codegen to select an aligned
   2067     // load/store instruction.  If you want an unaligned operation,
   2068     // the pointer argument needs to have less alignment than element type,
   2069     // so just accept any pointer type.
   2070     if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") {
   2071       PtrArgNum = -1;
   2072       HasConstPtr = false;
   2073     }
   2074 
   2075     if (Mask) {
   2076       std::string Name = Def->getMangledName();
   2077       OverloadMap.insert(std::make_pair(Name, OverloadInfo()));
   2078       OverloadInfo &OI = OverloadMap[Name];
   2079       OI.Mask |= Mask;
   2080       OI.PtrArgNum |= PtrArgNum;
   2081       OI.HasConstPtr = HasConstPtr;
   2082     }
   2083   }
   2084 
   2085   for (auto &I : OverloadMap) {
   2086     OverloadInfo &OI = I.second;
   2087 
   2088     OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
   2089     OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL";
   2090     if (OI.PtrArgNum >= 0)
   2091       OS << "; PtrArgNum = " << OI.PtrArgNum;
   2092     if (OI.HasConstPtr)
   2093       OS << "; HasConstPtr = true";
   2094     OS << "; break;\n";
   2095   }
   2096   OS << "#endif\n\n";
   2097 }
   2098 
   2099 void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
   2100                                         SmallVectorImpl<Intrinsic *> &Defs) {
   2101   OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
   2102 
   2103   std::set<std::string> Emitted;
   2104 
   2105   for (auto *Def : Defs) {
   2106     if (Def->hasBody())
   2107       continue;
   2108     // Functions which do not have an immediate do not need to have range
   2109     // checking code emitted.
   2110     if (!Def->hasImmediate())
   2111       continue;
   2112     if (Emitted.find(Def->getMangledName()) != Emitted.end())
   2113       continue;
   2114 
   2115     std::string LowerBound, UpperBound;
   2116 
   2117     Record *R = Def->getRecord();
   2118     if (R->getValueAsBit("isVXAR")) {
   2119       //VXAR takes an immediate in the range [0, 63]
   2120       LowerBound = "0";
   2121       UpperBound = "63";
   2122     } else if (R->getValueAsBit("isVCVT_N")) {
   2123       // VCVT between floating- and fixed-point values takes an immediate
   2124       // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
   2125       LowerBound = "1";
   2126 	  if (Def->getBaseType().getElementSizeInBits() == 16 ||
   2127 		  Def->getName().find('h') != std::string::npos)
   2128 		// VCVTh operating on FP16 intrinsics in range [1, 16)
   2129 		UpperBound = "15";
   2130 	  else if (Def->getBaseType().getElementSizeInBits() == 32)
   2131         UpperBound = "31";
   2132 	  else
   2133         UpperBound = "63";
   2134     } else if (R->getValueAsBit("isScalarShift")) {
   2135       // Right shifts have an 'r' in the name, left shifts do not. Convert
   2136       // instructions have the same bounds and right shifts.
   2137       if (Def->getName().find('r') != std::string::npos ||
   2138           Def->getName().find("cvt") != std::string::npos)
   2139         LowerBound = "1";
   2140 
   2141       UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1);
   2142     } else if (R->getValueAsBit("isShift")) {
   2143       // Builtins which are overloaded by type will need to have their upper
   2144       // bound computed at Sema time based on the type constant.
   2145 
   2146       // Right shifts have an 'r' in the name, left shifts do not.
   2147       if (Def->getName().find('r') != std::string::npos)
   2148         LowerBound = "1";
   2149       UpperBound = "RFT(TV, true)";
   2150     } else if (Def->getClassKind(true) == ClassB) {
   2151       // ClassB intrinsics have a type (and hence lane number) that is only
   2152       // known at runtime.
   2153       if (R->getValueAsBit("isLaneQ"))
   2154         UpperBound = "RFT(TV, false, true)";
   2155       else
   2156         UpperBound = "RFT(TV, false, false)";
   2157     } else {
   2158       // The immediate generally refers to a lane in the preceding argument.
   2159       assert(Def->getImmediateIdx() > 0);
   2160       Type T = Def->getParamType(Def->getImmediateIdx() - 1);
   2161       UpperBound = utostr(T.getNumElements() - 1);
   2162     }
   2163 
   2164     // Calculate the index of the immediate that should be range checked.
   2165     unsigned Idx = Def->getNumParams();
   2166     if (Def->hasImmediate())
   2167       Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx());
   2168 
   2169     OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": "
   2170        << "i = " << Idx << ";";
   2171     if (!LowerBound.empty())
   2172       OS << " l = " << LowerBound << ";";
   2173     if (!UpperBound.empty())
   2174       OS << " u = " << UpperBound << ";";
   2175     OS << " break;\n";
   2176 
   2177     Emitted.insert(Def->getMangledName());
   2178   }
   2179 
   2180   OS << "#endif\n\n";
   2181 }
   2182 
   2183 /// runHeader - Emit a file with sections defining:
   2184 /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
   2185 /// 2. the SemaChecking code for the type overload checking.
   2186 /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
   2187 void NeonEmitter::runHeader(raw_ostream &OS) {
   2188   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
   2189 
   2190   SmallVector<Intrinsic *, 128> Defs;
   2191   for (auto *R : RV)
   2192     createIntrinsic(R, Defs);
   2193 
   2194   // Generate shared BuiltinsXXX.def
   2195   genBuiltinsDef(OS, Defs);
   2196 
   2197   // Generate ARM overloaded type checking code for SemaChecking.cpp
   2198   genOverloadTypeCheckCode(OS, Defs);
   2199 
   2200   // Generate ARM range checking code for shift/lane immediates.
   2201   genIntrinsicRangeCheckCode(OS, Defs);
   2202 }
   2203 
   2204 static void emitNeonTypeDefs(const std::string& types, raw_ostream &OS) {
   2205   std::string TypedefTypes(types);
   2206   std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
   2207 
   2208   // Emit vector typedefs.
   2209   bool InIfdef = false;
   2210   for (auto &TS : TDTypeVec) {
   2211     bool IsA64 = false;
   2212     Type T(TS, ".");
   2213     if (T.isDouble())
   2214       IsA64 = true;
   2215 
   2216     if (InIfdef && !IsA64) {
   2217       OS << "#endif\n";
   2218       InIfdef = false;
   2219     }
   2220     if (!InIfdef && IsA64) {
   2221       OS << "#ifdef __aarch64__\n";
   2222       InIfdef = true;
   2223     }
   2224 
   2225     if (T.isPoly())
   2226       OS << "typedef __attribute__((neon_polyvector_type(";
   2227     else
   2228       OS << "typedef __attribute__((neon_vector_type(";
   2229 
   2230     Type T2 = T;
   2231     T2.makeScalar();
   2232     OS << T.getNumElements() << "))) ";
   2233     OS << T2.str();
   2234     OS << " " << T.str() << ";\n";
   2235   }
   2236   if (InIfdef)
   2237     OS << "#endif\n";
   2238   OS << "\n";
   2239 
   2240   // Emit struct typedefs.
   2241   InIfdef = false;
   2242   for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
   2243     for (auto &TS : TDTypeVec) {
   2244       bool IsA64 = false;
   2245       Type T(TS, ".");
   2246       if (T.isDouble())
   2247         IsA64 = true;
   2248 
   2249       if (InIfdef && !IsA64) {
   2250         OS << "#endif\n";
   2251         InIfdef = false;
   2252       }
   2253       if (!InIfdef && IsA64) {
   2254         OS << "#ifdef __aarch64__\n";
   2255         InIfdef = true;
   2256       }
   2257 
   2258       const char Mods[] = { static_cast<char>('2' + (NumMembers - 2)), 0};
   2259       Type VT(TS, Mods);
   2260       OS << "typedef struct " << VT.str() << " {\n";
   2261       OS << "  " << T.str() << " val";
   2262       OS << "[" << NumMembers << "]";
   2263       OS << ";\n} ";
   2264       OS << VT.str() << ";\n";
   2265       OS << "\n";
   2266     }
   2267   }
   2268   if (InIfdef)
   2269     OS << "#endif\n";
   2270 }
   2271 
   2272 /// run - Read the records in arm_neon.td and output arm_neon.h.  arm_neon.h
   2273 /// is comprised of type definitions and function declarations.
   2274 void NeonEmitter::run(raw_ostream &OS) {
   2275   OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
   2276         "------------------------------"
   2277         "---===\n"
   2278         " *\n"
   2279         " * Permission is hereby granted, free of charge, to any person "
   2280         "obtaining "
   2281         "a copy\n"
   2282         " * of this software and associated documentation files (the "
   2283         "\"Software\"),"
   2284         " to deal\n"
   2285         " * in the Software without restriction, including without limitation "
   2286         "the "
   2287         "rights\n"
   2288         " * to use, copy, modify, merge, publish, distribute, sublicense, "
   2289         "and/or sell\n"
   2290         " * copies of the Software, and to permit persons to whom the Software "
   2291         "is\n"
   2292         " * furnished to do so, subject to the following conditions:\n"
   2293         " *\n"
   2294         " * The above copyright notice and this permission notice shall be "
   2295         "included in\n"
   2296         " * all copies or substantial portions of the Software.\n"
   2297         " *\n"
   2298         " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
   2299         "EXPRESS OR\n"
   2300         " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
   2301         "MERCHANTABILITY,\n"
   2302         " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
   2303         "SHALL THE\n"
   2304         " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
   2305         "OTHER\n"
   2306         " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
   2307         "ARISING FROM,\n"
   2308         " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
   2309         "DEALINGS IN\n"
   2310         " * THE SOFTWARE.\n"
   2311         " *\n"
   2312         " *===-----------------------------------------------------------------"
   2313         "---"
   2314         "---===\n"
   2315         " */\n\n";
   2316 
   2317   OS << "#ifndef __ARM_NEON_H\n";
   2318   OS << "#define __ARM_NEON_H\n\n";
   2319 
   2320   OS << "#ifndef __ARM_FP\n";
   2321   OS << "#error \"NEON intrinsics not available with the soft-float ABI. "
   2322         "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n";
   2323   OS << "#else\n\n";
   2324 
   2325   OS << "#if !defined(__ARM_NEON)\n";
   2326   OS << "#error \"NEON support not enabled\"\n";
   2327   OS << "#else\n\n";
   2328 
   2329   OS << "#include <stdint.h>\n\n";
   2330 
   2331   OS << "#ifdef __ARM_FEATURE_BF16\n";
   2332   OS << "#include <arm_bf16.h>\n";
   2333   OS << "typedef __bf16 bfloat16_t;\n";
   2334   OS << "#endif\n\n";
   2335 
   2336   // Emit NEON-specific scalar typedefs.
   2337   OS << "typedef float float32_t;\n";
   2338   OS << "typedef __fp16 float16_t;\n";
   2339 
   2340   OS << "#ifdef __aarch64__\n";
   2341   OS << "typedef double float64_t;\n";
   2342   OS << "#endif\n\n";
   2343 
   2344   // For now, signedness of polynomial types depends on target
   2345   OS << "#ifdef __aarch64__\n";
   2346   OS << "typedef uint8_t poly8_t;\n";
   2347   OS << "typedef uint16_t poly16_t;\n";
   2348   OS << "typedef uint64_t poly64_t;\n";
   2349   OS << "typedef __uint128_t poly128_t;\n";
   2350   OS << "#else\n";
   2351   OS << "typedef int8_t poly8_t;\n";
   2352   OS << "typedef int16_t poly16_t;\n";
   2353   OS << "typedef int64_t poly64_t;\n";
   2354   OS << "#endif\n";
   2355 
   2356   emitNeonTypeDefs("cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl", OS);
   2357 
   2358   OS << "#ifdef __ARM_FEATURE_BF16\n";
   2359   emitNeonTypeDefs("bQb", OS);
   2360   OS << "#endif\n\n";
   2361 
   2362   OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
   2363         "__nodebug__))\n\n";
   2364 
   2365   SmallVector<Intrinsic *, 128> Defs;
   2366   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
   2367   for (auto *R : RV)
   2368     createIntrinsic(R, Defs);
   2369 
   2370   for (auto *I : Defs)
   2371     I->indexBody();
   2372 
   2373   llvm::stable_sort(Defs, llvm::deref<std::less<>>());
   2374 
   2375   // Only emit a def when its requirements have been met.
   2376   // FIXME: This loop could be made faster, but it's fast enough for now.
   2377   bool MadeProgress = true;
   2378   std::string InGuard;
   2379   while (!Defs.empty() && MadeProgress) {
   2380     MadeProgress = false;
   2381 
   2382     for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
   2383          I != Defs.end(); /*No step*/) {
   2384       bool DependenciesSatisfied = true;
   2385       for (auto *II : (*I)->getDependencies()) {
   2386         if (llvm::is_contained(Defs, II))
   2387           DependenciesSatisfied = false;
   2388       }
   2389       if (!DependenciesSatisfied) {
   2390         // Try the next one.
   2391         ++I;
   2392         continue;
   2393       }
   2394 
   2395       // Emit #endif/#if pair if needed.
   2396       if ((*I)->getGuard() != InGuard) {
   2397         if (!InGuard.empty())
   2398           OS << "#endif\n";
   2399         InGuard = (*I)->getGuard();
   2400         if (!InGuard.empty())
   2401           OS << "#if " << InGuard << "\n";
   2402       }
   2403 
   2404       // Actually generate the intrinsic code.
   2405       OS << (*I)->generate();
   2406 
   2407       MadeProgress = true;
   2408       I = Defs.erase(I);
   2409     }
   2410   }
   2411   assert(Defs.empty() && "Some requirements were not satisfied!");
   2412   if (!InGuard.empty())
   2413     OS << "#endif\n";
   2414 
   2415   OS << "\n";
   2416   OS << "#undef __ai\n\n";
   2417   OS << "#endif /* if !defined(__ARM_NEON) */\n";
   2418   OS << "#endif /* ifndef __ARM_FP */\n";
   2419   OS << "#endif /* __ARM_NEON_H */\n";
   2420 }
   2421 
   2422 /// run - Read the records in arm_fp16.td and output arm_fp16.h.  arm_fp16.h
   2423 /// is comprised of type definitions and function declarations.
   2424 void NeonEmitter::runFP16(raw_ostream &OS) {
   2425   OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
   2426         "------------------------------"
   2427         "---===\n"
   2428         " *\n"
   2429         " * Permission is hereby granted, free of charge, to any person "
   2430         "obtaining a copy\n"
   2431         " * of this software and associated documentation files (the "
   2432 				"\"Software\"), to deal\n"
   2433         " * in the Software without restriction, including without limitation "
   2434 				"the rights\n"
   2435         " * to use, copy, modify, merge, publish, distribute, sublicense, "
   2436 				"and/or sell\n"
   2437         " * copies of the Software, and to permit persons to whom the Software "
   2438 				"is\n"
   2439         " * furnished to do so, subject to the following conditions:\n"
   2440         " *\n"
   2441         " * The above copyright notice and this permission notice shall be "
   2442         "included in\n"
   2443         " * all copies or substantial portions of the Software.\n"
   2444         " *\n"
   2445         " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
   2446         "EXPRESS OR\n"
   2447         " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
   2448         "MERCHANTABILITY,\n"
   2449         " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
   2450         "SHALL THE\n"
   2451         " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
   2452         "OTHER\n"
   2453         " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
   2454         "ARISING FROM,\n"
   2455         " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
   2456         "DEALINGS IN\n"
   2457         " * THE SOFTWARE.\n"
   2458         " *\n"
   2459         " *===-----------------------------------------------------------------"
   2460         "---"
   2461         "---===\n"
   2462         " */\n\n";
   2463 
   2464   OS << "#ifndef __ARM_FP16_H\n";
   2465   OS << "#define __ARM_FP16_H\n\n";
   2466 
   2467   OS << "#include <stdint.h>\n\n";
   2468 
   2469   OS << "typedef __fp16 float16_t;\n";
   2470 
   2471   OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
   2472         "__nodebug__))\n\n";
   2473 
   2474   SmallVector<Intrinsic *, 128> Defs;
   2475   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
   2476   for (auto *R : RV)
   2477     createIntrinsic(R, Defs);
   2478 
   2479   for (auto *I : Defs)
   2480     I->indexBody();
   2481 
   2482   llvm::stable_sort(Defs, llvm::deref<std::less<>>());
   2483 
   2484   // Only emit a def when its requirements have been met.
   2485   // FIXME: This loop could be made faster, but it's fast enough for now.
   2486   bool MadeProgress = true;
   2487   std::string InGuard;
   2488   while (!Defs.empty() && MadeProgress) {
   2489     MadeProgress = false;
   2490 
   2491     for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
   2492          I != Defs.end(); /*No step*/) {
   2493       bool DependenciesSatisfied = true;
   2494       for (auto *II : (*I)->getDependencies()) {
   2495         if (llvm::is_contained(Defs, II))
   2496           DependenciesSatisfied = false;
   2497       }
   2498       if (!DependenciesSatisfied) {
   2499         // Try the next one.
   2500         ++I;
   2501         continue;
   2502       }
   2503 
   2504       // Emit #endif/#if pair if needed.
   2505       if ((*I)->getGuard() != InGuard) {
   2506         if (!InGuard.empty())
   2507           OS << "#endif\n";
   2508         InGuard = (*I)->getGuard();
   2509         if (!InGuard.empty())
   2510           OS << "#if " << InGuard << "\n";
   2511       }
   2512 
   2513       // Actually generate the intrinsic code.
   2514       OS << (*I)->generate();
   2515 
   2516       MadeProgress = true;
   2517       I = Defs.erase(I);
   2518     }
   2519   }
   2520   assert(Defs.empty() && "Some requirements were not satisfied!");
   2521   if (!InGuard.empty())
   2522     OS << "#endif\n";
   2523 
   2524   OS << "\n";
   2525   OS << "#undef __ai\n\n";
   2526   OS << "#endif /* __ARM_FP16_H */\n";
   2527 }
   2528 
   2529 void NeonEmitter::runBF16(raw_ostream &OS) {
   2530   OS << "/*===---- arm_bf16.h - ARM BF16 intrinsics "
   2531         "-----------------------------------===\n"
   2532         " *\n"
   2533         " *\n"
   2534         " * Part of the LLVM Project, under the Apache License v2.0 with LLVM "
   2535         "Exceptions.\n"
   2536         " * See https://llvm.org/LICENSE.txt for license information.\n"
   2537         " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n"
   2538         " *\n"
   2539         " *===-----------------------------------------------------------------"
   2540         "------===\n"
   2541         " */\n\n";
   2542 
   2543   OS << "#ifndef __ARM_BF16_H\n";
   2544   OS << "#define __ARM_BF16_H\n\n";
   2545 
   2546   OS << "typedef __bf16 bfloat16_t;\n";
   2547 
   2548   OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
   2549         "__nodebug__))\n\n";
   2550 
   2551   SmallVector<Intrinsic *, 128> Defs;
   2552   std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
   2553   for (auto *R : RV)
   2554     createIntrinsic(R, Defs);
   2555 
   2556   for (auto *I : Defs)
   2557     I->indexBody();
   2558 
   2559   llvm::stable_sort(Defs, llvm::deref<std::less<>>());
   2560 
   2561   // Only emit a def when its requirements have been met.
   2562   // FIXME: This loop could be made faster, but it's fast enough for now.
   2563   bool MadeProgress = true;
   2564   std::string InGuard;
   2565   while (!Defs.empty() && MadeProgress) {
   2566     MadeProgress = false;
   2567 
   2568     for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
   2569          I != Defs.end(); /*No step*/) {
   2570       bool DependenciesSatisfied = true;
   2571       for (auto *II : (*I)->getDependencies()) {
   2572         if (llvm::is_contained(Defs, II))
   2573           DependenciesSatisfied = false;
   2574       }
   2575       if (!DependenciesSatisfied) {
   2576         // Try the next one.
   2577         ++I;
   2578         continue;
   2579       }
   2580 
   2581       // Emit #endif/#if pair if needed.
   2582       if ((*I)->getGuard() != InGuard) {
   2583         if (!InGuard.empty())
   2584           OS << "#endif\n";
   2585         InGuard = (*I)->getGuard();
   2586         if (!InGuard.empty())
   2587           OS << "#if " << InGuard << "\n";
   2588       }
   2589 
   2590       // Actually generate the intrinsic code.
   2591       OS << (*I)->generate();
   2592 
   2593       MadeProgress = true;
   2594       I = Defs.erase(I);
   2595     }
   2596   }
   2597   assert(Defs.empty() && "Some requirements were not satisfied!");
   2598   if (!InGuard.empty())
   2599     OS << "#endif\n";
   2600 
   2601   OS << "\n";
   2602   OS << "#undef __ai\n\n";
   2603 
   2604   OS << "#endif\n";
   2605 }
   2606 
   2607 void clang::EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
   2608   NeonEmitter(Records).run(OS);
   2609 }
   2610 
   2611 void clang::EmitFP16(RecordKeeper &Records, raw_ostream &OS) {
   2612   NeonEmitter(Records).runFP16(OS);
   2613 }
   2614 
   2615 void clang::EmitBF16(RecordKeeper &Records, raw_ostream &OS) {
   2616   NeonEmitter(Records).runBF16(OS);
   2617 }
   2618 
   2619 void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
   2620   NeonEmitter(Records).runHeader(OS);
   2621 }
   2622 
   2623 void clang::EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
   2624   llvm_unreachable("Neon test generation no longer implemented!");
   2625 }
   2626