Home | History | Annotate | Line # | Download | only in AArch64
      1 //===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 //
     10 //===----------------------------------------------------------------------===//
     11 
     12 #include "AArch64TargetMachine.h"
     13 #include "AArch64.h"
     14 #include "AArch64MachineFunctionInfo.h"
     15 #include "AArch64MacroFusion.h"
     16 #include "AArch64Subtarget.h"
     17 #include "AArch64TargetObjectFile.h"
     18 #include "AArch64TargetTransformInfo.h"
     19 #include "MCTargetDesc/AArch64MCTargetDesc.h"
     20 #include "TargetInfo/AArch64TargetInfo.h"
     21 #include "llvm/ADT/STLExtras.h"
     22 #include "llvm/ADT/Triple.h"
     23 #include "llvm/Analysis/TargetTransformInfo.h"
     24 #include "llvm/CodeGen/CSEConfigBase.h"
     25 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
     26 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
     27 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
     28 #include "llvm/CodeGen/GlobalISel/Localizer.h"
     29 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
     30 #include "llvm/CodeGen/MIRParser/MIParser.h"
     31 #include "llvm/CodeGen/MachineScheduler.h"
     32 #include "llvm/CodeGen/Passes.h"
     33 #include "llvm/CodeGen/TargetPassConfig.h"
     34 #include "llvm/IR/Attributes.h"
     35 #include "llvm/IR/Function.h"
     36 #include "llvm/InitializePasses.h"
     37 #include "llvm/MC/MCAsmInfo.h"
     38 #include "llvm/MC/MCTargetOptions.h"
     39 #include "llvm/Pass.h"
     40 #include "llvm/Support/CodeGen.h"
     41 #include "llvm/Support/CommandLine.h"
     42 #include "llvm/Support/TargetRegistry.h"
     43 #include "llvm/Target/TargetLoweringObjectFile.h"
     44 #include "llvm/Target/TargetOptions.h"
     45 #include "llvm/Transforms/CFGuard.h"
     46 #include "llvm/Transforms/Scalar.h"
     47 #include <memory>
     48 #include <string>
     49 
     50 using namespace llvm;
     51 
     52 static cl::opt<bool> EnableCCMP("aarch64-enable-ccmp",
     53                                 cl::desc("Enable the CCMP formation pass"),
     54                                 cl::init(true), cl::Hidden);
     55 
     56 static cl::opt<bool>
     57     EnableCondBrTuning("aarch64-enable-cond-br-tune",
     58                        cl::desc("Enable the conditional branch tuning pass"),
     59                        cl::init(true), cl::Hidden);
     60 
     61 static cl::opt<bool> EnableMCR("aarch64-enable-mcr",
     62                                cl::desc("Enable the machine combiner pass"),
     63                                cl::init(true), cl::Hidden);
     64 
     65 static cl::opt<bool> EnableStPairSuppress("aarch64-enable-stp-suppress",
     66                                           cl::desc("Suppress STP for AArch64"),
     67                                           cl::init(true), cl::Hidden);
     68 
     69 static cl::opt<bool> EnableAdvSIMDScalar(
     70     "aarch64-enable-simd-scalar",
     71     cl::desc("Enable use of AdvSIMD scalar integer instructions"),
     72     cl::init(false), cl::Hidden);
     73 
     74 static cl::opt<bool>
     75     EnablePromoteConstant("aarch64-enable-promote-const",
     76                           cl::desc("Enable the promote constant pass"),
     77                           cl::init(true), cl::Hidden);
     78 
     79 static cl::opt<bool> EnableCollectLOH(
     80     "aarch64-enable-collect-loh",
     81     cl::desc("Enable the pass that emits the linker optimization hints (LOH)"),
     82     cl::init(true), cl::Hidden);
     83 
     84 static cl::opt<bool>
     85     EnableDeadRegisterElimination("aarch64-enable-dead-defs", cl::Hidden,
     86                                   cl::desc("Enable the pass that removes dead"
     87                                            " definitons and replaces stores to"
     88                                            " them with stores to the zero"
     89                                            " register"),
     90                                   cl::init(true));
     91 
     92 static cl::opt<bool> EnableRedundantCopyElimination(
     93     "aarch64-enable-copyelim",
     94     cl::desc("Enable the redundant copy elimination pass"), cl::init(true),
     95     cl::Hidden);
     96 
     97 static cl::opt<bool> EnableLoadStoreOpt("aarch64-enable-ldst-opt",
     98                                         cl::desc("Enable the load/store pair"
     99                                                  " optimization pass"),
    100                                         cl::init(true), cl::Hidden);
    101 
    102 static cl::opt<bool> EnableAtomicTidy(
    103     "aarch64-enable-atomic-cfg-tidy", cl::Hidden,
    104     cl::desc("Run SimplifyCFG after expanding atomic operations"
    105              " to make use of cmpxchg flow-based information"),
    106     cl::init(true));
    107 
    108 static cl::opt<bool>
    109 EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden,
    110                         cl::desc("Run early if-conversion"),
    111                         cl::init(true));
    112 
    113 static cl::opt<bool>
    114     EnableCondOpt("aarch64-enable-condopt",
    115                   cl::desc("Enable the condition optimizer pass"),
    116                   cl::init(true), cl::Hidden);
    117 
    118 static cl::opt<bool>
    119 EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
    120                 cl::desc("Work around Cortex-A53 erratum 835769"),
    121                 cl::init(false));
    122 
    123 static cl::opt<bool>
    124     EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
    125                  cl::desc("Enable optimizations on complex GEPs"),
    126                  cl::init(false));
    127 
    128 static cl::opt<bool>
    129     BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
    130                      cl::desc("Relax out of range conditional branches"));
    131 
    132 static cl::opt<bool> EnableCompressJumpTables(
    133     "aarch64-enable-compress-jump-tables", cl::Hidden, cl::init(true),
    134     cl::desc("Use smallest entry possible for jump tables"));
    135 
    136 // FIXME: Unify control over GlobalMerge.
    137 static cl::opt<cl::boolOrDefault>
    138     EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden,
    139                       cl::desc("Enable the global merge pass"));
    140 
    141 static cl::opt<bool>
    142     EnableLoopDataPrefetch("aarch64-enable-loop-data-prefetch", cl::Hidden,
    143                            cl::desc("Enable the loop data prefetch pass"),
    144                            cl::init(true));
    145 
    146 static cl::opt<int> EnableGlobalISelAtO(
    147     "aarch64-enable-global-isel-at-O", cl::Hidden,
    148     cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
    149     cl::init(0));
    150 
    151 static cl::opt<bool>
    152     EnableSVEIntrinsicOpts("aarch64-enable-sve-intrinsic-opts", cl::Hidden,
    153                            cl::desc("Enable SVE intrinsic opts"),
    154                            cl::init(true));
    155 
    156 static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
    157                                          cl::init(true), cl::Hidden);
    158 
    159 static cl::opt<bool>
    160     EnableBranchTargets("aarch64-enable-branch-targets", cl::Hidden,
    161                         cl::desc("Enable the AAcrh64 branch target pass"),
    162                         cl::init(true));
    163 
    164 extern cl::opt<bool> EnableHomogeneousPrologEpilog;
    165 
    166 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
    167   // Register the target.
    168   RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
    169   RegisterTargetMachine<AArch64beTargetMachine> Y(getTheAArch64beTarget());
    170   RegisterTargetMachine<AArch64leTargetMachine> Z(getTheARM64Target());
    171   RegisterTargetMachine<AArch64leTargetMachine> W(getTheARM64_32Target());
    172   RegisterTargetMachine<AArch64leTargetMachine> V(getTheAArch64_32Target());
    173   auto PR = PassRegistry::getPassRegistry();
    174   initializeGlobalISel(*PR);
    175   initializeAArch64A53Fix835769Pass(*PR);
    176   initializeAArch64A57FPLoadBalancingPass(*PR);
    177   initializeAArch64AdvSIMDScalarPass(*PR);
    178   initializeAArch64BranchTargetsPass(*PR);
    179   initializeAArch64CollectLOHPass(*PR);
    180   initializeAArch64CompressJumpTablesPass(*PR);
    181   initializeAArch64ConditionalComparesPass(*PR);
    182   initializeAArch64ConditionOptimizerPass(*PR);
    183   initializeAArch64DeadRegisterDefinitionsPass(*PR);
    184   initializeAArch64ExpandPseudoPass(*PR);
    185   initializeAArch64LoadStoreOptPass(*PR);
    186   initializeAArch64SIMDInstrOptPass(*PR);
    187   initializeAArch64O0PreLegalizerCombinerPass(*PR);
    188   initializeAArch64PreLegalizerCombinerPass(*PR);
    189   initializeAArch64PostLegalizerCombinerPass(*PR);
    190   initializeAArch64PostLegalizerLoweringPass(*PR);
    191   initializeAArch64PostSelectOptimizePass(*PR);
    192   initializeAArch64PromoteConstantPass(*PR);
    193   initializeAArch64RedundantCopyEliminationPass(*PR);
    194   initializeAArch64StorePairSuppressPass(*PR);
    195   initializeFalkorHWPFFixPass(*PR);
    196   initializeFalkorMarkStridedAccessesLegacyPass(*PR);
    197   initializeLDTLSCleanupPass(*PR);
    198   initializeSVEIntrinsicOptsPass(*PR);
    199   initializeAArch64SpeculationHardeningPass(*PR);
    200   initializeAArch64SLSHardeningPass(*PR);
    201   initializeAArch64StackTaggingPass(*PR);
    202   initializeAArch64StackTaggingPreRAPass(*PR);
    203   initializeAArch64LowerHomogeneousPrologEpilogPass(*PR);
    204 }
    205 
    206 //===----------------------------------------------------------------------===//
    207 // AArch64 Lowering public interface.
    208 //===----------------------------------------------------------------------===//
    209 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
    210   if (TT.isOSBinFormatMachO())
    211     return std::make_unique<AArch64_MachoTargetObjectFile>();
    212   if (TT.isOSBinFormatCOFF())
    213     return std::make_unique<AArch64_COFFTargetObjectFile>();
    214 
    215   return std::make_unique<AArch64_ELFTargetObjectFile>();
    216 }
    217 
    218 // Helper function to build a DataLayout string
    219 static std::string computeDataLayout(const Triple &TT,
    220                                      const MCTargetOptions &Options,
    221                                      bool LittleEndian) {
    222   if (TT.isOSBinFormatMachO()) {
    223     if (TT.getArch() == Triple::aarch64_32)
    224       return "e-m:o-p:32:32-i64:64-i128:128-n32:64-S128";
    225     return "e-m:o-i64:64-i128:128-n32:64-S128";
    226   }
    227   if (TT.isOSBinFormatCOFF())
    228     return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128";
    229   std::string Endian = LittleEndian ? "e" : "E";
    230   std::string Ptr32 = TT.getEnvironment() == Triple::GNUILP32 ? "-p:32:32" : "";
    231   return Endian + "-m:e" + Ptr32 +
    232          "-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
    233 }
    234 
    235 static StringRef computeDefaultCPU(const Triple &TT, StringRef CPU) {
    236   if (CPU.empty() && TT.isArm64e())
    237     return "apple-a12";
    238   return CPU;
    239 }
    240 
    241 static Reloc::Model getEffectiveRelocModel(const Triple &TT,
    242                                            Optional<Reloc::Model> RM) {
    243   // AArch64 Darwin and Windows are always PIC.
    244   if (TT.isOSDarwin() || TT.isOSWindows())
    245     return Reloc::PIC_;
    246   // On ELF platforms the default static relocation model has a smart enough
    247   // linker to cope with referencing external symbols defined in a shared
    248   // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
    249   if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC)
    250     return Reloc::Static;
    251   return *RM;
    252 }
    253 
    254 static CodeModel::Model
    255 getEffectiveAArch64CodeModel(const Triple &TT, Optional<CodeModel::Model> CM,
    256                              bool JIT) {
    257   if (CM) {
    258     if (*CM != CodeModel::Small && *CM != CodeModel::Tiny &&
    259         *CM != CodeModel::Large) {
    260       report_fatal_error(
    261           "Only small, tiny and large code models are allowed on AArch64");
    262     } else if (*CM == CodeModel::Tiny && !TT.isOSBinFormatELF())
    263       report_fatal_error("tiny code model is only supported on ELF");
    264     return *CM;
    265   }
    266   // The default MCJIT memory managers make no guarantees about where they can
    267   // find an executable page; JITed code needs to be able to refer to globals
    268   // no matter how far away they are.
    269   // We should set the CodeModel::Small for Windows ARM64 in JIT mode,
    270   // since with large code model LLVM generating 4 MOV instructions, and
    271   // Windows doesn't support relocating these long branch (4 MOVs).
    272   if (JIT && !TT.isOSWindows())
    273     return CodeModel::Large;
    274   return CodeModel::Small;
    275 }
    276 
    277 /// Create an AArch64 architecture model.
    278 ///
    279 AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
    280                                            StringRef CPU, StringRef FS,
    281                                            const TargetOptions &Options,
    282                                            Optional<Reloc::Model> RM,
    283                                            Optional<CodeModel::Model> CM,
    284                                            CodeGenOpt::Level OL, bool JIT,
    285                                            bool LittleEndian)
    286     : LLVMTargetMachine(T,
    287                         computeDataLayout(TT, Options.MCOptions, LittleEndian),
    288                         TT, computeDefaultCPU(TT, CPU), FS, Options,
    289                         getEffectiveRelocModel(TT, RM),
    290                         getEffectiveAArch64CodeModel(TT, CM, JIT), OL),
    291       TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) {
    292   initAsmInfo();
    293 
    294   if (TT.isOSBinFormatMachO()) {
    295     this->Options.TrapUnreachable = true;
    296     this->Options.NoTrapAfterNoreturn = true;
    297   }
    298 
    299   if (getMCAsmInfo()->usesWindowsCFI()) {
    300     // Unwinding can get confused if the last instruction in an
    301     // exception-handling region (function, funclet, try block, etc.)
    302     // is a call.
    303     //
    304     // FIXME: We could elide the trap if the next instruction would be in
    305     // the same region anyway.
    306     this->Options.TrapUnreachable = true;
    307   }
    308 
    309   if (this->Options.TLSSize == 0) // default
    310     this->Options.TLSSize = 24;
    311   if ((getCodeModel() == CodeModel::Small ||
    312        getCodeModel() == CodeModel::Kernel) &&
    313       this->Options.TLSSize > 32)
    314     // for the small (and kernel) code model, the maximum TLS size is 4GiB
    315     this->Options.TLSSize = 32;
    316   else if (getCodeModel() == CodeModel::Tiny && this->Options.TLSSize > 24)
    317     // for the tiny code model, the maximum TLS size is 1MiB (< 16MiB)
    318     this->Options.TLSSize = 24;
    319 
    320   // Enable GlobalISel at or below EnableGlobalISelAt0, unless this is
    321   // MachO/CodeModel::Large, which GlobalISel does not support.
    322   if (getOptLevel() <= EnableGlobalISelAtO &&
    323       TT.getArch() != Triple::aarch64_32 &&
    324       TT.getEnvironment() != Triple::GNUILP32 &&
    325       !(getCodeModel() == CodeModel::Large && TT.isOSBinFormatMachO())) {
    326     setGlobalISel(true);
    327     setGlobalISelAbort(GlobalISelAbortMode::Disable);
    328   }
    329 
    330   // AArch64 supports the MachineOutliner.
    331   setMachineOutliner(true);
    332 
    333   // AArch64 supports default outlining behaviour.
    334   setSupportsDefaultOutlining(true);
    335 
    336   // AArch64 supports the debug entry values.
    337   setSupportsDebugEntryValues(true);
    338 }
    339 
    340 AArch64TargetMachine::~AArch64TargetMachine() = default;
    341 
    342 const AArch64Subtarget *
    343 AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
    344   Attribute CPUAttr = F.getFnAttribute("target-cpu");
    345   Attribute FSAttr = F.getFnAttribute("target-features");
    346 
    347   std::string CPU =
    348       CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
    349   std::string FS =
    350       FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
    351 
    352   auto &I = SubtargetMap[CPU + FS];
    353   if (!I) {
    354     // This needs to be done before we create a new subtarget since any
    355     // creation will depend on the TM and the code generation flags on the
    356     // function that reside in TargetOptions.
    357     resetTargetOptions(F);
    358     I = std::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
    359                                             isLittle);
    360   }
    361   return I.get();
    362 }
    363 
    364 void AArch64leTargetMachine::anchor() { }
    365 
    366 AArch64leTargetMachine::AArch64leTargetMachine(
    367     const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
    368     const TargetOptions &Options, Optional<Reloc::Model> RM,
    369     Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
    370     : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {}
    371 
    372 void AArch64beTargetMachine::anchor() { }
    373 
    374 AArch64beTargetMachine::AArch64beTargetMachine(
    375     const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
    376     const TargetOptions &Options, Optional<Reloc::Model> RM,
    377     Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
    378     : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {}
    379 
    380 namespace {
    381 
    382 /// AArch64 Code Generator Pass Configuration Options.
    383 class AArch64PassConfig : public TargetPassConfig {
    384 public:
    385   AArch64PassConfig(AArch64TargetMachine &TM, PassManagerBase &PM)
    386       : TargetPassConfig(TM, PM) {
    387     if (TM.getOptLevel() != CodeGenOpt::None)
    388       substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
    389   }
    390 
    391   AArch64TargetMachine &getAArch64TargetMachine() const {
    392     return getTM<AArch64TargetMachine>();
    393   }
    394 
    395   ScheduleDAGInstrs *
    396   createMachineScheduler(MachineSchedContext *C) const override {
    397     const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
    398     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    399     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    400     DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
    401     if (ST.hasFusion())
    402       DAG->addMutation(createAArch64MacroFusionDAGMutation());
    403     return DAG;
    404   }
    405 
    406   ScheduleDAGInstrs *
    407   createPostMachineScheduler(MachineSchedContext *C) const override {
    408     const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
    409     if (ST.hasFusion()) {
    410       // Run the Macro Fusion after RA again since literals are expanded from
    411       // pseudos then (v. addPreSched2()).
    412       ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
    413       DAG->addMutation(createAArch64MacroFusionDAGMutation());
    414       return DAG;
    415     }
    416 
    417     return nullptr;
    418   }
    419 
    420   void addIRPasses()  override;
    421   bool addPreISel() override;
    422   bool addInstSelector() override;
    423   bool addIRTranslator() override;
    424   void addPreLegalizeMachineIR() override;
    425   bool addLegalizeMachineIR() override;
    426   void addPreRegBankSelect() override;
    427   bool addRegBankSelect() override;
    428   void addPreGlobalInstructionSelect() override;
    429   bool addGlobalInstructionSelect() override;
    430   bool addILPOpts() override;
    431   void addPreRegAlloc() override;
    432   void addPostRegAlloc() override;
    433   void addPreSched2() override;
    434   void addPreEmitPass() override;
    435   void addPreEmitPass2() override;
    436 
    437   std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
    438 };
    439 
    440 } // end anonymous namespace
    441 
    442 TargetTransformInfo
    443 AArch64TargetMachine::getTargetTransformInfo(const Function &F) {
    444   return TargetTransformInfo(AArch64TTIImpl(this, F));
    445 }
    446 
    447 TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
    448   return new AArch64PassConfig(*this, PM);
    449 }
    450 
    451 std::unique_ptr<CSEConfigBase> AArch64PassConfig::getCSEConfig() const {
    452   return getStandardCSEConfigForOpt(TM->getOptLevel());
    453 }
    454 
    455 void AArch64PassConfig::addIRPasses() {
    456   // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
    457   // ourselves.
    458   addPass(createAtomicExpandPass());
    459 
    460   // Expand any SVE vector library calls that we can't code generate directly.
    461   if (EnableSVEIntrinsicOpts && TM->getOptLevel() == CodeGenOpt::Aggressive)
    462     addPass(createSVEIntrinsicOptsPass());
    463 
    464   // Cmpxchg instructions are often used with a subsequent comparison to
    465   // determine whether it succeeded. We can exploit existing control-flow in
    466   // ldrex/strex loops to simplify this, but it needs tidying up.
    467   if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
    468     addPass(createCFGSimplificationPass(SimplifyCFGOptions()
    469                                             .forwardSwitchCondToPhi(true)
    470                                             .convertSwitchToLookupTable(true)
    471                                             .needCanonicalLoops(false)
    472                                             .hoistCommonInsts(true)
    473                                             .sinkCommonInsts(true)));
    474 
    475   // Run LoopDataPrefetch
    476   //
    477   // Run this before LSR to remove the multiplies involved in computing the
    478   // pointer values N iterations ahead.
    479   if (TM->getOptLevel() != CodeGenOpt::None) {
    480     if (EnableLoopDataPrefetch)
    481       addPass(createLoopDataPrefetchPass());
    482     if (EnableFalkorHWPFFix)
    483       addPass(createFalkorMarkStridedAccessesPass());
    484   }
    485 
    486   TargetPassConfig::addIRPasses();
    487 
    488   addPass(createAArch64StackTaggingPass(
    489       /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None));
    490 
    491   // Match interleaved memory accesses to ldN/stN intrinsics.
    492   if (TM->getOptLevel() != CodeGenOpt::None) {
    493     addPass(createInterleavedLoadCombinePass());
    494     addPass(createInterleavedAccessPass());
    495   }
    496 
    497   if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
    498     // Call SeparateConstOffsetFromGEP pass to extract constants within indices
    499     // and lower a GEP with multiple indices to either arithmetic operations or
    500     // multiple GEPs with single index.
    501     addPass(createSeparateConstOffsetFromGEPPass(true));
    502     // Call EarlyCSE pass to find and remove subexpressions in the lowered
    503     // result.
    504     addPass(createEarlyCSEPass());
    505     // Do loop invariant code motion in case part of the lowered result is
    506     // invariant.
    507     addPass(createLICMPass());
    508   }
    509 
    510   // Add Control Flow Guard checks.
    511   if (TM->getTargetTriple().isOSWindows())
    512     addPass(createCFGuardCheckPass());
    513 }
    514 
    515 // Pass Pipeline Configuration
    516 bool AArch64PassConfig::addPreISel() {
    517   // Run promote constant before global merge, so that the promoted constants
    518   // get a chance to be merged
    519   if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
    520     addPass(createAArch64PromoteConstantPass());
    521   // FIXME: On AArch64, this depends on the type.
    522   // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
    523   // and the offset has to be a multiple of the related size in bytes.
    524   if ((TM->getOptLevel() != CodeGenOpt::None &&
    525        EnableGlobalMerge == cl::BOU_UNSET) ||
    526       EnableGlobalMerge == cl::BOU_TRUE) {
    527     bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
    528                                (EnableGlobalMerge == cl::BOU_UNSET);
    529 
    530     // Merging of extern globals is enabled by default on non-Mach-O as we
    531     // expect it to be generally either beneficial or harmless. On Mach-O it
    532     // is disabled as we emit the .subsections_via_symbols directive which
    533     // means that merging extern globals is not safe.
    534     bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();
    535 
    536     // FIXME: extern global merging is only enabled when we optimise for size
    537     // because there are some regressions with it also enabled for performance.
    538     if (!OnlyOptimizeForSize)
    539       MergeExternalByDefault = false;
    540 
    541     addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize,
    542                                   MergeExternalByDefault));
    543   }
    544 
    545   return false;
    546 }
    547 
    548 bool AArch64PassConfig::addInstSelector() {
    549   addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
    550 
    551   // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many
    552   // references to _TLS_MODULE_BASE_ as possible.
    553   if (TM->getTargetTriple().isOSBinFormatELF() &&
    554       getOptLevel() != CodeGenOpt::None)
    555     addPass(createAArch64CleanupLocalDynamicTLSPass());
    556 
    557   return false;
    558 }
    559 
    560 bool AArch64PassConfig::addIRTranslator() {
    561   addPass(new IRTranslator(getOptLevel()));
    562   return false;
    563 }
    564 
    565 void AArch64PassConfig::addPreLegalizeMachineIR() {
    566   if (getOptLevel() == CodeGenOpt::None)
    567     addPass(createAArch64O0PreLegalizerCombiner());
    568   else
    569     addPass(createAArch64PreLegalizerCombiner());
    570 }
    571 
    572 bool AArch64PassConfig::addLegalizeMachineIR() {
    573   addPass(new Legalizer());
    574   return false;
    575 }
    576 
    577 void AArch64PassConfig::addPreRegBankSelect() {
    578   bool IsOptNone = getOptLevel() == CodeGenOpt::None;
    579   if (!IsOptNone)
    580     addPass(createAArch64PostLegalizerCombiner(IsOptNone));
    581   addPass(createAArch64PostLegalizerLowering());
    582 }
    583 
    584 bool AArch64PassConfig::addRegBankSelect() {
    585   addPass(new RegBankSelect());
    586   return false;
    587 }
    588 
    589 void AArch64PassConfig::addPreGlobalInstructionSelect() {
    590   addPass(new Localizer());
    591 }
    592 
    593 bool AArch64PassConfig::addGlobalInstructionSelect() {
    594   addPass(new InstructionSelect(getOptLevel()));
    595   if (getOptLevel() != CodeGenOpt::None)
    596     addPass(createAArch64PostSelectOptimize());
    597   return false;
    598 }
    599 
    600 bool AArch64PassConfig::addILPOpts() {
    601   if (EnableCondOpt)
    602     addPass(createAArch64ConditionOptimizerPass());
    603   if (EnableCCMP)
    604     addPass(createAArch64ConditionalCompares());
    605   if (EnableMCR)
    606     addPass(&MachineCombinerID);
    607   if (EnableCondBrTuning)
    608     addPass(createAArch64CondBrTuning());
    609   if (EnableEarlyIfConversion)
    610     addPass(&EarlyIfConverterID);
    611   if (EnableStPairSuppress)
    612     addPass(createAArch64StorePairSuppressPass());
    613   addPass(createAArch64SIMDInstrOptPass());
    614   if (TM->getOptLevel() != CodeGenOpt::None)
    615     addPass(createAArch64StackTaggingPreRAPass());
    616   return true;
    617 }
    618 
    619 void AArch64PassConfig::addPreRegAlloc() {
    620   // Change dead register definitions to refer to the zero register.
    621   if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
    622     addPass(createAArch64DeadRegisterDefinitions());
    623 
    624   // Use AdvSIMD scalar instructions whenever profitable.
    625   if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
    626     addPass(createAArch64AdvSIMDScalar());
    627     // The AdvSIMD pass may produce copies that can be rewritten to
    628     // be register coalescer friendly.
    629     addPass(&PeepholeOptimizerID);
    630   }
    631 }
    632 
    633 void AArch64PassConfig::addPostRegAlloc() {
    634   // Remove redundant copy instructions.
    635   if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
    636     addPass(createAArch64RedundantCopyEliminationPass());
    637 
    638   if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
    639     // Improve performance for some FP/SIMD code for A57.
    640     addPass(createAArch64A57FPLoadBalancing());
    641 }
    642 
    643 void AArch64PassConfig::addPreSched2() {
    644   // Lower homogeneous frame instructions
    645   if (EnableHomogeneousPrologEpilog)
    646     addPass(createAArch64LowerHomogeneousPrologEpilogPass());
    647   // Expand some pseudo instructions to allow proper scheduling.
    648   addPass(createAArch64ExpandPseudoPass());
    649   // Use load/store pair instructions when possible.
    650   if (TM->getOptLevel() != CodeGenOpt::None) {
    651     if (EnableLoadStoreOpt)
    652       addPass(createAArch64LoadStoreOptimizationPass());
    653   }
    654 
    655   // The AArch64SpeculationHardeningPass destroys dominator tree and natural
    656   // loop info, which is needed for the FalkorHWPFFixPass and also later on.
    657   // Therefore, run the AArch64SpeculationHardeningPass before the
    658   // FalkorHWPFFixPass to avoid recomputing dominator tree and natural loop
    659   // info.
    660   addPass(createAArch64SpeculationHardeningPass());
    661 
    662   addPass(createAArch64IndirectThunks());
    663   addPass(createAArch64SLSHardeningPass());
    664 
    665   if (TM->getOptLevel() != CodeGenOpt::None) {
    666     if (EnableFalkorHWPFFix)
    667       addPass(createFalkorHWPFFixPass());
    668   }
    669 }
    670 
    671 void AArch64PassConfig::addPreEmitPass() {
    672   // Machine Block Placement might have created new opportunities when run
    673   // at O3, where the Tail Duplication Threshold is set to 4 instructions.
    674   // Run the load/store optimizer once more.
    675   if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt)
    676     addPass(createAArch64LoadStoreOptimizationPass());
    677 
    678   if (EnableA53Fix835769)
    679     addPass(createAArch64A53Fix835769());
    680 
    681   if (EnableBranchTargets)
    682     addPass(createAArch64BranchTargetsPass());
    683 
    684   // Relax conditional branch instructions if they're otherwise out of
    685   // range of their destination.
    686   if (BranchRelaxation)
    687     addPass(&BranchRelaxationPassID);
    688 
    689   if (TM->getTargetTriple().isOSWindows()) {
    690     // Identify valid longjmp targets for Windows Control Flow Guard.
    691     addPass(createCFGuardLongjmpPass());
    692     // Identify valid eh continuation targets for Windows EHCont Guard.
    693     addPass(createEHContGuardCatchretPass());
    694   }
    695 
    696   if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
    697     addPass(createAArch64CompressJumpTablesPass());
    698 
    699   if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
    700       TM->getTargetTriple().isOSBinFormatMachO())
    701     addPass(createAArch64CollectLOHPass());
    702 }
    703 
    704 void AArch64PassConfig::addPreEmitPass2() {
    705   // SVE bundles move prefixes with destructive operations. BLR_RVMARKER pseudo
    706   // instructions are lowered to bundles as well.
    707   addPass(createUnpackMachineBundles(nullptr));
    708 }
    709 
    710 yaml::MachineFunctionInfo *
    711 AArch64TargetMachine::createDefaultFuncInfoYAML() const {
    712   return new yaml::AArch64FunctionInfo();
    713 }
    714 
    715 yaml::MachineFunctionInfo *
    716 AArch64TargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
    717   const auto *MFI = MF.getInfo<AArch64FunctionInfo>();
    718   return new yaml::AArch64FunctionInfo(*MFI);
    719 }
    720 
    721 bool AArch64TargetMachine::parseMachineFunctionInfo(
    722     const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
    723     SMDiagnostic &Error, SMRange &SourceRange) const {
    724   const auto &YamlMFI =
    725       reinterpret_cast<const yaml::AArch64FunctionInfo &>(MFI);
    726   MachineFunction &MF = PFS.MF;
    727   MF.getInfo<AArch64FunctionInfo>()->initializeBaseYamlFields(YamlMFI);
    728   return false;
    729 }
    730