Home | History | Annotate | Line # | Download | only in MC
      1 //===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 
      9 #include "llvm/ADT/DenseMap.h"
     10 #include "llvm/ADT/Twine.h"
     11 #include "llvm/ADT/iterator_range.h"
     12 #include "llvm/BinaryFormat/MachO.h"
     13 #include "llvm/MC/MCAsmBackend.h"
     14 #include "llvm/MC/MCAsmLayout.h"
     15 #include "llvm/MC/MCAssembler.h"
     16 #include "llvm/MC/MCContext.h"
     17 #include "llvm/MC/MCDirectives.h"
     18 #include "llvm/MC/MCExpr.h"
     19 #include "llvm/MC/MCFixupKindInfo.h"
     20 #include "llvm/MC/MCFragment.h"
     21 #include "llvm/MC/MCMachObjectWriter.h"
     22 #include "llvm/MC/MCObjectWriter.h"
     23 #include "llvm/MC/MCSection.h"
     24 #include "llvm/MC/MCSectionMachO.h"
     25 #include "llvm/MC/MCSymbol.h"
     26 #include "llvm/MC/MCSymbolMachO.h"
     27 #include "llvm/MC/MCValue.h"
     28 #include "llvm/Support/Alignment.h"
     29 #include "llvm/Support/Casting.h"
     30 #include "llvm/Support/Debug.h"
     31 #include "llvm/Support/ErrorHandling.h"
     32 #include "llvm/Support/MathExtras.h"
     33 #include "llvm/Support/raw_ostream.h"
     34 #include <algorithm>
     35 #include <cassert>
     36 #include <cstdint>
     37 #include <string>
     38 #include <utility>
     39 #include <vector>
     40 
     41 using namespace llvm;
     42 
     43 #define DEBUG_TYPE "mc"
     44 
     45 void MachObjectWriter::reset() {
     46   Relocations.clear();
     47   IndirectSymBase.clear();
     48   StringTable.clear();
     49   LocalSymbolData.clear();
     50   ExternalSymbolData.clear();
     51   UndefinedSymbolData.clear();
     52   MCObjectWriter::reset();
     53 }
     54 
     55 bool MachObjectWriter::doesSymbolRequireExternRelocation(const MCSymbol &S) {
     56   // Undefined symbols are always extern.
     57   if (S.isUndefined())
     58     return true;
     59 
     60   // References to weak definitions require external relocation entries; the
     61   // definition may not always be the one in the same object file.
     62   if (cast<MCSymbolMachO>(S).isWeakDefinition())
     63     return true;
     64 
     65   // Otherwise, we can use an internal relocation.
     66   return false;
     67 }
     68 
     69 bool MachObjectWriter::
     70 MachSymbolData::operator<(const MachSymbolData &RHS) const {
     71   return Symbol->getName() < RHS.Symbol->getName();
     72 }
     73 
     74 bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
     75   const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
     76     (MCFixupKind) Kind);
     77 
     78   return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
     79 }
     80 
     81 uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment,
     82                                               const MCAsmLayout &Layout) const {
     83   return getSectionAddress(Fragment->getParent()) +
     84          Layout.getFragmentOffset(Fragment);
     85 }
     86 
     87 uint64_t MachObjectWriter::getSymbolAddress(const MCSymbol &S,
     88                                             const MCAsmLayout &Layout) const {
     89   // If this is a variable, then recursively evaluate now.
     90   if (S.isVariable()) {
     91     if (const MCConstantExpr *C =
     92           dyn_cast<const MCConstantExpr>(S.getVariableValue()))
     93       return C->getValue();
     94 
     95     MCValue Target;
     96     if (!S.getVariableValue()->evaluateAsRelocatable(Target, &Layout, nullptr))
     97       report_fatal_error("unable to evaluate offset for variable '" +
     98                          S.getName() + "'");
     99 
    100     // Verify that any used symbols are defined.
    101     if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
    102       report_fatal_error("unable to evaluate offset to undefined symbol '" +
    103                          Target.getSymA()->getSymbol().getName() + "'");
    104     if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
    105       report_fatal_error("unable to evaluate offset to undefined symbol '" +
    106                          Target.getSymB()->getSymbol().getName() + "'");
    107 
    108     uint64_t Address = Target.getConstant();
    109     if (Target.getSymA())
    110       Address += getSymbolAddress(Target.getSymA()->getSymbol(), Layout);
    111     if (Target.getSymB())
    112       Address += getSymbolAddress(Target.getSymB()->getSymbol(), Layout);
    113     return Address;
    114   }
    115 
    116   return getSectionAddress(S.getFragment()->getParent()) +
    117          Layout.getSymbolOffset(S);
    118 }
    119 
    120 uint64_t MachObjectWriter::getPaddingSize(const MCSection *Sec,
    121                                           const MCAsmLayout &Layout) const {
    122   uint64_t EndAddr = getSectionAddress(Sec) + Layout.getSectionAddressSize(Sec);
    123   unsigned Next = Sec->getLayoutOrder() + 1;
    124   if (Next >= Layout.getSectionOrder().size())
    125     return 0;
    126 
    127   const MCSection &NextSec = *Layout.getSectionOrder()[Next];
    128   if (NextSec.isVirtualSection())
    129     return 0;
    130   return offsetToAlignment(EndAddr, Align(NextSec.getAlignment()));
    131 }
    132 
    133 void MachObjectWriter::writeHeader(MachO::HeaderFileType Type,
    134                                    unsigned NumLoadCommands,
    135                                    unsigned LoadCommandsSize,
    136                                    bool SubsectionsViaSymbols) {
    137   uint32_t Flags = 0;
    138 
    139   if (SubsectionsViaSymbols)
    140     Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
    141 
    142   // struct mach_header (28 bytes) or
    143   // struct mach_header_64 (32 bytes)
    144 
    145   uint64_t Start = W.OS.tell();
    146   (void) Start;
    147 
    148   W.write<uint32_t>(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC);
    149 
    150   W.write<uint32_t>(TargetObjectWriter->getCPUType());
    151   W.write<uint32_t>(TargetObjectWriter->getCPUSubtype());
    152 
    153   W.write<uint32_t>(Type);
    154   W.write<uint32_t>(NumLoadCommands);
    155   W.write<uint32_t>(LoadCommandsSize);
    156   W.write<uint32_t>(Flags);
    157   if (is64Bit())
    158     W.write<uint32_t>(0); // reserved
    159 
    160   assert(W.OS.tell() - Start == (is64Bit() ? sizeof(MachO::mach_header_64)
    161                                            : sizeof(MachO::mach_header)));
    162 }
    163 
    164 void MachObjectWriter::writeWithPadding(StringRef Str, uint64_t Size) {
    165   assert(Size >= Str.size());
    166   W.OS << Str;
    167   W.OS.write_zeros(Size - Str.size());
    168 }
    169 
    170 /// writeSegmentLoadCommand - Write a segment load command.
    171 ///
    172 /// \param NumSections The number of sections in this segment.
    173 /// \param SectionDataSize The total size of the sections.
    174 void MachObjectWriter::writeSegmentLoadCommand(
    175     StringRef Name, unsigned NumSections, uint64_t VMAddr, uint64_t VMSize,
    176     uint64_t SectionDataStartOffset, uint64_t SectionDataSize, uint32_t MaxProt,
    177     uint32_t InitProt) {
    178   // struct segment_command (56 bytes) or
    179   // struct segment_command_64 (72 bytes)
    180 
    181   uint64_t Start = W.OS.tell();
    182   (void) Start;
    183 
    184   unsigned SegmentLoadCommandSize =
    185     is64Bit() ? sizeof(MachO::segment_command_64):
    186     sizeof(MachO::segment_command);
    187   W.write<uint32_t>(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT);
    188   W.write<uint32_t>(SegmentLoadCommandSize +
    189           NumSections * (is64Bit() ? sizeof(MachO::section_64) :
    190                          sizeof(MachO::section)));
    191 
    192   writeWithPadding(Name, 16);
    193   if (is64Bit()) {
    194     W.write<uint64_t>(VMAddr);                 // vmaddr
    195     W.write<uint64_t>(VMSize); // vmsize
    196     W.write<uint64_t>(SectionDataStartOffset); // file offset
    197     W.write<uint64_t>(SectionDataSize); // file size
    198   } else {
    199     W.write<uint32_t>(VMAddr);                 // vmaddr
    200     W.write<uint32_t>(VMSize); // vmsize
    201     W.write<uint32_t>(SectionDataStartOffset); // file offset
    202     W.write<uint32_t>(SectionDataSize); // file size
    203   }
    204   // maxprot
    205   W.write<uint32_t>(MaxProt);
    206   // initprot
    207   W.write<uint32_t>(InitProt);
    208   W.write<uint32_t>(NumSections);
    209   W.write<uint32_t>(0); // flags
    210 
    211   assert(W.OS.tell() - Start == SegmentLoadCommandSize);
    212 }
    213 
    214 void MachObjectWriter::writeSection(const MCAsmLayout &Layout,
    215                                     const MCSection &Sec, uint64_t VMAddr,
    216                                     uint64_t FileOffset, unsigned Flags,
    217                                     uint64_t RelocationsStart,
    218                                     unsigned NumRelocations) {
    219   uint64_t SectionSize = Layout.getSectionAddressSize(&Sec);
    220   const MCSectionMachO &Section = cast<MCSectionMachO>(Sec);
    221 
    222   // The offset is unused for virtual sections.
    223   if (Section.isVirtualSection()) {
    224     assert(Layout.getSectionFileSize(&Sec) == 0 && "Invalid file size!");
    225     FileOffset = 0;
    226   }
    227 
    228   // struct section (68 bytes) or
    229   // struct section_64 (80 bytes)
    230 
    231   uint64_t Start = W.OS.tell();
    232   (void) Start;
    233 
    234   writeWithPadding(Section.getName(), 16);
    235   writeWithPadding(Section.getSegmentName(), 16);
    236   if (is64Bit()) {
    237     W.write<uint64_t>(VMAddr);      // address
    238     W.write<uint64_t>(SectionSize); // size
    239   } else {
    240     W.write<uint32_t>(VMAddr);      // address
    241     W.write<uint32_t>(SectionSize); // size
    242   }
    243   W.write<uint32_t>(FileOffset);
    244 
    245   assert(isPowerOf2_32(Section.getAlignment()) && "Invalid alignment!");
    246   W.write<uint32_t>(Log2_32(Section.getAlignment()));
    247   W.write<uint32_t>(NumRelocations ? RelocationsStart : 0);
    248   W.write<uint32_t>(NumRelocations);
    249   W.write<uint32_t>(Flags);
    250   W.write<uint32_t>(IndirectSymBase.lookup(&Sec)); // reserved1
    251   W.write<uint32_t>(Section.getStubSize()); // reserved2
    252   if (is64Bit())
    253     W.write<uint32_t>(0); // reserved3
    254 
    255   assert(W.OS.tell() - Start ==
    256          (is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section)));
    257 }
    258 
    259 void MachObjectWriter::writeSymtabLoadCommand(uint32_t SymbolOffset,
    260                                               uint32_t NumSymbols,
    261                                               uint32_t StringTableOffset,
    262                                               uint32_t StringTableSize) {
    263   // struct symtab_command (24 bytes)
    264 
    265   uint64_t Start = W.OS.tell();
    266   (void) Start;
    267 
    268   W.write<uint32_t>(MachO::LC_SYMTAB);
    269   W.write<uint32_t>(sizeof(MachO::symtab_command));
    270   W.write<uint32_t>(SymbolOffset);
    271   W.write<uint32_t>(NumSymbols);
    272   W.write<uint32_t>(StringTableOffset);
    273   W.write<uint32_t>(StringTableSize);
    274 
    275   assert(W.OS.tell() - Start == sizeof(MachO::symtab_command));
    276 }
    277 
    278 void MachObjectWriter::writeDysymtabLoadCommand(uint32_t FirstLocalSymbol,
    279                                                 uint32_t NumLocalSymbols,
    280                                                 uint32_t FirstExternalSymbol,
    281                                                 uint32_t NumExternalSymbols,
    282                                                 uint32_t FirstUndefinedSymbol,
    283                                                 uint32_t NumUndefinedSymbols,
    284                                                 uint32_t IndirectSymbolOffset,
    285                                                 uint32_t NumIndirectSymbols) {
    286   // struct dysymtab_command (80 bytes)
    287 
    288   uint64_t Start = W.OS.tell();
    289   (void) Start;
    290 
    291   W.write<uint32_t>(MachO::LC_DYSYMTAB);
    292   W.write<uint32_t>(sizeof(MachO::dysymtab_command));
    293   W.write<uint32_t>(FirstLocalSymbol);
    294   W.write<uint32_t>(NumLocalSymbols);
    295   W.write<uint32_t>(FirstExternalSymbol);
    296   W.write<uint32_t>(NumExternalSymbols);
    297   W.write<uint32_t>(FirstUndefinedSymbol);
    298   W.write<uint32_t>(NumUndefinedSymbols);
    299   W.write<uint32_t>(0); // tocoff
    300   W.write<uint32_t>(0); // ntoc
    301   W.write<uint32_t>(0); // modtaboff
    302   W.write<uint32_t>(0); // nmodtab
    303   W.write<uint32_t>(0); // extrefsymoff
    304   W.write<uint32_t>(0); // nextrefsyms
    305   W.write<uint32_t>(IndirectSymbolOffset);
    306   W.write<uint32_t>(NumIndirectSymbols);
    307   W.write<uint32_t>(0); // extreloff
    308   W.write<uint32_t>(0); // nextrel
    309   W.write<uint32_t>(0); // locreloff
    310   W.write<uint32_t>(0); // nlocrel
    311 
    312   assert(W.OS.tell() - Start == sizeof(MachO::dysymtab_command));
    313 }
    314 
    315 MachObjectWriter::MachSymbolData *
    316 MachObjectWriter::findSymbolData(const MCSymbol &Sym) {
    317   for (auto *SymbolData :
    318        {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
    319     for (MachSymbolData &Entry : *SymbolData)
    320       if (Entry.Symbol == &Sym)
    321         return &Entry;
    322 
    323   return nullptr;
    324 }
    325 
    326 const MCSymbol &MachObjectWriter::findAliasedSymbol(const MCSymbol &Sym) const {
    327   const MCSymbol *S = &Sym;
    328   while (S->isVariable()) {
    329     const MCExpr *Value = S->getVariableValue();
    330     const auto *Ref = dyn_cast<MCSymbolRefExpr>(Value);
    331     if (!Ref)
    332       return *S;
    333     S = &Ref->getSymbol();
    334   }
    335   return *S;
    336 }
    337 
    338 void MachObjectWriter::writeNlist(MachSymbolData &MSD,
    339                                   const MCAsmLayout &Layout) {
    340   const MCSymbol *Symbol = MSD.Symbol;
    341   const MCSymbol &Data = *Symbol;
    342   const MCSymbol *AliasedSymbol = &findAliasedSymbol(*Symbol);
    343   uint8_t SectionIndex = MSD.SectionIndex;
    344   uint8_t Type = 0;
    345   uint64_t Address = 0;
    346   bool IsAlias = Symbol != AliasedSymbol;
    347 
    348   const MCSymbol &OrigSymbol = *Symbol;
    349   MachSymbolData *AliaseeInfo;
    350   if (IsAlias) {
    351     AliaseeInfo = findSymbolData(*AliasedSymbol);
    352     if (AliaseeInfo)
    353       SectionIndex = AliaseeInfo->SectionIndex;
    354     Symbol = AliasedSymbol;
    355     // FIXME: Should this update Data as well?
    356   }
    357 
    358   // Set the N_TYPE bits. See <mach-o/nlist.h>.
    359   //
    360   // FIXME: Are the prebound or indirect fields possible here?
    361   if (IsAlias && Symbol->isUndefined())
    362     Type = MachO::N_INDR;
    363   else if (Symbol->isUndefined())
    364     Type = MachO::N_UNDF;
    365   else if (Symbol->isAbsolute())
    366     Type = MachO::N_ABS;
    367   else
    368     Type = MachO::N_SECT;
    369 
    370   // FIXME: Set STAB bits.
    371 
    372   if (Data.isPrivateExtern())
    373     Type |= MachO::N_PEXT;
    374 
    375   // Set external bit.
    376   if (Data.isExternal() || (!IsAlias && Symbol->isUndefined()))
    377     Type |= MachO::N_EXT;
    378 
    379   // Compute the symbol address.
    380   if (IsAlias && Symbol->isUndefined())
    381     Address = AliaseeInfo->StringIndex;
    382   else if (Symbol->isDefined())
    383     Address = getSymbolAddress(OrigSymbol, Layout);
    384   else if (Symbol->isCommon()) {
    385     // Common symbols are encoded with the size in the address
    386     // field, and their alignment in the flags.
    387     Address = Symbol->getCommonSize();
    388   }
    389 
    390   // struct nlist (12 bytes)
    391 
    392   W.write<uint32_t>(MSD.StringIndex);
    393   W.OS << char(Type);
    394   W.OS << char(SectionIndex);
    395 
    396   // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
    397   // value.
    398   bool EncodeAsAltEntry =
    399     IsAlias && cast<MCSymbolMachO>(OrigSymbol).isAltEntry();
    400   W.write<uint16_t>(cast<MCSymbolMachO>(Symbol)->getEncodedFlags(EncodeAsAltEntry));
    401   if (is64Bit())
    402     W.write<uint64_t>(Address);
    403   else
    404     W.write<uint32_t>(Address);
    405 }
    406 
    407 void MachObjectWriter::writeLinkeditLoadCommand(uint32_t Type,
    408                                                 uint32_t DataOffset,
    409                                                 uint32_t DataSize) {
    410   uint64_t Start = W.OS.tell();
    411   (void) Start;
    412 
    413   W.write<uint32_t>(Type);
    414   W.write<uint32_t>(sizeof(MachO::linkedit_data_command));
    415   W.write<uint32_t>(DataOffset);
    416   W.write<uint32_t>(DataSize);
    417 
    418   assert(W.OS.tell() - Start == sizeof(MachO::linkedit_data_command));
    419 }
    420 
    421 static unsigned ComputeLinkerOptionsLoadCommandSize(
    422   const std::vector<std::string> &Options, bool is64Bit)
    423 {
    424   unsigned Size = sizeof(MachO::linker_option_command);
    425   for (const std::string &Option : Options)
    426     Size += Option.size() + 1;
    427   return alignTo(Size, is64Bit ? 8 : 4);
    428 }
    429 
    430 void MachObjectWriter::writeLinkerOptionsLoadCommand(
    431   const std::vector<std::string> &Options)
    432 {
    433   unsigned Size = ComputeLinkerOptionsLoadCommandSize(Options, is64Bit());
    434   uint64_t Start = W.OS.tell();
    435   (void) Start;
    436 
    437   W.write<uint32_t>(MachO::LC_LINKER_OPTION);
    438   W.write<uint32_t>(Size);
    439   W.write<uint32_t>(Options.size());
    440   uint64_t BytesWritten = sizeof(MachO::linker_option_command);
    441   for (const std::string &Option : Options) {
    442     // Write each string, including the null byte.
    443     W.OS << Option << '\0';
    444     BytesWritten += Option.size() + 1;
    445   }
    446 
    447   // Pad to a multiple of the pointer size.
    448   W.OS.write_zeros(
    449       offsetToAlignment(BytesWritten, is64Bit() ? Align(8) : Align(4)));
    450 
    451   assert(W.OS.tell() - Start == Size);
    452 }
    453 
    454 static bool isFixupTargetValid(const MCValue &Target) {
    455   // Target is (LHS - RHS + cst).
    456   // We don't support the form where LHS is null: -RHS + cst
    457   if (!Target.getSymA() && Target.getSymB())
    458     return false;
    459   return true;
    460 }
    461 
    462 void MachObjectWriter::recordRelocation(MCAssembler &Asm,
    463                                         const MCAsmLayout &Layout,
    464                                         const MCFragment *Fragment,
    465                                         const MCFixup &Fixup, MCValue Target,
    466                                         uint64_t &FixedValue) {
    467   if (!isFixupTargetValid(Target)) {
    468     Asm.getContext().reportError(Fixup.getLoc(),
    469                                  "unsupported relocation expression");
    470     return;
    471   }
    472 
    473   TargetObjectWriter->recordRelocation(this, Asm, Layout, Fragment, Fixup,
    474                                        Target, FixedValue);
    475 }
    476 
    477 void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
    478   // This is the point where 'as' creates actual symbols for indirect symbols
    479   // (in the following two passes). It would be easier for us to do this sooner
    480   // when we see the attribute, but that makes getting the order in the symbol
    481   // table much more complicated than it is worth.
    482   //
    483   // FIXME: Revisit this when the dust settles.
    484 
    485   // Report errors for use of .indirect_symbol not in a symbol pointer section
    486   // or stub section.
    487   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
    488          ie = Asm.indirect_symbol_end(); it != ie; ++it) {
    489     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
    490 
    491     if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
    492         Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
    493         Section.getType() != MachO::S_THREAD_LOCAL_VARIABLE_POINTERS &&
    494         Section.getType() != MachO::S_SYMBOL_STUBS) {
    495       MCSymbol &Symbol = *it->Symbol;
    496       report_fatal_error("indirect symbol '" + Symbol.getName() +
    497                          "' not in a symbol pointer or stub section");
    498     }
    499   }
    500 
    501   // Bind non-lazy symbol pointers first.
    502   unsigned IndirectIndex = 0;
    503   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
    504          ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
    505     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
    506 
    507     if (Section.getType() != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
    508         Section.getType() !=  MachO::S_THREAD_LOCAL_VARIABLE_POINTERS)
    509       continue;
    510 
    511     // Initialize the section indirect symbol base, if necessary.
    512     IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
    513 
    514     Asm.registerSymbol(*it->Symbol);
    515   }
    516 
    517   // Then lazy symbol pointers and symbol stubs.
    518   IndirectIndex = 0;
    519   for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
    520          ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
    521     const MCSectionMachO &Section = cast<MCSectionMachO>(*it->Section);
    522 
    523     if (Section.getType() != MachO::S_LAZY_SYMBOL_POINTERS &&
    524         Section.getType() != MachO::S_SYMBOL_STUBS)
    525       continue;
    526 
    527     // Initialize the section indirect symbol base, if necessary.
    528     IndirectSymBase.insert(std::make_pair(it->Section, IndirectIndex));
    529 
    530     // Set the symbol type to undefined lazy, but only on construction.
    531     //
    532     // FIXME: Do not hardcode.
    533     bool Created;
    534     Asm.registerSymbol(*it->Symbol, &Created);
    535     if (Created)
    536       cast<MCSymbolMachO>(it->Symbol)->setReferenceTypeUndefinedLazy(true);
    537   }
    538 }
    539 
    540 /// computeSymbolTable - Compute the symbol table data
    541 void MachObjectWriter::computeSymbolTable(
    542     MCAssembler &Asm, std::vector<MachSymbolData> &LocalSymbolData,
    543     std::vector<MachSymbolData> &ExternalSymbolData,
    544     std::vector<MachSymbolData> &UndefinedSymbolData) {
    545   // Build section lookup table.
    546   DenseMap<const MCSection*, uint8_t> SectionIndexMap;
    547   unsigned Index = 1;
    548   for (MCAssembler::iterator it = Asm.begin(),
    549          ie = Asm.end(); it != ie; ++it, ++Index)
    550     SectionIndexMap[&*it] = Index;
    551   assert(Index <= 256 && "Too many sections!");
    552 
    553   // Build the string table.
    554   for (const MCSymbol &Symbol : Asm.symbols()) {
    555     if (!Asm.isSymbolLinkerVisible(Symbol))
    556       continue;
    557 
    558     StringTable.add(Symbol.getName());
    559   }
    560   StringTable.finalize();
    561 
    562   // Build the symbol arrays but only for non-local symbols.
    563   //
    564   // The particular order that we collect and then sort the symbols is chosen to
    565   // match 'as'. Even though it doesn't matter for correctness, this is
    566   // important for letting us diff .o files.
    567   for (const MCSymbol &Symbol : Asm.symbols()) {
    568     // Ignore non-linker visible symbols.
    569     if (!Asm.isSymbolLinkerVisible(Symbol))
    570       continue;
    571 
    572     if (!Symbol.isExternal() && !Symbol.isUndefined())
    573       continue;
    574 
    575     MachSymbolData MSD;
    576     MSD.Symbol = &Symbol;
    577     MSD.StringIndex = StringTable.getOffset(Symbol.getName());
    578 
    579     if (Symbol.isUndefined()) {
    580       MSD.SectionIndex = 0;
    581       UndefinedSymbolData.push_back(MSD);
    582     } else if (Symbol.isAbsolute()) {
    583       MSD.SectionIndex = 0;
    584       ExternalSymbolData.push_back(MSD);
    585     } else {
    586       MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
    587       assert(MSD.SectionIndex && "Invalid section index!");
    588       ExternalSymbolData.push_back(MSD);
    589     }
    590   }
    591 
    592   // Now add the data for local symbols.
    593   for (const MCSymbol &Symbol : Asm.symbols()) {
    594     // Ignore non-linker visible symbols.
    595     if (!Asm.isSymbolLinkerVisible(Symbol))
    596       continue;
    597 
    598     if (Symbol.isExternal() || Symbol.isUndefined())
    599       continue;
    600 
    601     MachSymbolData MSD;
    602     MSD.Symbol = &Symbol;
    603     MSD.StringIndex = StringTable.getOffset(Symbol.getName());
    604 
    605     if (Symbol.isAbsolute()) {
    606       MSD.SectionIndex = 0;
    607       LocalSymbolData.push_back(MSD);
    608     } else {
    609       MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
    610       assert(MSD.SectionIndex && "Invalid section index!");
    611       LocalSymbolData.push_back(MSD);
    612     }
    613   }
    614 
    615   // External and undefined symbols are required to be in lexicographic order.
    616   llvm::sort(ExternalSymbolData);
    617   llvm::sort(UndefinedSymbolData);
    618 
    619   // Set the symbol indices.
    620   Index = 0;
    621   for (auto *SymbolData :
    622        {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
    623     for (MachSymbolData &Entry : *SymbolData)
    624       Entry.Symbol->setIndex(Index++);
    625 
    626   for (const MCSection &Section : Asm) {
    627     for (RelAndSymbol &Rel : Relocations[&Section]) {
    628       if (!Rel.Sym)
    629         continue;
    630 
    631       // Set the Index and the IsExtern bit.
    632       unsigned Index = Rel.Sym->getIndex();
    633       assert(isInt<24>(Index));
    634       if (W.Endian == support::little)
    635         Rel.MRE.r_word1 = (Rel.MRE.r_word1 & (~0U << 24)) | Index | (1 << 27);
    636       else
    637         Rel.MRE.r_word1 = (Rel.MRE.r_word1 & 0xff) | Index << 8 | (1 << 4);
    638     }
    639   }
    640 }
    641 
    642 void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
    643                                                const MCAsmLayout &Layout) {
    644   uint64_t StartAddress = 0;
    645   for (const MCSection *Sec : Layout.getSectionOrder()) {
    646     StartAddress = alignTo(StartAddress, Sec->getAlignment());
    647     SectionAddress[Sec] = StartAddress;
    648     StartAddress += Layout.getSectionAddressSize(Sec);
    649 
    650     // Explicitly pad the section to match the alignment requirements of the
    651     // following one. This is for 'gas' compatibility, it shouldn't
    652     /// strictly be necessary.
    653     StartAddress += getPaddingSize(Sec, Layout);
    654   }
    655 }
    656 
    657 void MachObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
    658                                                 const MCAsmLayout &Layout) {
    659   computeSectionAddresses(Asm, Layout);
    660 
    661   // Create symbol data for any indirect symbols.
    662   bindIndirectSymbols(Asm);
    663 }
    664 
    665 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
    666     const MCAssembler &Asm, const MCSymbol &A, const MCSymbol &B,
    667     bool InSet) const {
    668   // FIXME: We don't handle things like
    669   // foo = .
    670   // creating atoms.
    671   if (A.isVariable() || B.isVariable())
    672     return false;
    673   return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, A, B,
    674                                                                 InSet);
    675 }
    676 
    677 bool MachObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
    678     const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
    679     bool InSet, bool IsPCRel) const {
    680   if (InSet)
    681     return true;
    682 
    683   // The effective address is
    684   //     addr(atom(A)) + offset(A)
    685   //   - addr(atom(B)) - offset(B)
    686   // and the offsets are not relocatable, so the fixup is fully resolved when
    687   //  addr(atom(A)) - addr(atom(B)) == 0.
    688   const MCSymbol &SA = findAliasedSymbol(SymA);
    689   const MCSection &SecA = SA.getSection();
    690   const MCSection &SecB = *FB.getParent();
    691 
    692   if (IsPCRel) {
    693     // The simple (Darwin, except on x86_64) way of dealing with this was to
    694     // assume that any reference to a temporary symbol *must* be a temporary
    695     // symbol in the same atom, unless the sections differ. Therefore, any PCrel
    696     // relocation to a temporary symbol (in the same section) is fully
    697     // resolved. This also works in conjunction with absolutized .set, which
    698     // requires the compiler to use .set to absolutize the differences between
    699     // symbols which the compiler knows to be assembly time constants, so we
    700     // don't need to worry about considering symbol differences fully resolved.
    701     //
    702     // If the file isn't using sub-sections-via-symbols, we can make the
    703     // same assumptions about any symbol that we normally make about
    704     // assembler locals.
    705 
    706     bool hasReliableSymbolDifference = isX86_64();
    707     if (!hasReliableSymbolDifference) {
    708       if (!SA.isInSection() || &SecA != &SecB ||
    709           (!SA.isTemporary() && FB.getAtom() != SA.getFragment()->getAtom() &&
    710            Asm.getSubsectionsViaSymbols()))
    711         return false;
    712       return true;
    713     }
    714     // For Darwin x86_64, there is one special case when the reference IsPCRel.
    715     // If the fragment with the reference does not have a base symbol but meets
    716     // the simple way of dealing with this, in that it is a temporary symbol in
    717     // the same atom then it is assumed to be fully resolved.  This is needed so
    718     // a relocation entry is not created and so the static linker does not
    719     // mess up the reference later.
    720     else if(!FB.getAtom() &&
    721             SA.isTemporary() && SA.isInSection() && &SecA == &SecB){
    722       return true;
    723     }
    724   }
    725 
    726   // If they are not in the same section, we can't compute the diff.
    727   if (&SecA != &SecB)
    728     return false;
    729 
    730   const MCFragment *FA = SA.getFragment();
    731 
    732   // Bail if the symbol has no fragment.
    733   if (!FA)
    734     return false;
    735 
    736   // If the atoms are the same, they are guaranteed to have the same address.
    737   if (FA->getAtom() == FB.getAtom())
    738     return true;
    739 
    740   // Otherwise, we can't prove this is fully resolved.
    741   return false;
    742 }
    743 
    744 static MachO::LoadCommandType getLCFromMCVM(MCVersionMinType Type) {
    745   switch (Type) {
    746   case MCVM_OSXVersionMin:     return MachO::LC_VERSION_MIN_MACOSX;
    747   case MCVM_IOSVersionMin:     return MachO::LC_VERSION_MIN_IPHONEOS;
    748   case MCVM_TvOSVersionMin:    return MachO::LC_VERSION_MIN_TVOS;
    749   case MCVM_WatchOSVersionMin: return MachO::LC_VERSION_MIN_WATCHOS;
    750   }
    751   llvm_unreachable("Invalid mc version min type");
    752 }
    753 
    754 uint64_t MachObjectWriter::writeObject(MCAssembler &Asm,
    755                                        const MCAsmLayout &Layout) {
    756   uint64_t StartOffset = W.OS.tell();
    757 
    758   // Compute symbol table information and bind symbol indices.
    759   computeSymbolTable(Asm, LocalSymbolData, ExternalSymbolData,
    760                      UndefinedSymbolData);
    761 
    762   unsigned NumSections = Asm.size();
    763   const MCAssembler::VersionInfoType &VersionInfo =
    764     Layout.getAssembler().getVersionInfo();
    765 
    766   // The section data starts after the header, the segment load command (and
    767   // section headers) and the symbol table.
    768   unsigned NumLoadCommands = 1;
    769   uint64_t LoadCommandsSize = is64Bit() ?
    770     sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64):
    771     sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section);
    772 
    773   // Add the deployment target version info load command size, if used.
    774   if (VersionInfo.Major != 0) {
    775     ++NumLoadCommands;
    776     if (VersionInfo.EmitBuildVersion)
    777       LoadCommandsSize += sizeof(MachO::build_version_command);
    778     else
    779       LoadCommandsSize += sizeof(MachO::version_min_command);
    780   }
    781 
    782   // Add the data-in-code load command size, if used.
    783   unsigned NumDataRegions = Asm.getDataRegions().size();
    784   if (NumDataRegions) {
    785     ++NumLoadCommands;
    786     LoadCommandsSize += sizeof(MachO::linkedit_data_command);
    787   }
    788 
    789   // Add the loh load command size, if used.
    790   uint64_t LOHRawSize = Asm.getLOHContainer().getEmitSize(*this, Layout);
    791   uint64_t LOHSize = alignTo(LOHRawSize, is64Bit() ? 8 : 4);
    792   if (LOHSize) {
    793     ++NumLoadCommands;
    794     LoadCommandsSize += sizeof(MachO::linkedit_data_command);
    795   }
    796 
    797   // Add the symbol table load command sizes, if used.
    798   unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
    799     UndefinedSymbolData.size();
    800   if (NumSymbols) {
    801     NumLoadCommands += 2;
    802     LoadCommandsSize += (sizeof(MachO::symtab_command) +
    803                          sizeof(MachO::dysymtab_command));
    804   }
    805 
    806   // Add the linker option load commands sizes.
    807   for (const auto &Option : Asm.getLinkerOptions()) {
    808     ++NumLoadCommands;
    809     LoadCommandsSize += ComputeLinkerOptionsLoadCommandSize(Option, is64Bit());
    810   }
    811 
    812   // Compute the total size of the section data, as well as its file size and vm
    813   // size.
    814   uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) :
    815                                sizeof(MachO::mach_header)) + LoadCommandsSize;
    816   uint64_t SectionDataSize = 0;
    817   uint64_t SectionDataFileSize = 0;
    818   uint64_t VMSize = 0;
    819   for (const MCSection &Sec : Asm) {
    820     uint64_t Address = getSectionAddress(&Sec);
    821     uint64_t Size = Layout.getSectionAddressSize(&Sec);
    822     uint64_t FileSize = Layout.getSectionFileSize(&Sec);
    823     FileSize += getPaddingSize(&Sec, Layout);
    824 
    825     VMSize = std::max(VMSize, Address + Size);
    826 
    827     if (Sec.isVirtualSection())
    828       continue;
    829 
    830     SectionDataSize = std::max(SectionDataSize, Address + Size);
    831     SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
    832   }
    833 
    834   // The section data is padded to pointer size bytes.
    835   //
    836   // FIXME: Is this machine dependent?
    837   unsigned SectionDataPadding =
    838       offsetToAlignment(SectionDataFileSize, is64Bit() ? Align(8) : Align(4));
    839   SectionDataFileSize += SectionDataPadding;
    840 
    841   // Write the prolog, starting with the header and load command...
    842   writeHeader(MachO::MH_OBJECT, NumLoadCommands, LoadCommandsSize,
    843               Asm.getSubsectionsViaSymbols());
    844   uint32_t Prot =
    845       MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE;
    846   writeSegmentLoadCommand("", NumSections, 0, VMSize, SectionDataStart,
    847                           SectionDataSize, Prot, Prot);
    848 
    849   // ... and then the section headers.
    850   uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
    851   for (const MCSection &Section : Asm) {
    852     const auto &Sec = cast<MCSectionMachO>(Section);
    853     std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
    854     unsigned NumRelocs = Relocs.size();
    855     uint64_t SectionStart = SectionDataStart + getSectionAddress(&Sec);
    856     unsigned Flags = Sec.getTypeAndAttributes();
    857     if (Sec.hasInstructions())
    858       Flags |= MachO::S_ATTR_SOME_INSTRUCTIONS;
    859     writeSection(Layout, Sec, getSectionAddress(&Sec), SectionStart, Flags,
    860                  RelocTableEnd, NumRelocs);
    861     RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info);
    862   }
    863 
    864   // Write out the deployment target information, if it's available.
    865   if (VersionInfo.Major != 0) {
    866     auto EncodeVersion = [](VersionTuple V) -> uint32_t {
    867       assert(!V.empty() && "empty version");
    868       unsigned Update = V.getSubminor() ? *V.getSubminor() : 0;
    869       unsigned Minor = V.getMinor() ? *V.getMinor() : 0;
    870       assert(Update < 256 && "unencodable update target version");
    871       assert(Minor < 256 && "unencodable minor target version");
    872       assert(V.getMajor() < 65536 && "unencodable major target version");
    873       return Update | (Minor << 8) | (V.getMajor() << 16);
    874     };
    875     uint32_t EncodedVersion = EncodeVersion(
    876         VersionTuple(VersionInfo.Major, VersionInfo.Minor, VersionInfo.Update));
    877     uint32_t SDKVersion = !VersionInfo.SDKVersion.empty()
    878                               ? EncodeVersion(VersionInfo.SDKVersion)
    879                               : 0;
    880     if (VersionInfo.EmitBuildVersion) {
    881       // FIXME: Currently empty tools. Add clang version in the future.
    882       W.write<uint32_t>(MachO::LC_BUILD_VERSION);
    883       W.write<uint32_t>(sizeof(MachO::build_version_command));
    884       W.write<uint32_t>(VersionInfo.TypeOrPlatform.Platform);
    885       W.write<uint32_t>(EncodedVersion);
    886       W.write<uint32_t>(SDKVersion);
    887       W.write<uint32_t>(0);         // Empty tools list.
    888     } else {
    889       MachO::LoadCommandType LCType
    890         = getLCFromMCVM(VersionInfo.TypeOrPlatform.Type);
    891       W.write<uint32_t>(LCType);
    892       W.write<uint32_t>(sizeof(MachO::version_min_command));
    893       W.write<uint32_t>(EncodedVersion);
    894       W.write<uint32_t>(SDKVersion);
    895     }
    896   }
    897 
    898   // Write the data-in-code load command, if used.
    899   uint64_t DataInCodeTableEnd = RelocTableEnd + NumDataRegions * 8;
    900   if (NumDataRegions) {
    901     uint64_t DataRegionsOffset = RelocTableEnd;
    902     uint64_t DataRegionsSize = NumDataRegions * 8;
    903     writeLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset,
    904                              DataRegionsSize);
    905   }
    906 
    907   // Write the loh load command, if used.
    908   uint64_t LOHTableEnd = DataInCodeTableEnd + LOHSize;
    909   if (LOHSize)
    910     writeLinkeditLoadCommand(MachO::LC_LINKER_OPTIMIZATION_HINT,
    911                              DataInCodeTableEnd, LOHSize);
    912 
    913   // Write the symbol table load command, if used.
    914   if (NumSymbols) {
    915     unsigned FirstLocalSymbol = 0;
    916     unsigned NumLocalSymbols = LocalSymbolData.size();
    917     unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
    918     unsigned NumExternalSymbols = ExternalSymbolData.size();
    919     unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
    920     unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
    921     unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
    922     unsigned NumSymTabSymbols =
    923       NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
    924     uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
    925     uint64_t IndirectSymbolOffset = 0;
    926 
    927     // If used, the indirect symbols are written after the section data.
    928     if (NumIndirectSymbols)
    929       IndirectSymbolOffset = LOHTableEnd;
    930 
    931     // The symbol table is written after the indirect symbol data.
    932     uint64_t SymbolTableOffset = LOHTableEnd + IndirectSymbolSize;
    933 
    934     // The string table is written after symbol table.
    935     uint64_t StringTableOffset =
    936       SymbolTableOffset + NumSymTabSymbols * (is64Bit() ?
    937                                               sizeof(MachO::nlist_64) :
    938                                               sizeof(MachO::nlist));
    939     writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
    940                            StringTableOffset, StringTable.getSize());
    941 
    942     writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
    943                              FirstExternalSymbol, NumExternalSymbols,
    944                              FirstUndefinedSymbol, NumUndefinedSymbols,
    945                              IndirectSymbolOffset, NumIndirectSymbols);
    946   }
    947 
    948   // Write the linker options load commands.
    949   for (const auto &Option : Asm.getLinkerOptions())
    950     writeLinkerOptionsLoadCommand(Option);
    951 
    952   // Write the actual section data.
    953   for (const MCSection &Sec : Asm) {
    954     Asm.writeSectionData(W.OS, &Sec, Layout);
    955 
    956     uint64_t Pad = getPaddingSize(&Sec, Layout);
    957     W.OS.write_zeros(Pad);
    958   }
    959 
    960   // Write the extra padding.
    961   W.OS.write_zeros(SectionDataPadding);
    962 
    963   // Write the relocation entries.
    964   for (const MCSection &Sec : Asm) {
    965     // Write the section relocation entries, in reverse order to match 'as'
    966     // (approximately, the exact algorithm is more complicated than this).
    967     std::vector<RelAndSymbol> &Relocs = Relocations[&Sec];
    968     for (const RelAndSymbol &Rel : make_range(Relocs.rbegin(), Relocs.rend())) {
    969       W.write<uint32_t>(Rel.MRE.r_word0);
    970       W.write<uint32_t>(Rel.MRE.r_word1);
    971     }
    972   }
    973 
    974   // Write out the data-in-code region payload, if there is one.
    975   for (MCAssembler::const_data_region_iterator
    976          it = Asm.data_region_begin(), ie = Asm.data_region_end();
    977          it != ie; ++it) {
    978     const DataRegionData *Data = &(*it);
    979     uint64_t Start = getSymbolAddress(*Data->Start, Layout);
    980     uint64_t End;
    981     if (Data->End)
    982       End = getSymbolAddress(*Data->End, Layout);
    983     else
    984       report_fatal_error("Data region not terminated");
    985 
    986     LLVM_DEBUG(dbgs() << "data in code region-- kind: " << Data->Kind
    987                       << "  start: " << Start << "(" << Data->Start->getName()
    988                       << ")"
    989                       << "  end: " << End << "(" << Data->End->getName() << ")"
    990                       << "  size: " << End - Start << "\n");
    991     W.write<uint32_t>(Start);
    992     W.write<uint16_t>(End - Start);
    993     W.write<uint16_t>(Data->Kind);
    994   }
    995 
    996   // Write out the loh commands, if there is one.
    997   if (LOHSize) {
    998 #ifndef NDEBUG
    999     unsigned Start = W.OS.tell();
   1000 #endif
   1001     Asm.getLOHContainer().emit(*this, Layout);
   1002     // Pad to a multiple of the pointer size.
   1003     W.OS.write_zeros(
   1004         offsetToAlignment(LOHRawSize, is64Bit() ? Align(8) : Align(4)));
   1005     assert(W.OS.tell() - Start == LOHSize);
   1006   }
   1007 
   1008   // Write the symbol table data, if used.
   1009   if (NumSymbols) {
   1010     // Write the indirect symbol entries.
   1011     for (MCAssembler::const_indirect_symbol_iterator
   1012            it = Asm.indirect_symbol_begin(),
   1013            ie = Asm.indirect_symbol_end(); it != ie; ++it) {
   1014       // Indirect symbols in the non-lazy symbol pointer section have some
   1015       // special handling.
   1016       const MCSectionMachO &Section =
   1017           static_cast<const MCSectionMachO &>(*it->Section);
   1018       if (Section.getType() == MachO::S_NON_LAZY_SYMBOL_POINTERS) {
   1019         // If this symbol is defined and internal, mark it as such.
   1020         if (it->Symbol->isDefined() && !it->Symbol->isExternal()) {
   1021           uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL;
   1022           if (it->Symbol->isAbsolute())
   1023             Flags |= MachO::INDIRECT_SYMBOL_ABS;
   1024           W.write<uint32_t>(Flags);
   1025           continue;
   1026         }
   1027       }
   1028 
   1029       W.write<uint32_t>(it->Symbol->getIndex());
   1030     }
   1031 
   1032     // FIXME: Check that offsets match computed ones.
   1033 
   1034     // Write the symbol table entries.
   1035     for (auto *SymbolData :
   1036          {&LocalSymbolData, &ExternalSymbolData, &UndefinedSymbolData})
   1037       for (MachSymbolData &Entry : *SymbolData)
   1038         writeNlist(Entry, Layout);
   1039 
   1040     // Write the string table.
   1041     StringTable.write(W.OS);
   1042   }
   1043 
   1044   return W.OS.tell() - StartOffset;
   1045 }
   1046 
   1047 std::unique_ptr<MCObjectWriter>
   1048 llvm::createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
   1049                              raw_pwrite_stream &OS, bool IsLittleEndian) {
   1050   return std::make_unique<MachObjectWriter>(std::move(MOTW), OS,
   1051                                              IsLittleEndian);
   1052 }
   1053