Home | History | Annotate | Line # | Download | only in Bitcode
      1 //===- llvm/Bitcode/BitcodeReader.h - Bitcode reader ------------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This header defines interfaces to read LLVM bitcode files/streams.
     10 //
     11 //===----------------------------------------------------------------------===//
     12 
     13 #ifndef LLVM_BITCODE_BITCODEREADER_H
     14 #define LLVM_BITCODE_BITCODEREADER_H
     15 
     16 #include "llvm/ADT/ArrayRef.h"
     17 #include "llvm/ADT/StringRef.h"
     18 #include "llvm/Bitstream/BitCodes.h"
     19 #include "llvm/IR/ModuleSummaryIndex.h"
     20 #include "llvm/Support/Endian.h"
     21 #include "llvm/Support/Error.h"
     22 #include "llvm/Support/ErrorOr.h"
     23 #include "llvm/Support/MemoryBuffer.h"
     24 #include <cstdint>
     25 #include <memory>
     26 #include <string>
     27 #include <system_error>
     28 #include <vector>
     29 namespace llvm {
     30 
     31 class LLVMContext;
     32 class Module;
     33 
     34 typedef llvm::function_ref<Optional<std::string>(StringRef)>
     35     DataLayoutCallbackTy;
     36 
     37   // These functions are for converting Expected/Error values to
     38   // ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
     39   // Remove these functions once no longer needed by the C and libLTO APIs.
     40 
     41   std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
     42 
     43   template <typename T>
     44   ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
     45     if (!Val)
     46       return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
     47     return std::move(*Val);
     48   }
     49 
     50   struct BitcodeFileContents;
     51 
     52   /// Basic information extracted from a bitcode module to be used for LTO.
     53   struct BitcodeLTOInfo {
     54     bool IsThinLTO;
     55     bool HasSummary;
     56     bool EnableSplitLTOUnit;
     57   };
     58 
     59   /// Represents a module in a bitcode file.
     60   class BitcodeModule {
     61     // This covers the identification (if present) and module blocks.
     62     ArrayRef<uint8_t> Buffer;
     63     StringRef ModuleIdentifier;
     64 
     65     // The string table used to interpret this module.
     66     StringRef Strtab;
     67 
     68     // The bitstream location of the IDENTIFICATION_BLOCK.
     69     uint64_t IdentificationBit;
     70 
     71     // The bitstream location of this module's MODULE_BLOCK.
     72     uint64_t ModuleBit;
     73 
     74     BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
     75                   uint64_t IdentificationBit, uint64_t ModuleBit)
     76         : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
     77           IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
     78 
     79     // Calls the ctor.
     80     friend Expected<BitcodeFileContents>
     81     getBitcodeFileContents(MemoryBufferRef Buffer);
     82 
     83     Expected<std::unique_ptr<Module>>
     84     getModuleImpl(LLVMContext &Context, bool MaterializeAll,
     85                   bool ShouldLazyLoadMetadata, bool IsImporting,
     86                   DataLayoutCallbackTy DataLayoutCallback);
     87 
     88   public:
     89     StringRef getBuffer() const {
     90       return StringRef((const char *)Buffer.begin(), Buffer.size());
     91     }
     92 
     93     StringRef getStrtab() const { return Strtab; }
     94 
     95     StringRef getModuleIdentifier() const { return ModuleIdentifier; }
     96 
     97     /// Read the bitcode module and prepare for lazy deserialization of function
     98     /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
     99     /// If IsImporting is true, this module is being parsed for ThinLTO
    100     /// importing into another module.
    101     Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context,
    102                                                     bool ShouldLazyLoadMetadata,
    103                                                     bool IsImporting);
    104 
    105     /// Read the entire bitcode module and return it.
    106     Expected<std::unique_ptr<Module>> parseModule(
    107         LLVMContext &Context, DataLayoutCallbackTy DataLayoutCallback =
    108                                   [](StringRef) { return None; });
    109 
    110     /// Returns information about the module to be used for LTO: whether to
    111     /// compile with ThinLTO, and whether it has a summary.
    112     Expected<BitcodeLTOInfo> getLTOInfo();
    113 
    114     /// Parse the specified bitcode buffer, returning the module summary index.
    115     Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
    116 
    117     /// Parse the specified bitcode buffer and merge its module summary index
    118     /// into CombinedIndex.
    119     Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
    120                       uint64_t ModuleId);
    121   };
    122 
    123   struct BitcodeFileContents {
    124     std::vector<BitcodeModule> Mods;
    125     StringRef Symtab, StrtabForSymtab;
    126   };
    127 
    128   /// Returns the contents of a bitcode file. This includes the raw contents of
    129   /// the symbol table embedded in the bitcode file. Clients which require a
    130   /// symbol table should prefer to use irsymtab::read instead of this function
    131   /// because it creates a reader for the irsymtab and handles upgrading bitcode
    132   /// files without a symbol table or with an old symbol table.
    133   Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer);
    134 
    135   /// Returns a list of modules in the specified bitcode buffer.
    136   Expected<std::vector<BitcodeModule>>
    137   getBitcodeModuleList(MemoryBufferRef Buffer);
    138 
    139   /// Read the header of the specified bitcode buffer and prepare for lazy
    140   /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
    141   /// lazily load metadata as well. If IsImporting is true, this module is
    142   /// being parsed for ThinLTO importing into another module.
    143   Expected<std::unique_ptr<Module>>
    144   getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
    145                        bool ShouldLazyLoadMetadata = false,
    146                        bool IsImporting = false);
    147 
    148   /// Like getLazyBitcodeModule, except that the module takes ownership of
    149   /// the memory buffer if successful. If successful, this moves Buffer. On
    150   /// error, this *does not* move Buffer. If IsImporting is true, this module is
    151   /// being parsed for ThinLTO importing into another module.
    152   Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
    153       std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
    154       bool ShouldLazyLoadMetadata = false, bool IsImporting = false);
    155 
  /// Read the header of the specified bitcode buffer and extract just the
  /// triple information. If successful, this returns a string. On failure,
  /// the error is reported through the returned Expected.
    159   Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);
    160 
    161   /// Return true if \p Buffer contains a bitcode file with ObjC code (category
    162   /// or class) in it.
    163   Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);
    164 
  /// Read the header of the specified bitcode buffer and extract just the
  /// producer string information. If successful, this returns a string. On
  /// failure, the error is reported through the returned Expected.
    168   Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);
    169 
    170   /// Read the specified bitcode file, returning the module.
    171   Expected<std::unique_ptr<Module>> parseBitcodeFile(
    172       MemoryBufferRef Buffer, LLVMContext &Context,
    173       DataLayoutCallbackTy DataLayoutCallback = [](StringRef) {
    174         return None;
    175       });
    176 
    177   /// Returns LTO information for the specified bitcode file.
    178   Expected<BitcodeLTOInfo> getBitcodeLTOInfo(MemoryBufferRef Buffer);
    179 
    180   /// Parse the specified bitcode buffer, returning the module summary index.
    181   Expected<std::unique_ptr<ModuleSummaryIndex>>
    182   getModuleSummaryIndex(MemoryBufferRef Buffer);
    183 
    184   /// Parse the specified bitcode buffer and merge the index into CombinedIndex.
    185   Error readModuleSummaryIndex(MemoryBufferRef Buffer,
    186                                ModuleSummaryIndex &CombinedIndex,
    187                                uint64_t ModuleId);
    188 
    189   /// Parse the module summary index out of an IR file and return the module
    190   /// summary index object if found, or an empty summary if not. If Path refers
    191   /// to an empty file and IgnoreEmptyThinLTOIndexFile is true, then
    192   /// this function will return nullptr.
    193   Expected<std::unique_ptr<ModuleSummaryIndex>>
    194   getModuleSummaryIndexForFile(StringRef Path,
    195                                bool IgnoreEmptyThinLTOIndexFile = false);
    196 
    197   /// isBitcodeWrapper - Return true if the given bytes are the magic bytes
    198   /// for an LLVM IR bitcode wrapper.
    199   inline bool isBitcodeWrapper(const unsigned char *BufPtr,
    200                                const unsigned char *BufEnd) {
    201     // See if you can find the hidden message in the magic bytes :-).
    202     // (Hint: it's a little-endian encoding.)
    203     return BufPtr != BufEnd &&
    204            BufPtr[0] == 0xDE &&
    205            BufPtr[1] == 0xC0 &&
    206            BufPtr[2] == 0x17 &&
    207            BufPtr[3] == 0x0B;
    208   }
    209 
    210   /// isRawBitcode - Return true if the given bytes are the magic bytes for
    211   /// raw LLVM IR bitcode (without a wrapper).
    212   inline bool isRawBitcode(const unsigned char *BufPtr,
    213                            const unsigned char *BufEnd) {
    214     // These bytes sort of have a hidden message, but it's not in
    215     // little-endian this time, and it's a little redundant.
    216     return BufPtr != BufEnd &&
    217            BufPtr[0] == 'B' &&
    218            BufPtr[1] == 'C' &&
    219            BufPtr[2] == 0xc0 &&
    220            BufPtr[3] == 0xde;
    221   }
    222 
    223   /// isBitcode - Return true if the given bytes are the magic bytes for
    224   /// LLVM IR bitcode, either with or without a wrapper.
    225   inline bool isBitcode(const unsigned char *BufPtr,
    226                         const unsigned char *BufEnd) {
    227     return isBitcodeWrapper(BufPtr, BufEnd) ||
    228            isRawBitcode(BufPtr, BufEnd);
    229   }
    230 
    231   /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special
    232   /// header for padding or other reasons.  The format of this header is:
    233   ///
    234   /// struct bc_header {
    235   ///   uint32_t Magic;         // 0x0B17C0DE
    236   ///   uint32_t Version;       // Version, currently always 0.
    237   ///   uint32_t BitcodeOffset; // Offset to traditional bitcode file.
    238   ///   uint32_t BitcodeSize;   // Size of traditional bitcode file.
    239   ///   ... potentially other gunk ...
    240   /// };
    241   ///
    242   /// This function is called when we find a file with a matching magic number.
    243   /// In this case, skip down to the subsection of the file that is actually a
    244   /// BC file.
    245   /// If 'VerifyBufferSize' is true, check that the buffer is large enough to
    246   /// contain the whole bitcode file.
    247   inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr,
    248                                        const unsigned char *&BufEnd,
    249                                        bool VerifyBufferSize) {
    250     // Must contain the offset and size field!
    251     if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4)
    252       return true;
    253 
    254     unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]);
    255     unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]);
    256     uint64_t BitcodeOffsetEnd = (uint64_t)Offset + (uint64_t)Size;
    257 
    258     // Verify that Offset+Size fits in the file.
    259     if (VerifyBufferSize && BitcodeOffsetEnd > uint64_t(BufEnd-BufPtr))
    260       return true;
    261     BufPtr += Offset;
    262     BufEnd = BufPtr+Size;
    263     return false;
    264   }
    265 
    266   APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits);
    267 
    268   const std::error_category &BitcodeErrorCategory();
    269   enum class BitcodeError { CorruptedBitcode = 1 };
    270   inline std::error_code make_error_code(BitcodeError E) {
    271     return std::error_code(static_cast<int>(E), BitcodeErrorCategory());
    272   }
    273 
    274 } // end namespace llvm
    275 
    276 namespace std {
    277 
    278 template <> struct is_error_code_enum<llvm::BitcodeError> : std::true_type {};
    279 
    280 } // end namespace std
    281 
    282 #endif // LLVM_BITCODE_BITCODEREADER_H
    283