Home | History | Annotate | Line # | Download | only in Serialization
      1 //===- ModuleFile.h - Module file description -------------------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 //  This file defines the ModuleFile class, which describes a module that has
     10 //  been loaded from an AST file.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_CLANG_SERIALIZATION_MODULEFILE_H
     15 #define LLVM_CLANG_SERIALIZATION_MODULEFILE_H
     16 
     17 #include "clang/Basic/FileManager.h"
     18 #include "clang/Basic/Module.h"
     19 #include "clang/Basic/SourceLocation.h"
     20 #include "clang/Serialization/ASTBitCodes.h"
     21 #include "clang/Serialization/ContinuousRangeMap.h"
     22 #include "clang/Serialization/ModuleFileExtension.h"
     23 #include "llvm/ADT/DenseMap.h"
     24 #include "llvm/ADT/PointerIntPair.h"
     25 #include "llvm/ADT/SetVector.h"
     26 #include "llvm/ADT/SmallVector.h"
     27 #include "llvm/ADT/StringRef.h"
     28 #include "llvm/Bitstream/BitstreamReader.h"
     29 #include "llvm/Support/Endian.h"
     30 #include <cassert>
     31 #include <cstdint>
     32 #include <memory>
     33 #include <string>
     34 #include <vector>
     35 
     36 namespace clang {
     37 
     38 namespace serialization {
     39 
/// Classifies a loaded AST file by the way it was brought in.
///
/// The enumerator values are written out explicitly; they are the same values
/// the compiler assigns when no initializers are given.
enum ModuleKind {
  /// A module that was loaded implicitly.
  MK_ImplicitModule = 0,

  /// A module that was loaded explicitly.
  MK_ExplicitModule = 1,

  /// A PCH file that is treated as a PCH file.
  MK_PCH = 2,

  /// A PCH file that is treated as the preamble.
  MK_Preamble = 3,

  /// A PCH file that is treated as the actual main file.
  MK_MainFile = 4,

  /// A module that comes from a prebuilt module path.
  MK_PrebuiltModule = 5
};
     60 
     61 /// The input file that has been loaded from this AST file, along with
     62 /// bools indicating whether this was an overridden buffer or if it was
     63 /// out-of-date or not-found.
     64 class InputFile {
     65   enum {
     66     Overridden = 1,
     67     OutOfDate = 2,
     68     NotFound = 3
     69   };
     70   llvm::PointerIntPair<const FileEntryRef::MapEntry *, 2, unsigned> Val;
     71 
     72 public:
     73   InputFile() = default;
     74 
     75   InputFile(FileEntryRef File, bool isOverridden = false,
     76             bool isOutOfDate = false) {
     77     assert(!(isOverridden && isOutOfDate) &&
     78            "an overridden cannot be out-of-date");
     79     unsigned intVal = 0;
     80     if (isOverridden)
     81       intVal = Overridden;
     82     else if (isOutOfDate)
     83       intVal = OutOfDate;
     84     Val.setPointerAndInt(&File.getMapEntry(), intVal);
     85   }
     86 
     87   static InputFile getNotFound() {
     88     InputFile File;
     89     File.Val.setInt(NotFound);
     90     return File;
     91   }
     92 
     93   OptionalFileEntryRefDegradesToFileEntryPtr getFile() const {
     94     if (auto *P = Val.getPointer())
     95       return FileEntryRef(*P);
     96     return None;
     97   }
     98   bool isOverridden() const { return Val.getInt() == Overridden; }
     99   bool isOutOfDate() const { return Val.getInt() == OutOfDate; }
    100   bool isNotFound() const { return Val.getInt() == NotFound; }
    101 };
    102 
    103 /// Information about a module that has been loaded by the ASTReader.
    104 ///
    105 /// Each instance of the Module class corresponds to a single AST file, which
    106 /// may be a precompiled header, precompiled preamble, a module, or an AST file
    107 /// of some sort loaded as the main file, all of which are specific formulations
    108 /// of the general notion of a "module". A module may depend on any number of
    109 /// other modules.
    110 class ModuleFile {
    111 public:
    112   ModuleFile(ModuleKind Kind, unsigned Generation)
    113       : Kind(Kind), Generation(Generation) {}
    114   ~ModuleFile();
    115 
    116   // === General information ===
    117 
    118   /// The index of this module in the list of modules.
    119   unsigned Index = 0;
    120 
    121   /// The type of this module.
    122   ModuleKind Kind;
    123 
    124   /// The file name of the module file.
    125   std::string FileName;
    126 
    127   /// The name of the module.
    128   std::string ModuleName;
    129 
    130   /// The base directory of the module.
    131   std::string BaseDirectory;
    132 
    133   std::string getTimestampFilename() const {
    134     return FileName + ".timestamp";
    135   }
    136 
    137   /// The original source file name that was used to build the
    138   /// primary AST file, which may have been modified for
    139   /// relocatable-pch support.
    140   std::string OriginalSourceFileName;
    141 
    142   /// The actual original source file name that was used to
    143   /// build this AST file.
    144   std::string ActualOriginalSourceFileName;
    145 
    146   /// The file ID for the original source file that was used to
    147   /// build this AST file.
    148   FileID OriginalSourceFileID;
    149 
    150   /// The directory that the PCH was originally created in. Used to
    151   /// allow resolving headers even after headers+PCH was moved to a new path.
    152   std::string OriginalDir;
    153 
    154   std::string ModuleMapPath;
    155 
    156   /// Whether this precompiled header is a relocatable PCH file.
    157   bool RelocatablePCH = false;
    158 
    159   /// Whether timestamps are included in this module file.
    160   bool HasTimestamps = false;
    161 
    162   /// Whether the top-level module has been read from the AST file.
    163   bool DidReadTopLevelSubmodule = false;
    164 
    165   /// The file entry for the module file.
    166   OptionalFileEntryRefDegradesToFileEntryPtr File;
    167 
    168   /// The signature of the module file, which may be used instead of the size
    169   /// and modification time to identify this particular file.
    170   ASTFileSignature Signature;
    171 
    172   /// The signature of the AST block of the module file, this can be used to
    173   /// unique module files based on AST contents.
    174   ASTFileSignature ASTBlockHash;
    175 
    176   /// Whether this module has been directly imported by the
    177   /// user.
    178   bool DirectlyImported = false;
    179 
    180   /// The generation of which this module file is a part.
    181   unsigned Generation;
    182 
    183   /// The memory buffer that stores the data associated with
    184   /// this AST file, owned by the InMemoryModuleCache.
    185   llvm::MemoryBuffer *Buffer;
    186 
    187   /// The size of this file, in bits.
    188   uint64_t SizeInBits = 0;
    189 
    190   /// The global bit offset (or base) of this module
    191   uint64_t GlobalBitOffset = 0;
    192 
    193   /// The bit offset of the AST block of this module.
    194   uint64_t ASTBlockStartOffset = 0;
    195 
    196   /// The serialized bitstream data for this file.
    197   StringRef Data;
    198 
    199   /// The main bitstream cursor for the main block.
    200   llvm::BitstreamCursor Stream;
    201 
    202   /// The source location where the module was explicitly or implicitly
    203   /// imported in the local translation unit.
    204   ///
    205   /// If module A depends on and imports module B, both modules will have the
    206   /// same DirectImportLoc, but different ImportLoc (B's ImportLoc will be a
    207   /// source location inside module A).
    208   ///
    209   /// WARNING: This is largely useless. It doesn't tell you when a module was
    210   /// made visible, just when the first submodule of that module was imported.
    211   SourceLocation DirectImportLoc;
    212 
    213   /// The source location where this module was first imported.
    214   SourceLocation ImportLoc;
    215 
    216   /// The first source location in this module.
    217   SourceLocation FirstLoc;
    218 
    219   /// The list of extension readers that are attached to this module
    220   /// file.
    221   std::vector<std::unique_ptr<ModuleFileExtensionReader>> ExtensionReaders;
    222 
    223   /// The module offset map data for this file. If non-empty, the various
    224   /// ContinuousRangeMaps described below have not yet been populated.
    225   StringRef ModuleOffsetMap;
    226 
    227   // === Input Files ===
    228 
    229   /// The cursor to the start of the input-files block.
    230   llvm::BitstreamCursor InputFilesCursor;
    231 
    232   /// Offsets for all of the input file entries in the AST file.
    233   const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr;
    234 
    235   /// The input files that have been loaded from this AST file.
    236   std::vector<InputFile> InputFilesLoaded;
    237 
    238   // All user input files reside at the index range [0, NumUserInputFiles), and
    239   // system input files reside at [NumUserInputFiles, InputFilesLoaded.size()).
    240   unsigned NumUserInputFiles = 0;
    241 
    242   /// If non-zero, specifies the time when we last validated input
    243   /// files.  Zero means we never validated them.
    244   ///
    245   /// The time is specified in seconds since the start of the Epoch.
    246   uint64_t InputFilesValidationTimestamp = 0;
    247 
    248   // === Source Locations ===
    249 
    250   /// Cursor used to read source location entries.
    251   llvm::BitstreamCursor SLocEntryCursor;
    252 
    253   /// The bit offset to the start of the SOURCE_MANAGER_BLOCK.
    254   uint64_t SourceManagerBlockStartOffset = 0;
    255 
    256   /// The number of source location entries in this AST file.
    257   unsigned LocalNumSLocEntries = 0;
    258 
    259   /// The base ID in the source manager's view of this module.
    260   int SLocEntryBaseID = 0;
    261 
    262   /// The base offset in the source manager's view of this module.
    263   unsigned SLocEntryBaseOffset = 0;
    264 
    265   /// Base file offset for the offsets in SLocEntryOffsets. Real file offset
    266   /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i].
    267   uint64_t SLocEntryOffsetsBase = 0;
    268 
    269   /// Offsets for all of the source location entries in the
    270   /// AST file.
    271   const uint32_t *SLocEntryOffsets = nullptr;
    272 
    273   /// SLocEntries that we're going to preload.
    274   SmallVector<uint64_t, 4> PreloadSLocEntries;
    275 
    276   /// Remapping table for source locations in this module.
    277   ContinuousRangeMap<uint32_t, int, 2> SLocRemap;
    278 
    279   // === Identifiers ===
    280 
    281   /// The number of identifiers in this AST file.
    282   unsigned LocalNumIdentifiers = 0;
    283 
    284   /// Offsets into the identifier table data.
    285   ///
    286   /// This array is indexed by the identifier ID (-1), and provides
    287   /// the offset into IdentifierTableData where the string data is
    288   /// stored.
    289   const uint32_t *IdentifierOffsets = nullptr;
    290 
    291   /// Base identifier ID for identifiers local to this module.
    292   serialization::IdentID BaseIdentifierID = 0;
    293 
    294   /// Remapping table for identifier IDs in this module.
    295   ContinuousRangeMap<uint32_t, int, 2> IdentifierRemap;
    296 
    297   /// Actual data for the on-disk hash table of identifiers.
    298   ///
    299   /// This pointer points into a memory buffer, where the on-disk hash
    300   /// table for identifiers actually lives.
    301   const unsigned char *IdentifierTableData = nullptr;
    302 
    303   /// A pointer to an on-disk hash table of opaque type
    304   /// IdentifierHashTable.
    305   void *IdentifierLookupTable = nullptr;
    306 
    307   /// Offsets of identifiers that we're going to preload within
    308   /// IdentifierTableData.
    309   std::vector<unsigned> PreloadIdentifierOffsets;
    310 
    311   // === Macros ===
    312 
    313   /// The cursor to the start of the preprocessor block, which stores
    314   /// all of the macro definitions.
    315   llvm::BitstreamCursor MacroCursor;
    316 
    317   /// The number of macros in this AST file.
    318   unsigned LocalNumMacros = 0;
    319 
    320   /// Base file offset for the offsets in MacroOffsets. Real file offset for
    321   /// the entry is MacroOffsetsBase + MacroOffsets[i].
    322   uint64_t MacroOffsetsBase = 0;
    323 
    324   /// Offsets of macros in the preprocessor block.
    325   ///
    326   /// This array is indexed by the macro ID (-1), and provides
    327   /// the offset into the preprocessor block where macro definitions are
    328   /// stored.
    329   const uint32_t *MacroOffsets = nullptr;
    330 
    331   /// Base macro ID for macros local to this module.
    332   serialization::MacroID BaseMacroID = 0;
    333 
    334   /// Remapping table for macro IDs in this module.
    335   ContinuousRangeMap<uint32_t, int, 2> MacroRemap;
    336 
    337   /// The offset of the start of the set of defined macros.
    338   uint64_t MacroStartOffset = 0;
    339 
    340   // === Detailed PreprocessingRecord ===
    341 
    342   /// The cursor to the start of the (optional) detailed preprocessing
    343   /// record block.
    344   llvm::BitstreamCursor PreprocessorDetailCursor;
    345 
    346   /// The offset of the start of the preprocessor detail cursor.
    347   uint64_t PreprocessorDetailStartOffset = 0;
    348 
    349   /// Base preprocessed entity ID for preprocessed entities local to
    350   /// this module.
    351   serialization::PreprocessedEntityID BasePreprocessedEntityID = 0;
    352 
    353   /// Remapping table for preprocessed entity IDs in this module.
    354   ContinuousRangeMap<uint32_t, int, 2> PreprocessedEntityRemap;
    355 
    356   const PPEntityOffset *PreprocessedEntityOffsets = nullptr;
    357   unsigned NumPreprocessedEntities = 0;
    358 
    359   /// Base ID for preprocessed skipped ranges local to this module.
    360   unsigned BasePreprocessedSkippedRangeID = 0;
    361 
    362   const PPSkippedRange *PreprocessedSkippedRangeOffsets = nullptr;
    363   unsigned NumPreprocessedSkippedRanges = 0;
    364 
    365   // === Header search information ===
    366 
    367   /// The number of local HeaderFileInfo structures.
    368   unsigned LocalNumHeaderFileInfos = 0;
    369 
    370   /// Actual data for the on-disk hash table of header file
    371   /// information.
    372   ///
    373   /// This pointer points into a memory buffer, where the on-disk hash
    374   /// table for header file information actually lives.
    375   const char *HeaderFileInfoTableData = nullptr;
    376 
    377   /// The on-disk hash table that contains information about each of
    378   /// the header files.
    379   void *HeaderFileInfoTable = nullptr;
    380 
    381   // === Submodule information ===
    382 
    383   /// The number of submodules in this module.
    384   unsigned LocalNumSubmodules = 0;
    385 
    386   /// Base submodule ID for submodules local to this module.
    387   serialization::SubmoduleID BaseSubmoduleID = 0;
    388 
    389   /// Remapping table for submodule IDs in this module.
    390   ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap;
    391 
    392   // === Selectors ===
    393 
    394   /// The number of selectors new to this file.
    395   ///
    396   /// This is the number of entries in SelectorOffsets.
    397   unsigned LocalNumSelectors = 0;
    398 
    399   /// Offsets into the selector lookup table's data array
    400   /// where each selector resides.
    401   const uint32_t *SelectorOffsets = nullptr;
    402 
    403   /// Base selector ID for selectors local to this module.
    404   serialization::SelectorID BaseSelectorID = 0;
    405 
    406   /// Remapping table for selector IDs in this module.
    407   ContinuousRangeMap<uint32_t, int, 2> SelectorRemap;
    408 
    409   /// A pointer to the character data that comprises the selector table
    410   ///
    411   /// The SelectorOffsets table refers into this memory.
    412   const unsigned char *SelectorLookupTableData = nullptr;
    413 
    414   /// A pointer to an on-disk hash table of opaque type
    415   /// ASTSelectorLookupTable.
    416   ///
    417   /// This hash table provides the IDs of all selectors, and the associated
    418   /// instance and factory methods.
    419   void *SelectorLookupTable = nullptr;
    420 
    421   // === Declarations ===
    422 
    423   /// DeclsCursor - This is a cursor to the start of the DECLTYPES_BLOCK block.
    424   /// It has read all the abbreviations at the start of the block and is ready
    425   /// to jump around with these in context.
    426   llvm::BitstreamCursor DeclsCursor;
    427 
    428   /// The offset to the start of the DECLTYPES_BLOCK block.
    429   uint64_t DeclsBlockStartOffset = 0;
    430 
    431   /// The number of declarations in this AST file.
    432   unsigned LocalNumDecls = 0;
    433 
    434   /// Offset of each declaration within the bitstream, indexed
    435   /// by the declaration ID (-1).
    436   const DeclOffset *DeclOffsets = nullptr;
    437 
    438   /// Base declaration ID for declarations local to this module.
    439   serialization::DeclID BaseDeclID = 0;
    440 
    441   /// Remapping table for declaration IDs in this module.
    442   ContinuousRangeMap<uint32_t, int, 2> DeclRemap;
    443 
    444   /// Mapping from the module files that this module file depends on
    445   /// to the base declaration ID for that module as it is understood within this
    446   /// module.
    447   ///
    448   /// This is effectively a reverse global-to-local mapping for declaration
    449   /// IDs, so that we can interpret a true global ID (for this translation unit)
    450   /// as a local ID (for this module file).
    451   llvm::DenseMap<ModuleFile *, serialization::DeclID> GlobalToLocalDeclIDs;
    452 
    453   /// Array of file-level DeclIDs sorted by file.
    454   const serialization::DeclID *FileSortedDecls = nullptr;
    455   unsigned NumFileSortedDecls = 0;
    456 
    457   /// Array of category list location information within this
    458   /// module file, sorted by the definition ID.
    459   const serialization::ObjCCategoriesInfo *ObjCCategoriesMap = nullptr;
    460 
    461   /// The number of redeclaration info entries in ObjCCategoriesMap.
    462   unsigned LocalNumObjCCategoriesInMap = 0;
    463 
    464   /// The Objective-C category lists for categories known to this
    465   /// module.
    466   SmallVector<uint64_t, 1> ObjCCategories;
    467 
    468   // === Types ===
    469 
    470   /// The number of types in this AST file.
    471   unsigned LocalNumTypes = 0;
    472 
    473   /// Offset of each type within the bitstream, indexed by the
    474   /// type ID, or the representation of a Type*.
    475   const UnderalignedInt64 *TypeOffsets = nullptr;
    476 
    477   /// Base type ID for types local to this module as represented in
    478   /// the global type ID space.
    479   serialization::TypeID BaseTypeIndex = 0;
    480 
    481   /// Remapping table for type IDs in this module.
    482   ContinuousRangeMap<uint32_t, int, 2> TypeRemap;
    483 
    484   // === Miscellaneous ===
    485 
    486   /// Diagnostic IDs and their mappings that the user changed.
    487   SmallVector<uint64_t, 8> PragmaDiagMappings;
    488 
    489   /// List of modules which depend on this module
    490   llvm::SetVector<ModuleFile *> ImportedBy;
    491 
    492   /// List of modules which this module depends on
    493   llvm::SetVector<ModuleFile *> Imports;
    494 
    495   /// Determine whether this module was directly imported at
    496   /// any point during translation.
    497   bool isDirectlyImported() const { return DirectlyImported; }
    498 
    499   /// Is this a module file for a module (rather than a PCH or similar).
    500   bool isModule() const {
    501     return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule ||
    502            Kind == MK_PrebuiltModule;
    503   }
    504 
    505   /// Dump debugging output for this module.
    506   void dump();
    507 };
    508 
    509 } // namespace serialization
    510 
    511 } // namespace clang
    512 
    513 #endif // LLVM_CLANG_SERIALIZATION_MODULEFILE_H
    514