Home | History | Annotate | Line # | Download | only in Lex
      1 //===- DependencyDirectivesSourceMinimizer.cpp -  -------------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 ///
      9 /// \file
     10 /// This is the implementation for minimizing header and source files to the
     11 /// minimum necessary preprocessor directives for evaluating includes. It
     12 /// reduces the source down to #define, #include, #import, @import, and any
     13 /// conditional preprocessor logic that contains one of those.
     14 ///
     15 //===----------------------------------------------------------------------===//
     16 
     17 #include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
     18 #include "clang/Basic/CharInfo.h"
     19 #include "clang/Basic/Diagnostic.h"
     20 #include "clang/Lex/LexDiagnostic.h"
     21 #include "llvm/ADT/StringMap.h"
     22 #include "llvm/ADT/StringSwitch.h"
     23 #include "llvm/Support/MemoryBuffer.h"
     24 
     25 using namespace llvm;
     26 using namespace clang;
     27 using namespace clang::minimize_source_to_dependency_directives;
     28 
     29 namespace {
     30 
     31 struct Minimizer {
     32   /// Minimized output.
     33   SmallVectorImpl<char> &Out;
     34   /// The known tokens encountered during the minimization.
     35   SmallVectorImpl<Token> &Tokens;
     36 
     37   Minimizer(SmallVectorImpl<char> &Out, SmallVectorImpl<Token> &Tokens,
     38             StringRef Input, DiagnosticsEngine *Diags,
     39             SourceLocation InputSourceLoc)
     40       : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags),
     41         InputSourceLoc(InputSourceLoc) {}
     42 
     43   /// Lex the provided source and emit the minimized output.
     44   ///
     45   /// \returns True on error.
     46   bool minimize();
     47 
     48 private:
     49   struct IdInfo {
     50     const char *Last;
     51     StringRef Name;
     52   };
     53 
     54   /// Lex an identifier.
     55   ///
     56   /// \pre First points at a valid identifier head.
     57   LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End);
     58   LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First,
     59                                        const char *const End);
     60   LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End);
     61   LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
     62   LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
     63   LLVM_NODISCARD bool lexModule(const char *&First, const char *const End);
     64   LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End);
     65   LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
     66   LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
     67   LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive,
     68                                  const char *&First, const char *const End);
     69   Token &makeToken(TokenKind K) {
     70     Tokens.emplace_back(K, Out.size());
     71     return Tokens.back();
     72   }
     73   void popToken() {
     74     Out.resize(Tokens.back().Offset);
     75     Tokens.pop_back();
     76   }
     77   TokenKind top() const { return Tokens.empty() ? pp_none : Tokens.back().K; }
     78 
     79   Minimizer &put(char Byte) {
     80     Out.push_back(Byte);
     81     return *this;
     82   }
     83   Minimizer &append(StringRef S) { return append(S.begin(), S.end()); }
     84   Minimizer &append(const char *First, const char *Last) {
     85     Out.append(First, Last);
     86     return *this;
     87   }
     88 
     89   void printToNewline(const char *&First, const char *const End);
     90   void printAdjacentModuleNameParts(const char *&First, const char *const End);
     91   LLVM_NODISCARD bool printAtImportBody(const char *&First,
     92                                         const char *const End);
     93   void printDirectiveBody(const char *&First, const char *const End);
     94   void printAdjacentMacroArgs(const char *&First, const char *const End);
     95   LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End);
     96 
     97   /// Reports a diagnostic if the diagnostic engine is provided. Always returns
     98   /// true at the end.
     99   bool reportError(const char *CurPtr, unsigned Err);
    100 
    101   StringMap<char> SplitIds;
    102   StringRef Input;
    103   DiagnosticsEngine *Diags;
    104   SourceLocation InputSourceLoc;
    105 };
    106 
    107 } // end anonymous namespace
    108 
    109 bool Minimizer::reportError(const char *CurPtr, unsigned Err) {
    110   if (!Diags)
    111     return true;
    112   assert(CurPtr >= Input.data() && "invalid buffer ptr");
    113   Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err);
    114   return true;
    115 }
    116 
    117 static void skipOverSpaces(const char *&First, const char *const End) {
    118   while (First != End && isHorizontalWhitespace(*First))
    119     ++First;
    120 }
    121 
    122 LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
    123                                               const char *Current) {
    124   assert(First <= Current);
    125 
    126   // Check if we can even back up.
    127   if (*Current != '"' || First == Current)
    128     return false;
    129 
    130   // Check for an "R".
    131   --Current;
    132   if (*Current != 'R')
    133     return false;
    134   if (First == Current || !isIdentifierBody(*--Current))
    135     return true;
    136 
    137   // Check for a prefix of "u", "U", or "L".
    138   if (*Current == 'u' || *Current == 'U' || *Current == 'L')
    139     return First == Current || !isIdentifierBody(*--Current);
    140 
    141   // Check for a prefix of "u8".
    142   if (*Current != '8' || First == Current || *Current-- != 'u')
    143     return false;
    144   return First == Current || !isIdentifierBody(*--Current);
    145 }
    146 
    147 static void skipRawString(const char *&First, const char *const End) {
    148   assert(First[0] == '"');
    149   assert(First[-1] == 'R');
    150 
    151   const char *Last = ++First;
    152   while (Last != End && *Last != '(')
    153     ++Last;
    154   if (Last == End) {
    155     First = Last; // Hit the end... just give up.
    156     return;
    157   }
    158 
    159   StringRef Terminator(First, Last - First);
    160   for (;;) {
    161     // Move First to just past the next ")".
    162     First = Last;
    163     while (First != End && *First != ')')
    164       ++First;
    165     if (First == End)
    166       return;
    167     ++First;
    168 
    169     // Look ahead for the terminator sequence.
    170     Last = First;
    171     while (Last != End && size_t(Last - First) < Terminator.size() &&
    172            Terminator[Last - First] == *Last)
    173       ++Last;
    174 
    175     // Check if we hit it (or the end of the file).
    176     if (Last == End) {
    177       First = Last;
    178       return;
    179     }
    180     if (size_t(Last - First) < Terminator.size())
    181       continue;
    182     if (*Last != '"')
    183       continue;
    184     First = Last + 1;
    185     return;
    186   }
    187 }
    188 
    189 // Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n)
    190 static unsigned isEOL(const char *First, const char *const End) {
    191   if (First == End)
    192     return 0;
    193   if (End - First > 1 && isVerticalWhitespace(First[0]) &&
    194       isVerticalWhitespace(First[1]) && First[0] != First[1])
    195     return 2;
    196   return !!isVerticalWhitespace(First[0]);
    197 }
    198 
    199 static void skipString(const char *&First, const char *const End) {
    200   assert(*First == '\'' || *First == '"' || *First == '<');
    201   const char Terminator = *First == '<' ? '>' : *First;
    202   for (++First; First != End && *First != Terminator; ++First) {
    203     // String and character literals don't extend past the end of the line.
    204     if (isVerticalWhitespace(*First))
    205       return;
    206     if (*First != '\\')
    207       continue;
    208     // Skip past backslash to the next character. This ensures that the
    209     // character right after it is skipped as well, which matters if it's
    210     // the terminator.
    211     if (++First == End)
    212       return;
    213     if (!isWhitespace(*First))
    214       continue;
    215     // Whitespace after the backslash might indicate a line continuation.
    216     const char *FirstAfterBackslashPastSpace = First;
    217     skipOverSpaces(FirstAfterBackslashPastSpace, End);
    218     if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) {
    219       // Advance the character pointer to the next line for the next
    220       // iteration.
    221       First = FirstAfterBackslashPastSpace + NLSize - 1;
    222     }
    223   }
    224   if (First != End)
    225     ++First; // Finish off the string.
    226 }
    227 
    228 // Returns the length of the skipped newline
    229 static unsigned skipNewline(const char *&First, const char *End) {
    230   if (First == End)
    231     return 0;
    232   assert(isVerticalWhitespace(*First));
    233   unsigned Len = isEOL(First, End);
    234   assert(Len && "expected newline");
    235   First += Len;
    236   return Len;
    237 }
    238 
/// Given \p First just past a newline of length \p EOLLen, tells whether the
/// character immediately before that newline was a backslash.
static bool wasLineContinuation(const char *First, unsigned EOLLen) {
  const char *const BeforeEOL = First - static_cast<int>(EOLLen) - 1;
  return *BeforeEOL == '\\';
}
    242 
    243 static void skipToNewlineRaw(const char *&First, const char *const End) {
    244   for (;;) {
    245     if (First == End)
    246       return;
    247 
    248     unsigned Len = isEOL(First, End);
    249     if (Len)
    250       return;
    251 
    252     do {
    253       if (++First == End)
    254         return;
    255       Len = isEOL(First, End);
    256     } while (!Len);
    257 
    258     if (First[-1] != '\\')
    259       return;
    260 
    261     First += Len;
    262     // Keep skipping lines...
    263   }
    264 }
    265 
    266 static const char *findLastNonSpace(const char *First, const char *Last) {
    267   assert(First <= Last);
    268   while (First != Last && isHorizontalWhitespace(Last[-1]))
    269     --Last;
    270   return Last;
    271 }
    272 
    273 static const char *findFirstTrailingSpace(const char *First,
    274                                           const char *Last) {
    275   const char *LastNonSpace = findLastNonSpace(First, Last);
    276   if (Last == LastNonSpace)
    277     return Last;
    278   assert(isHorizontalWhitespace(LastNonSpace[0]));
    279   return LastNonSpace + 1;
    280 }
    281 
    282 static void skipLineComment(const char *&First, const char *const End) {
    283   assert(First[0] == '/' && First[1] == '/');
    284   First += 2;
    285   skipToNewlineRaw(First, End);
    286 }
    287 
/// Skip a "/* ... */" comment starting at \p First.
///
/// On return \p First is just past the closing "*/", or at \p End if the
/// comment is not closed within the buffer.
static void skipBlockComment(const char *&First, const char *const End) {
  assert(First[0] == '/' && First[1] == '*');
  // The shortest complete comment is "/**/", so the earliest possible
  // closing slash is at offset 3.
  if (End - First >= 4) {
    for (const char *Slash = First + 3; Slash != End; ++Slash) {
      if (Slash[0] == '/' && Slash[-1] == '*') {
        First = Slash + 1;
        return;
      }
    }
  }
  First = End;
}
    300 
    301 /// \returns True if the current single quotation mark character is a C++ 14
    302 /// digit separator.
    303 static bool isQuoteCppDigitSeparator(const char *const Start,
    304                                      const char *const Cur,
    305                                      const char *const End) {
    306   assert(*Cur == '\'' && "expected quotation character");
    307   // skipLine called in places where we don't expect a valid number
    308   // body before `start` on the same line, so always return false at the start.
    309   if (Start == Cur)
    310     return false;
    311   // The previous character must be a valid PP number character.
    312   // Make sure that the L, u, U, u8 prefixes don't get marked as a
    313   // separator though.
    314   char Prev = *(Cur - 1);
    315   if (Prev == 'L' || Prev == 'U' || Prev == 'u')
    316     return false;
    317   if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
    318     return false;
    319   if (!isPreprocessingNumberBody(Prev))
    320     return false;
    321   // The next character should be a valid identifier body character.
    322   return (Cur + 1) < End && isIdentifierBody(*(Cur + 1));
    323 }
    324 
    325 static void skipLine(const char *&First, const char *const End) {
    326   for (;;) {
    327     assert(First <= End);
    328     if (First == End)
    329       return;
    330 
    331     if (isVerticalWhitespace(*First)) {
    332       skipNewline(First, End);
    333       return;
    334     }
    335     const char *Start = First;
    336     while (First != End && !isVerticalWhitespace(*First)) {
    337       // Iterate over strings correctly to avoid comments and newlines.
    338       if (*First == '"' ||
    339           (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
    340         if (isRawStringLiteral(Start, First))
    341           skipRawString(First, End);
    342         else
    343           skipString(First, End);
    344         continue;
    345       }
    346 
    347       // Iterate over comments correctly.
    348       if (*First != '/' || End - First < 2) {
    349         ++First;
    350         continue;
    351       }
    352 
    353       if (First[1] == '/') {
    354         // "//...".
    355         skipLineComment(First, End);
    356         continue;
    357       }
    358 
    359       if (First[1] != '*') {
    360         ++First;
    361         continue;
    362       }
    363 
    364       // "/*...*/".
    365       skipBlockComment(First, End);
    366     }
    367     if (First == End)
    368       return;
    369 
    370     // Skip over the newline.
    371     unsigned Len = skipNewline(First, End);
    372     if (!wasLineContinuation(First, Len)) // Continue past line-continuations.
    373       break;
    374   }
    375 }
    376 
    377 static void skipDirective(StringRef Name, const char *&First,
    378                           const char *const End) {
    379   if (llvm::StringSwitch<bool>(Name)
    380           .Case("warning", true)
    381           .Case("error", true)
    382           .Default(false))
    383     // Do not process quotes or comments.
    384     skipToNewlineRaw(First, End);
    385   else
    386     skipLine(First, End);
    387 }
    388 
    389 void Minimizer::printToNewline(const char *&First, const char *const End) {
    390   while (First != End && !isVerticalWhitespace(*First)) {
    391     const char *Last = First;
    392     do {
    393       // Iterate over strings correctly to avoid comments and newlines.
    394       if (*Last == '"' || *Last == '\'' ||
    395           (*Last == '<' && top() == pp_include)) {
    396         if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))
    397           skipRawString(Last, End);
    398         else
    399           skipString(Last, End);
    400         continue;
    401       }
    402       if (*Last != '/' || End - Last < 2) {
    403         ++Last;
    404         continue; // Gather the rest up to print verbatim.
    405       }
    406 
    407       if (Last[1] != '/' && Last[1] != '*') {
    408         ++Last;
    409         continue;
    410       }
    411 
    412       // Deal with "//..." and "/*...*/".
    413       append(First, findFirstTrailingSpace(First, Last));
    414       First = Last;
    415 
    416       if (Last[1] == '/') {
    417         skipLineComment(First, End);
    418         return;
    419       }
    420 
    421       put(' ');
    422       skipBlockComment(First, End);
    423       skipOverSpaces(First, End);
    424       Last = First;
    425     } while (Last != End && !isVerticalWhitespace(*Last));
    426 
    427     // Print out the string.
    428     const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last);
    429     if (Last == End || LastBeforeTrailingSpace == First ||
    430         LastBeforeTrailingSpace[-1] != '\\') {
    431       append(First, LastBeforeTrailingSpace);
    432       First = Last;
    433       skipNewline(First, End);
    434       return;
    435     }
    436 
    437     // Print up to the backslash, backing up over spaces. Preserve at least one
    438     // space, as the space matters when tokens are separated by a line
    439     // continuation.
    440     append(First, findFirstTrailingSpace(
    441                       First, LastBeforeTrailingSpace - 1));
    442 
    443     First = Last;
    444     skipNewline(First, End);
    445     skipOverSpaces(First, End);
    446   }
    447 }
    448 
    449 static void skipWhitespace(const char *&First, const char *const End) {
    450   for (;;) {
    451     assert(First <= End);
    452     skipOverSpaces(First, End);
    453 
    454     if (End - First < 2)
    455       return;
    456 
    457     if (First[0] == '\\' && isVerticalWhitespace(First[1])) {
    458       skipNewline(++First, End);
    459       continue;
    460     }
    461 
    462     // Check for a non-comment character.
    463     if (First[0] != '/')
    464       return;
    465 
    466     // "// ...".
    467     if (First[1] == '/') {
    468       skipLineComment(First, End);
    469       return;
    470     }
    471 
    472     // Cannot be a comment.
    473     if (First[1] != '*')
    474       return;
    475 
    476     // "/*...*/".
    477     skipBlockComment(First, End);
    478   }
    479 }
    480 
    481 void Minimizer::printAdjacentModuleNameParts(const char *&First,
    482                                              const char *const End) {
    483   // Skip over parts of the body.
    484   const char *Last = First;
    485   do
    486     ++Last;
    487   while (Last != End && (isIdentifierBody(*Last) || *Last == '.'));
    488   append(First, Last);
    489   First = Last;
    490 }
    491 
    492 bool Minimizer::printAtImportBody(const char *&First, const char *const End) {
    493   for (;;) {
    494     skipWhitespace(First, End);
    495     if (First == End)
    496       return true;
    497 
    498     if (isVerticalWhitespace(*First)) {
    499       skipNewline(First, End);
    500       continue;
    501     }
    502 
    503     // Found a semicolon.
    504     if (*First == ';') {
    505       put(*First++).put('\n');
    506       return false;
    507     }
    508 
    509     // Don't handle macro expansions inside @import for now.
    510     if (!isIdentifierBody(*First) && *First != '.')
    511       return true;
    512 
    513     printAdjacentModuleNameParts(First, End);
    514   }
    515 }
    516 
    517 void Minimizer::printDirectiveBody(const char *&First, const char *const End) {
    518   skipWhitespace(First, End); // Skip initial whitespace.
    519   printToNewline(First, End);
    520   while (Out.back() == ' ')
    521     Out.pop_back();
    522   put('\n');
    523 }
    524 
    525 LLVM_NODISCARD static const char *lexRawIdentifier(const char *First,
    526                                                    const char *const End) {
    527   assert(isIdentifierBody(*First) && "invalid identifer");
    528   const char *Last = First + 1;
    529   while (Last != End && isIdentifierBody(*Last))
    530     ++Last;
    531   return Last;
    532 }
    533 
    534 LLVM_NODISCARD static const char *
    535 getIdentifierContinuation(const char *First, const char *const End) {
    536   if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1]))
    537     return nullptr;
    538 
    539   ++First;
    540   skipNewline(First, End);
    541   if (First == End)
    542     return nullptr;
    543   return isIdentifierBody(First[0]) ? First : nullptr;
    544 }
    545 
    546 Minimizer::IdInfo Minimizer::lexIdentifier(const char *First,
    547                                            const char *const End) {
    548   const char *Last = lexRawIdentifier(First, End);
    549   const char *Next = getIdentifierContinuation(Last, End);
    550   if (LLVM_LIKELY(!Next))
    551     return IdInfo{Last, StringRef(First, Last - First)};
    552 
    553   // Slow path, where identifiers are split over lines.
    554   SmallVector<char, 64> Id(First, Last);
    555   while (Next) {
    556     Last = lexRawIdentifier(Next, End);
    557     Id.append(Next, Last);
    558     Next = getIdentifierContinuation(Last, End);
    559   }
    560   return IdInfo{
    561       Last,
    562       SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()};
    563 }
    564 
    565 void Minimizer::printAdjacentMacroArgs(const char *&First,
    566                                        const char *const End) {
    567   // Skip over parts of the body.
    568   const char *Last = First;
    569   do
    570     ++Last;
    571   while (Last != End &&
    572          (isIdentifierBody(*Last) || *Last == '.' || *Last == ','));
    573   append(First, Last);
    574   First = Last;
    575 }
    576 
    577 bool Minimizer::printMacroArgs(const char *&First, const char *const End) {
    578   assert(*First == '(');
    579   put(*First++);
    580   for (;;) {
    581     skipWhitespace(First, End);
    582     if (First == End)
    583       return true;
    584 
    585     if (*First == ')') {
    586       put(*First++);
    587       return false;
    588     }
    589 
    590     // This is intentionally fairly liberal.
    591     if (!(isIdentifierBody(*First) || *First == '.' || *First == ','))
    592       return true;
    593 
    594     printAdjacentMacroArgs(First, End);
    595   }
    596 }
    597 
    598 /// Looks for an identifier starting from Last.
    599 ///
    600 /// Updates "First" to just past the next identifier, if any.  Returns true iff
    601 /// the identifier matches "Id".
    602 bool Minimizer::isNextIdentifier(StringRef Id, const char *&First,
    603                                  const char *const End) {
    604   skipWhitespace(First, End);
    605   if (First == End || !isIdentifierHead(*First))
    606     return false;
    607 
    608   IdInfo FoundId = lexIdentifier(First, End);
    609   First = FoundId.Last;
    610   return FoundId.Name == Id;
    611 }
    612 
    613 bool Minimizer::lexAt(const char *&First, const char *const End) {
    614   // Handle "@import".
    615   const char *ImportLoc = First++;
    616   if (!isNextIdentifier("import", First, End)) {
    617     skipLine(First, End);
    618     return false;
    619   }
    620   makeToken(decl_at_import);
    621   append("@import ");
    622   if (printAtImportBody(First, End))
    623     return reportError(
    624         ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import);
    625   skipWhitespace(First, End);
    626   if (First == End)
    627     return false;
    628   if (!isVerticalWhitespace(*First))
    629     return reportError(
    630         ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import);
    631   skipNewline(First, End);
    632   return false;
    633 }
    634 
    635 bool Minimizer::lexModule(const char *&First, const char *const End) {
    636   IdInfo Id = lexIdentifier(First, End);
    637   First = Id.Last;
    638   bool Export = false;
    639   if (Id.Name == "export") {
    640     Export = true;
    641     skipWhitespace(First, End);
    642     if (!isIdentifierBody(*First)) {
    643       skipLine(First, End);
    644       return false;
    645     }
    646     Id = lexIdentifier(First, End);
    647     First = Id.Last;
    648   }
    649 
    650   if (Id.Name != "module" && Id.Name != "import") {
    651     skipLine(First, End);
    652     return false;
    653   }
    654 
    655   skipWhitespace(First, End);
    656 
    657   // Ignore this as a module directive if the next character can't be part of
    658   // an import.
    659 
    660   switch (*First) {
    661   case ':':
    662   case '<':
    663   case '"':
    664     break;
    665   default:
    666     if (!isIdentifierBody(*First)) {
    667       skipLine(First, End);
    668       return false;
    669     }
    670   }
    671 
    672   if (Export) {
    673     makeToken(cxx_export_decl);
    674     append("export ");
    675   }
    676 
    677   if (Id.Name == "module")
    678     makeToken(cxx_module_decl);
    679   else
    680     makeToken(cxx_import_decl);
    681   append(Id.Name);
    682   append(" ");
    683   printToNewline(First, End);
    684   append("\n");
    685   return false;
    686 }
    687 
    688 bool Minimizer::lexDefine(const char *&First, const char *const End) {
    689   makeToken(pp_define);
    690   append("#define ");
    691   skipWhitespace(First, End);
    692 
    693   if (!isIdentifierHead(*First))
    694     return reportError(First, diag::err_pp_macro_not_identifier);
    695 
    696   IdInfo Id = lexIdentifier(First, End);
    697   const char *Last = Id.Last;
    698   append(Id.Name);
    699   if (Last == End)
    700     return false;
    701   if (*Last == '(') {
    702     size_t Size = Out.size();
    703     if (printMacroArgs(Last, End)) {
    704       // Be robust to bad macro arguments, since they can show up in disabled
    705       // code.
    706       Out.resize(Size);
    707       append("(/* invalid */\n");
    708       skipLine(Last, End);
    709       return false;
    710     }
    711   }
    712   skipWhitespace(Last, End);
    713   if (Last == End)
    714     return false;
    715   if (!isVerticalWhitespace(*Last))
    716     put(' ');
    717   printDirectiveBody(Last, End);
    718   First = Last;
    719   return false;
    720 }
    721 
    722 bool Minimizer::lexPragma(const char *&First, const char *const End) {
    723   // #pragma.
    724   skipWhitespace(First, End);
    725   if (First == End || !isIdentifierHead(*First))
    726     return false;
    727 
    728   IdInfo FoundId = lexIdentifier(First, End);
    729   First = FoundId.Last;
    730   if (FoundId.Name == "once") {
    731     // #pragma once
    732     skipLine(First, End);
    733     makeToken(pp_pragma_once);
    734     append("#pragma once\n");
    735     return false;
    736   }
    737 
    738   if (FoundId.Name != "clang") {
    739     skipLine(First, End);
    740     return false;
    741   }
    742 
    743   // #pragma clang.
    744   if (!isNextIdentifier("module", First, End)) {
    745     skipLine(First, End);
    746     return false;
    747   }
    748 
    749   // #pragma clang module.
    750   if (!isNextIdentifier("import", First, End)) {
    751     skipLine(First, End);
    752     return false;
    753   }
    754 
    755   // #pragma clang module import.
    756   makeToken(pp_pragma_import);
    757   append("#pragma clang module import ");
    758   printDirectiveBody(First, End);
    759   return false;
    760 }
    761 
    762 bool Minimizer::lexEndif(const char *&First, const char *const End) {
    763   // Strip out "#else" if it's empty.
    764   if (top() == pp_else)
    765     popToken();
    766 
    767   // If "#ifdef" is empty, strip it and skip the "#endif".
    768   //
    769   // FIXME: Once/if Clang starts disallowing __has_include in macro expansions,
    770   // we can skip empty `#if` and `#elif` blocks as well after scanning for a
    771   // literal __has_include in the condition.  Even without that rule we could
    772   // drop the tokens if we scan for identifiers in the condition and find none.
    773   if (top() == pp_ifdef || top() == pp_ifndef) {
    774     popToken();
    775     skipLine(First, End);
    776     return false;
    777   }
    778 
    779   return lexDefault(pp_endif, "endif", First, End);
    780 }
    781 
    782 bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive,
    783                            const char *&First, const char *const End) {
    784   makeToken(Kind);
    785   put('#').append(Directive).put(' ');
    786   printDirectiveBody(First, End);
    787   return false;
    788 }
    789 
/// Tells whether a line beginning with \p First needs a closer look: '#' and
/// '@' lines, and lines starting with 'i', 'e' or 'm'.
static bool isStartOfRelevantLine(char First) {
  return First == '#' || First == '@' || First == 'i' || First == 'e' ||
         First == 'm';
}
    801 
    802 bool Minimizer::lexPPLine(const char *&First, const char *const End) {
    803   assert(First != End);
    804 
    805   skipWhitespace(First, End);
    806   assert(First <= End);
    807   if (First == End)
    808     return false;
    809 
    810   if (!isStartOfRelevantLine(*First)) {
    811     skipLine(First, End);
    812     assert(First <= End);
    813     return false;
    814   }
    815 
    816   // Handle "@import".
    817   if (*First == '@')
    818     return lexAt(First, End);
    819 
    820   if (*First == 'i' || *First == 'e' || *First == 'm')
    821     return lexModule(First, End);
    822 
    823   // Handle preprocessing directives.
    824   ++First; // Skip over '#'.
    825   skipWhitespace(First, End);
    826 
    827   if (First == End)
    828     return reportError(First, diag::err_pp_expected_eol);
    829 
    830   if (!isIdentifierHead(*First)) {
    831     skipLine(First, End);
    832     return false;
    833   }
    834 
    835   // Figure out the token.
    836   IdInfo Id = lexIdentifier(First, End);
    837   First = Id.Last;
    838   auto Kind = llvm::StringSwitch<TokenKind>(Id.Name)
    839                   .Case("include", pp_include)
    840                   .Case("__include_macros", pp___include_macros)
    841                   .Case("define", pp_define)
    842                   .Case("undef", pp_undef)
    843                   .Case("import", pp_import)
    844                   .Case("include_next", pp_include_next)
    845                   .Case("if", pp_if)
    846                   .Case("ifdef", pp_ifdef)
    847                   .Case("ifndef", pp_ifndef)
    848                   .Case("elif", pp_elif)
    849                   .Case("else", pp_else)
    850                   .Case("endif", pp_endif)
    851                   .Case("pragma", pp_pragma_import)
    852                   .Default(pp_none);
    853   if (Kind == pp_none) {
    854     skipDirective(Id.Name, First, End);
    855     return false;
    856   }
    857 
    858   if (Kind == pp_endif)
    859     return lexEndif(First, End);
    860 
    861   if (Kind == pp_define)
    862     return lexDefine(First, End);
    863 
    864   if (Kind == pp_pragma_import)
    865     return lexPragma(First, End);
    866 
    867   // Everything else.
    868   return lexDefault(Kind, Id.Name, First, End);
    869 }
    870 
/// Advance \p First past a leading UTF-8 byte order mark (EF BB BF), if the
/// buffer starts with one.
static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
  const bool HasBOM = (End - First) >= 3 && First[0] == '\xef' &&
                      First[1] == '\xbb' && First[2] == '\xbf';
  if (HasBOM)
    First += 3;
}
    876 
    877 bool Minimizer::minimizeImpl(const char *First, const char *const End) {
    878   skipUTF8ByteOrderMark(First, End);
    879   while (First != End)
    880     if (lexPPLine(First, End))
    881       return true;
    882   return false;
    883 }
    884 
    885 bool Minimizer::minimize() {
    886   bool Error = minimizeImpl(Input.begin(), Input.end());
    887 
    888   if (!Error) {
    889     // Add a trailing newline and an EOF on success.
    890     if (!Out.empty() && Out.back() != '\n')
    891       Out.push_back('\n');
    892     makeToken(pp_eof);
    893   }
    894 
    895   // Null-terminate the output. This way the memory buffer that's passed to
    896   // Clang will not have to worry about the terminating '\0'.
    897   Out.push_back(0);
    898   Out.pop_back();
    899   return Error;
    900 }
    901 
    902 bool clang::minimize_source_to_dependency_directives::computeSkippedRanges(
    903     ArrayRef<Token> Input, llvm::SmallVectorImpl<SkippedRange> &Range) {
    904   struct Directive {
    905     enum DirectiveKind {
    906       If,  // if/ifdef/ifndef
    907       Else // elif,else
    908     };
    909     int Offset;
    910     DirectiveKind Kind;
    911   };
    912   llvm::SmallVector<Directive, 32> Offsets;
    913   for (const Token &T : Input) {
    914     switch (T.K) {
    915     case pp_if:
    916     case pp_ifdef:
    917     case pp_ifndef:
    918       Offsets.push_back({T.Offset, Directive::If});
    919       break;
    920 
    921     case pp_elif:
    922     case pp_else: {
    923       if (Offsets.empty())
    924         return true;
    925       int PreviousOffset = Offsets.back().Offset;
    926       Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
    927       Offsets.push_back({T.Offset, Directive::Else});
    928       break;
    929     }
    930 
    931     case pp_endif: {
    932       if (Offsets.empty())
    933         return true;
    934       int PreviousOffset = Offsets.back().Offset;
    935       Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
    936       do {
    937         Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind;
    938         if (Kind == Directive::If)
    939           break;
    940       } while (!Offsets.empty());
    941       break;
    942     }
    943     default:
    944       break;
    945     }
    946   }
    947   return false;
    948 }
    949 
    950 bool clang::minimizeSourceToDependencyDirectives(
    951     StringRef Input, SmallVectorImpl<char> &Output,
    952     SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags,
    953     SourceLocation InputSourceLoc) {
    954   Output.clear();
    955   Tokens.clear();
    956   return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize();
    957 }
    958