Home | History | Annotate | Line # | Download | only in AST
      1      1.1  joerg //===--- CommentLexer.cpp -------------------------------------------------===//
      2      1.1  joerg //
      3      1.1  joerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4      1.1  joerg // See https://llvm.org/LICENSE.txt for license information.
      5      1.1  joerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6      1.1  joerg //
      7      1.1  joerg //===----------------------------------------------------------------------===//
      8      1.1  joerg 
      9      1.1  joerg #include "clang/AST/CommentLexer.h"
     10      1.1  joerg #include "clang/AST/CommentCommandTraits.h"
     11      1.1  joerg #include "clang/AST/CommentDiagnostic.h"
     12      1.1  joerg #include "clang/Basic/CharInfo.h"
     13      1.1  joerg #include "llvm/ADT/StringExtras.h"
     14      1.1  joerg #include "llvm/ADT/StringSwitch.h"
     15      1.1  joerg #include "llvm/Support/ConvertUTF.h"
     16      1.1  joerg #include "llvm/Support/ErrorHandling.h"
     17      1.1  joerg 
     18      1.1  joerg namespace clang {
     19      1.1  joerg namespace comments {
     20      1.1  joerg 
     21      1.1  joerg void Token::dump(const Lexer &L, const SourceManager &SM) const {
     22      1.1  joerg   llvm::errs() << "comments::Token Kind=" << Kind << " ";
     23      1.1  joerg   Loc.print(llvm::errs(), SM);
     24      1.1  joerg   llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
     25      1.1  joerg }
     26      1.1  joerg 
     27      1.1  joerg static inline bool isHTMLNamedCharacterReferenceCharacter(char C) {
     28      1.1  joerg   return isLetter(C);
     29      1.1  joerg }
     30      1.1  joerg 
     31      1.1  joerg static inline bool isHTMLDecimalCharacterReferenceCharacter(char C) {
     32      1.1  joerg   return isDigit(C);
     33      1.1  joerg }
     34      1.1  joerg 
     35      1.1  joerg static inline bool isHTMLHexCharacterReferenceCharacter(char C) {
     36      1.1  joerg   return isHexDigit(C);
     37      1.1  joerg }
     38      1.1  joerg 
     39      1.1  joerg static inline StringRef convertCodePointToUTF8(
     40      1.1  joerg                                       llvm::BumpPtrAllocator &Allocator,
     41      1.1  joerg                                       unsigned CodePoint) {
     42      1.1  joerg   char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
     43      1.1  joerg   char *ResolvedPtr = Resolved;
     44      1.1  joerg   if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
     45      1.1  joerg     return StringRef(Resolved, ResolvedPtr - Resolved);
     46      1.1  joerg   else
     47      1.1  joerg     return StringRef();
     48      1.1  joerg }
     49      1.1  joerg 
     50      1.1  joerg namespace {
     51      1.1  joerg 
     52      1.1  joerg #include "clang/AST/CommentHTMLTags.inc"
     53      1.1  joerg #include "clang/AST/CommentHTMLNamedCharacterReferences.inc"
     54      1.1  joerg 
     55      1.1  joerg } // end anonymous namespace
     56      1.1  joerg 
     57      1.1  joerg StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
     58      1.1  joerg   // Fast path, first check a few most widely used named character references.
     59      1.1  joerg   return llvm::StringSwitch<StringRef>(Name)
     60      1.1  joerg       .Case("amp", "&")
     61      1.1  joerg       .Case("lt", "<")
     62      1.1  joerg       .Case("gt", ">")
     63      1.1  joerg       .Case("quot", "\"")
     64      1.1  joerg       .Case("apos", "\'")
     65      1.1  joerg       // Slow path.
     66      1.1  joerg       .Default(translateHTMLNamedCharacterReferenceToUTF8(Name));
     67      1.1  joerg }
     68      1.1  joerg 
     69      1.1  joerg StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
     70      1.1  joerg   unsigned CodePoint = 0;
     71      1.1  joerg   for (unsigned i = 0, e = Name.size(); i != e; ++i) {
     72      1.1  joerg     assert(isHTMLDecimalCharacterReferenceCharacter(Name[i]));
     73      1.1  joerg     CodePoint *= 10;
     74      1.1  joerg     CodePoint += Name[i] - '0';
     75      1.1  joerg   }
     76      1.1  joerg   return convertCodePointToUTF8(Allocator, CodePoint);
     77      1.1  joerg }
     78      1.1  joerg 
     79      1.1  joerg StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
     80      1.1  joerg   unsigned CodePoint = 0;
     81      1.1  joerg   for (unsigned i = 0, e = Name.size(); i != e; ++i) {
     82      1.1  joerg     CodePoint *= 16;
     83      1.1  joerg     const char C = Name[i];
     84      1.1  joerg     assert(isHTMLHexCharacterReferenceCharacter(C));
     85      1.1  joerg     CodePoint += llvm::hexDigitValue(C);
     86      1.1  joerg   }
     87      1.1  joerg   return convertCodePointToUTF8(Allocator, CodePoint);
     88      1.1  joerg }
     89      1.1  joerg 
     90      1.1  joerg void Lexer::skipLineStartingDecorations() {
     91      1.1  joerg   // This function should be called only for C comments
     92      1.1  joerg   assert(CommentState == LCS_InsideCComment);
     93      1.1  joerg 
     94      1.1  joerg   if (BufferPtr == CommentEnd)
     95      1.1  joerg     return;
     96      1.1  joerg 
     97      1.1  joerg   switch (*BufferPtr) {
     98      1.1  joerg   case ' ':
     99      1.1  joerg   case '\t':
    100      1.1  joerg   case '\f':
    101      1.1  joerg   case '\v': {
    102      1.1  joerg     const char *NewBufferPtr = BufferPtr;
    103      1.1  joerg     NewBufferPtr++;
    104      1.1  joerg     if (NewBufferPtr == CommentEnd)
    105      1.1  joerg       return;
    106      1.1  joerg 
    107      1.1  joerg     char C = *NewBufferPtr;
    108      1.1  joerg     while (isHorizontalWhitespace(C)) {
    109      1.1  joerg       NewBufferPtr++;
    110      1.1  joerg       if (NewBufferPtr == CommentEnd)
    111      1.1  joerg         return;
    112      1.1  joerg       C = *NewBufferPtr;
    113      1.1  joerg     }
    114      1.1  joerg     if (C == '*')
    115      1.1  joerg       BufferPtr = NewBufferPtr + 1;
    116      1.1  joerg     break;
    117      1.1  joerg   }
    118      1.1  joerg   case '*':
    119      1.1  joerg     BufferPtr++;
    120      1.1  joerg     break;
    121      1.1  joerg   }
    122      1.1  joerg }
    123      1.1  joerg 
    124      1.1  joerg namespace {
    125      1.1  joerg /// Returns pointer to the first newline character in the string.
    126      1.1  joerg const char *findNewline(const char *BufferPtr, const char *BufferEnd) {
    127      1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    128      1.1  joerg     if (isVerticalWhitespace(*BufferPtr))
    129      1.1  joerg       return BufferPtr;
    130      1.1  joerg   }
    131      1.1  joerg   return BufferEnd;
    132      1.1  joerg }
    133      1.1  joerg 
/// Step over one line terminator ("\n", "\r" or "\r\n") at \p BufferPtr.
///
/// \returns the position just past the terminator, or \p BufferPtr unchanged
/// when it already equals \p BufferEnd.
const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
  if (BufferPtr == BufferEnd)
    return BufferPtr;

  if (*BufferPtr == '\n')
    return BufferPtr + 1;

  assert(*BufferPtr == '\r');
  const char *AfterCR = BufferPtr + 1;
  // Treat "\r\n" as a single terminator.
  if (AfterCR != BufferEnd && *AfterCR == '\n')
    return AfterCR + 1;
  return AfterCR;
}
    148      1.1  joerg 
    149      1.1  joerg const char *skipNamedCharacterReference(const char *BufferPtr,
    150      1.1  joerg                                         const char *BufferEnd) {
    151      1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    152      1.1  joerg     if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr))
    153      1.1  joerg       return BufferPtr;
    154      1.1  joerg   }
    155      1.1  joerg   return BufferEnd;
    156      1.1  joerg }
    157      1.1  joerg 
    158      1.1  joerg const char *skipDecimalCharacterReference(const char *BufferPtr,
    159      1.1  joerg                                           const char *BufferEnd) {
    160      1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    161      1.1  joerg     if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr))
    162      1.1  joerg       return BufferPtr;
    163      1.1  joerg   }
    164      1.1  joerg   return BufferEnd;
    165      1.1  joerg }
    166      1.1  joerg 
    167      1.1  joerg const char *skipHexCharacterReference(const char *BufferPtr,
    168      1.1  joerg                                       const char *BufferEnd) {
    169      1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    170      1.1  joerg     if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr))
    171      1.1  joerg       return BufferPtr;
    172      1.1  joerg   }
    173      1.1  joerg   return BufferEnd;
    174      1.1  joerg }
    175      1.1  joerg 
    176      1.1  joerg bool isHTMLIdentifierStartingCharacter(char C) {
    177      1.1  joerg   return isLetter(C);
    178      1.1  joerg }
    179      1.1  joerg 
    180      1.1  joerg bool isHTMLIdentifierCharacter(char C) {
    181      1.1  joerg   return isAlphanumeric(C);
    182      1.1  joerg }
    183      1.1  joerg 
    184      1.1  joerg const char *skipHTMLIdentifier(const char *BufferPtr, const char *BufferEnd) {
    185      1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    186      1.1  joerg     if (!isHTMLIdentifierCharacter(*BufferPtr))
    187      1.1  joerg       return BufferPtr;
    188      1.1  joerg   }
    189      1.1  joerg   return BufferEnd;
    190      1.1  joerg }
    191      1.1  joerg 
/// Skip an HTML string quoted with single or double quotes.  \p BufferPtr
/// must point at the opening quote.  A quote character immediately preceded
/// by a backslash does not terminate the string.
///
/// Returns a pointer to the closing quote, or \p BufferEnd if the string is
/// not terminated.
const char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd)
{
  const char Quote = *BufferPtr;
  assert(Quote == '\"' || Quote == '\'');

  const char *Cursor = BufferPtr + 1;
  while (Cursor != BufferEnd) {
    const bool PrecededByBackslash = Cursor[-1] == '\\';
    if (*Cursor == Quote && !PrecededByBackslash)
      return Cursor;
    ++Cursor;
  }
  return BufferEnd;
}
    209      1.1  joerg 
    210      1.1  joerg const char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) {
    211      1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    212      1.1  joerg     if (!isWhitespace(*BufferPtr))
    213      1.1  joerg       return BufferPtr;
    214      1.1  joerg   }
    215      1.1  joerg   return BufferEnd;
    216      1.1  joerg }
    217      1.1  joerg 
    218      1.1  joerg bool isWhitespace(const char *BufferPtr, const char *BufferEnd) {
    219      1.1  joerg   return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd;
    220      1.1  joerg }
    221      1.1  joerg 
    222      1.1  joerg bool isCommandNameStartCharacter(char C) {
    223      1.1  joerg   return isLetter(C);
    224      1.1  joerg }
    225      1.1  joerg 
    226      1.1  joerg bool isCommandNameCharacter(char C) {
    227      1.1  joerg   return isAlphanumeric(C);
    228      1.1  joerg }
    229      1.1  joerg 
    230      1.1  joerg const char *skipCommandName(const char *BufferPtr, const char *BufferEnd) {
    231      1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    232      1.1  joerg     if (!isCommandNameCharacter(*BufferPtr))
    233      1.1  joerg       return BufferPtr;
    234      1.1  joerg   }
    235      1.1  joerg   return BufferEnd;
    236      1.1  joerg }
    237      1.1  joerg 
    238      1.1  joerg /// Return the one past end pointer for BCPL comments.
    239      1.1  joerg /// Handles newlines escaped with backslash or trigraph for backslahs.
    240      1.1  joerg const char *findBCPLCommentEnd(const char *BufferPtr, const char *BufferEnd) {
    241      1.1  joerg   const char *CurPtr = BufferPtr;
    242      1.1  joerg   while (CurPtr != BufferEnd) {
    243      1.1  joerg     while (!isVerticalWhitespace(*CurPtr)) {
    244      1.1  joerg       CurPtr++;
    245      1.1  joerg       if (CurPtr == BufferEnd)
    246      1.1  joerg         return BufferEnd;
    247      1.1  joerg     }
    248      1.1  joerg     // We found a newline, check if it is escaped.
    249      1.1  joerg     const char *EscapePtr = CurPtr - 1;
    250      1.1  joerg     while(isHorizontalWhitespace(*EscapePtr))
    251      1.1  joerg       EscapePtr--;
    252      1.1  joerg 
    253      1.1  joerg     if (*EscapePtr == '\\' ||
    254      1.1  joerg         (EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
    255      1.1  joerg          EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
    256      1.1  joerg       // We found an escaped newline.
    257      1.1  joerg       CurPtr = skipNewline(CurPtr, BufferEnd);
    258      1.1  joerg     } else
    259      1.1  joerg       return CurPtr; // Not an escaped newline.
    260      1.1  joerg   }
    261      1.1  joerg   return BufferEnd;
    262      1.1  joerg }
    263      1.1  joerg 
    264      1.1  joerg /// Return the one past end pointer for C comments.
    265      1.1  joerg /// Very dumb, does not handle escaped newlines or trigraphs.
    266      1.1  joerg const char *findCCommentEnd(const char *BufferPtr, const char *BufferEnd) {
    267      1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    268      1.1  joerg     if (*BufferPtr == '*') {
    269      1.1  joerg       assert(BufferPtr + 1 != BufferEnd);
    270      1.1  joerg       if (*(BufferPtr + 1) == '/')
    271      1.1  joerg         return BufferPtr;
    272      1.1  joerg     }
    273      1.1  joerg   }
    274      1.1  joerg   llvm_unreachable("buffer end hit before '*/' was seen");
    275      1.1  joerg }
    276      1.1  joerg 
    277      1.1  joerg } // end anonymous namespace
    278      1.1  joerg 
    279      1.1  joerg void Lexer::formTokenWithChars(Token &Result, const char *TokEnd,
    280      1.1  joerg                                tok::TokenKind Kind) {
    281      1.1  joerg   const unsigned TokLen = TokEnd - BufferPtr;
    282      1.1  joerg   Result.setLocation(getSourceLocation(BufferPtr));
    283      1.1  joerg   Result.setKind(Kind);
    284      1.1  joerg   Result.setLength(TokLen);
    285      1.1  joerg #ifndef NDEBUG
    286      1.1  joerg   Result.TextPtr = "<UNSET>";
    287      1.1  joerg   Result.IntVal = 7;
    288      1.1  joerg #endif
    289      1.1  joerg   BufferPtr = TokEnd;
    290      1.1  joerg }
    291      1.1  joerg 
/// Lex the next token from the body of the current comment into \p T.
///
/// When command parsing is disabled only text and newline tokens are
/// produced.  Otherwise the call is first dispatched on the lexer State
/// (verbatim block/line and HTML tag states have dedicated lexers), and in
/// the normal state the first character decides between a command ('\\' or
/// '@'), an HTML character reference ('&'), an HTML tag ('<') and plain text.
void Lexer::lexCommentText(Token &T) {
  assert(CommentState == LCS_InsideBCPLComment ||
         CommentState == LCS_InsideCComment);

  // Handles lexing non-command text, i.e. text and newline.
  auto HandleNonCommandToken = [&]() -> void {
    assert(State == LS_Normal);

    const char *TokenPtr = BufferPtr;
    assert(TokenPtr < CommentEnd);
    switch (*TokenPtr) {
      case '\n':
      case '\r':
          TokenPtr = skipNewline(TokenPtr, CommentEnd);
          formTokenWithChars(T, TokenPtr, tok::newline);

          // In C comments the next line may begin with a " * " decoration.
          if (CommentState == LCS_InsideCComment)
            skipLineStartingDecorations();
          return;

      default: {
          // Text runs up to the next character that could start a different
          // kind of token; with command parsing off only newlines do.
          StringRef TokStartSymbols = ParseCommands ? "\n\r\\@&<" : "\n\r";
          size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr)
                           .find_first_of(TokStartSymbols);
          if (End != StringRef::npos)
            TokenPtr += End;
          else
            TokenPtr = CommentEnd;
          formTextToken(T, TokenPtr);
          return;
      }
    }
  };

  if (!ParseCommands)
    return HandleNonCommandToken();

  // Non-normal states are handled entirely by their dedicated lexers.
  switch (State) {
  case LS_Normal:
    break;
  case LS_VerbatimBlockFirstLine:
    lexVerbatimBlockFirstLine(T);
    return;
  case LS_VerbatimBlockBody:
    lexVerbatimBlockBody(T);
    return;
  case LS_VerbatimLineText:
    lexVerbatimLineText(T);
    return;
  case LS_HTMLStartTag:
    lexHTMLStartTag(T);
    return;
  case LS_HTMLEndTag:
    lexHTMLEndTag(T);
    return;
  }

  assert(State == LS_Normal);
  const char *TokenPtr = BufferPtr;
  assert(TokenPtr < CommentEnd);
  switch(*TokenPtr) {
    case '\\':
    case '@': {
      // Commands that start with a backslash and commands that start with
      // 'at' have equivalent semantics.  But we keep information about the
      // exact syntax in AST for comments.
      tok::TokenKind CommandKind =
          (*TokenPtr == '@') ? tok::at_command : tok::backslash_command;
      TokenPtr++;
      // A lone marker at the very end of the comment is just text.
      if (TokenPtr == CommentEnd) {
        formTextToken(T, TokenPtr);
        return;
      }
      char C = *TokenPtr;
      switch (C) {
      default:
        break;

      case '\\': case '@': case '&': case '$':
      case '#':  case '<': case '>': case '%':
      case '\"': case '.': case ':':
        // This is one of \\ \@ \& \$ etc escape sequences.
        TokenPtr++;
        if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
          // This is the \:: escape sequence.
          TokenPtr++;
        }
        // The token covers the marker too, but its text is only the escaped
        // character(s) that follow the marker.
        StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
        formTokenWithChars(T, TokenPtr, tok::text);
        T.setText(UnescapedText);
        return;
      }

      // Don't make zero-length commands.
      if (!isCommandNameStartCharacter(*TokenPtr)) {
        formTextToken(T, TokenPtr);
        return;
      }

      TokenPtr = skipCommandName(TokenPtr, CommentEnd);
      // Length of the command name, excluding the leading marker.
      unsigned Length = TokenPtr - (BufferPtr + 1);

      // Hardcoded support for lexing LaTeX formula commands
      // \f$ \f[ \f] \f{ \f} as a single command.
      if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
        C = *TokenPtr;
        if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') {
          TokenPtr++;
          Length++;
        }
      }

      StringRef CommandName(BufferPtr + 1, Length);

      const CommandInfo *Info = Traits.getCommandInfoOrNULL(CommandName);
      if (!Info) {
        // Unknown name: if a typo correction is available, warn with a fix-it
        // and continue lexing as the corrected command; otherwise emit an
        // unknown_command token and warn.
        if ((Info = Traits.getTypoCorrectCommandInfo(CommandName))) {
          StringRef CorrectedName = Info->Name;
          SourceLocation Loc = getSourceLocation(BufferPtr);
          SourceLocation EndLoc = getSourceLocation(TokenPtr);
          SourceRange FullRange = SourceRange(Loc, EndLoc);
          // Range of the name only, i.e. without the leading marker.
          SourceRange CommandRange(Loc.getLocWithOffset(1), EndLoc);
          Diag(Loc, diag::warn_correct_comment_command_name)
            << FullRange << CommandName << CorrectedName
            << FixItHint::CreateReplacement(CommandRange, CorrectedName);
        } else {
          formTokenWithChars(T, TokenPtr, tok::unknown_command);
          T.setUnknownCommandName(CommandName);
          Diag(T.getLocation(), diag::warn_unknown_comment_command_name)
              << SourceRange(T.getLocation(), T.getEndLocation());
          return;
        }
      }
      // Verbatim commands switch the lexer into a dedicated state.
      if (Info->IsVerbatimBlockCommand) {
        setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, Info);
        return;
      }
      if (Info->IsVerbatimLineCommand) {
        setupAndLexVerbatimLine(T, TokenPtr, Info);
        return;
      }
      formTokenWithChars(T, TokenPtr, CommandKind);
      T.setCommandID(Info->getID());
      return;
    }

    case '&':
      lexHTMLCharacterReference(T);
      return;

    case '<': {
      TokenPtr++;
      // A lone '<' at the very end of the comment is just text.
      if (TokenPtr == CommentEnd) {
        formTextToken(T, TokenPtr);
        return;
      }
      // "<x" opens a start tag, "</" opens an end tag; anything else is text.
      const char C = *TokenPtr;
      if (isHTMLIdentifierStartingCharacter(C))
        setupAndLexHTMLStartTag(T);
      else if (C == '/')
        setupAndLexHTMLEndTag(T);
      else
        formTextToken(T, TokenPtr);
      return;
    }

    default:
      return HandleNonCommandToken();
  }
}
    462      1.1  joerg 
    463      1.1  joerg void Lexer::setupAndLexVerbatimBlock(Token &T,
    464      1.1  joerg                                      const char *TextBegin,
    465      1.1  joerg                                      char Marker, const CommandInfo *Info) {
    466      1.1  joerg   assert(Info->IsVerbatimBlockCommand);
    467      1.1  joerg 
    468      1.1  joerg   VerbatimBlockEndCommandName.clear();
    469      1.1  joerg   VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@");
    470      1.1  joerg   VerbatimBlockEndCommandName.append(Info->EndCommandName);
    471      1.1  joerg 
    472      1.1  joerg   formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
    473      1.1  joerg   T.setVerbatimBlockID(Info->getID());
    474      1.1  joerg 
    475      1.1  joerg   // If there is a newline following the verbatim opening command, skip the
    476      1.1  joerg   // newline so that we don't create an tok::verbatim_block_line with empty
    477      1.1  joerg   // text content.
    478      1.1  joerg   if (BufferPtr != CommentEnd &&
    479      1.1  joerg       isVerticalWhitespace(*BufferPtr)) {
    480      1.1  joerg     BufferPtr = skipNewline(BufferPtr, CommentEnd);
    481      1.1  joerg     State = LS_VerbatimBlockBody;
    482      1.1  joerg     return;
    483      1.1  joerg   }
    484      1.1  joerg 
    485      1.1  joerg   State = LS_VerbatimBlockFirstLine;
    486      1.1  joerg }
    487      1.1  joerg 
    488      1.1  joerg void Lexer::lexVerbatimBlockFirstLine(Token &T) {
    489      1.1  joerg again:
    490      1.1  joerg   assert(BufferPtr < CommentEnd);
    491      1.1  joerg 
    492      1.1  joerg   // FIXME: It would be better to scan the text once, finding either the block
    493      1.1  joerg   // end command or newline.
    494      1.1  joerg   //
    495      1.1  joerg   // Extract current line.
    496      1.1  joerg   const char *Newline = findNewline(BufferPtr, CommentEnd);
    497      1.1  joerg   StringRef Line(BufferPtr, Newline - BufferPtr);
    498      1.1  joerg 
    499      1.1  joerg   // Look for end command in current line.
    500      1.1  joerg   size_t Pos = Line.find(VerbatimBlockEndCommandName);
    501      1.1  joerg   const char *TextEnd;
    502      1.1  joerg   const char *NextLine;
    503      1.1  joerg   if (Pos == StringRef::npos) {
    504      1.1  joerg     // Current line is completely verbatim.
    505      1.1  joerg     TextEnd = Newline;
    506      1.1  joerg     NextLine = skipNewline(Newline, CommentEnd);
    507      1.1  joerg   } else if (Pos == 0) {
    508      1.1  joerg     // Current line contains just an end command.
    509      1.1  joerg     const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
    510      1.1  joerg     StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
    511      1.1  joerg     formTokenWithChars(T, End, tok::verbatim_block_end);
    512      1.1  joerg     T.setVerbatimBlockID(Traits.getCommandInfo(Name)->getID());
    513      1.1  joerg     State = LS_Normal;
    514      1.1  joerg     return;
    515      1.1  joerg   } else {
    516      1.1  joerg     // There is some text, followed by end command.  Extract text first.
    517      1.1  joerg     TextEnd = BufferPtr + Pos;
    518      1.1  joerg     NextLine = TextEnd;
    519      1.1  joerg     // If there is only whitespace before end command, skip whitespace.
    520      1.1  joerg     if (isWhitespace(BufferPtr, TextEnd)) {
    521      1.1  joerg       BufferPtr = TextEnd;
    522      1.1  joerg       goto again;
    523      1.1  joerg     }
    524      1.1  joerg   }
    525      1.1  joerg 
    526      1.1  joerg   StringRef Text(BufferPtr, TextEnd - BufferPtr);
    527      1.1  joerg   formTokenWithChars(T, NextLine, tok::verbatim_block_line);
    528      1.1  joerg   T.setVerbatimBlockText(Text);
    529      1.1  joerg 
    530      1.1  joerg   State = LS_VerbatimBlockBody;
    531      1.1  joerg }
    532      1.1  joerg 
    533      1.1  joerg void Lexer::lexVerbatimBlockBody(Token &T) {
    534      1.1  joerg   assert(State == LS_VerbatimBlockBody);
    535      1.1  joerg 
    536      1.1  joerg   if (CommentState == LCS_InsideCComment)
    537      1.1  joerg     skipLineStartingDecorations();
    538      1.1  joerg 
    539      1.1  joerg   if (BufferPtr == CommentEnd) {
    540      1.1  joerg     formTokenWithChars(T, BufferPtr, tok::verbatim_block_line);
    541      1.1  joerg     T.setVerbatimBlockText("");
    542      1.1  joerg     return;
    543      1.1  joerg   }
    544      1.1  joerg 
    545      1.1  joerg   lexVerbatimBlockFirstLine(T);
    546      1.1  joerg }
    547      1.1  joerg 
    548      1.1  joerg void Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin,
    549      1.1  joerg                                     const CommandInfo *Info) {
    550      1.1  joerg   assert(Info->IsVerbatimLineCommand);
    551      1.1  joerg   formTokenWithChars(T, TextBegin, tok::verbatim_line_name);
    552      1.1  joerg   T.setVerbatimLineID(Info->getID());
    553      1.1  joerg 
    554      1.1  joerg   State = LS_VerbatimLineText;
    555      1.1  joerg }
    556      1.1  joerg 
    557      1.1  joerg void Lexer::lexVerbatimLineText(Token &T) {
    558      1.1  joerg   assert(State == LS_VerbatimLineText);
    559      1.1  joerg 
    560      1.1  joerg   // Extract current line.
    561      1.1  joerg   const char *Newline = findNewline(BufferPtr, CommentEnd);
    562      1.1  joerg   StringRef Text(BufferPtr, Newline - BufferPtr);
    563      1.1  joerg   formTokenWithChars(T, Newline, tok::verbatim_line_text);
    564      1.1  joerg   T.setVerbatimLineText(Text);
    565      1.1  joerg 
    566      1.1  joerg   State = LS_Normal;
    567      1.1  joerg }
    568      1.1  joerg 
    569      1.1  joerg void Lexer::lexHTMLCharacterReference(Token &T) {
    570      1.1  joerg   const char *TokenPtr = BufferPtr;
    571      1.1  joerg   assert(*TokenPtr == '&');
    572      1.1  joerg   TokenPtr++;
    573      1.1  joerg   if (TokenPtr == CommentEnd) {
    574      1.1  joerg     formTextToken(T, TokenPtr);
    575      1.1  joerg     return;
    576      1.1  joerg   }
    577      1.1  joerg   const char *NamePtr;
    578      1.1  joerg   bool isNamed = false;
    579      1.1  joerg   bool isDecimal = false;
    580      1.1  joerg   char C = *TokenPtr;
    581      1.1  joerg   if (isHTMLNamedCharacterReferenceCharacter(C)) {
    582      1.1  joerg     NamePtr = TokenPtr;
    583      1.1  joerg     TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd);
    584      1.1  joerg     isNamed = true;
    585      1.1  joerg   } else if (C == '#') {
    586      1.1  joerg     TokenPtr++;
    587      1.1  joerg     if (TokenPtr == CommentEnd) {
    588      1.1  joerg       formTextToken(T, TokenPtr);
    589      1.1  joerg       return;
    590      1.1  joerg     }
    591      1.1  joerg     C = *TokenPtr;
    592      1.1  joerg     if (isHTMLDecimalCharacterReferenceCharacter(C)) {
    593      1.1  joerg       NamePtr = TokenPtr;
    594      1.1  joerg       TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd);
    595      1.1  joerg       isDecimal = true;
    596      1.1  joerg     } else if (C == 'x' || C == 'X') {
    597      1.1  joerg       TokenPtr++;
    598      1.1  joerg       NamePtr = TokenPtr;
    599      1.1  joerg       TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd);
    600      1.1  joerg     } else {
    601      1.1  joerg       formTextToken(T, TokenPtr);
    602      1.1  joerg       return;
    603      1.1  joerg     }
    604      1.1  joerg   } else {
    605      1.1  joerg     formTextToken(T, TokenPtr);
    606      1.1  joerg     return;
    607      1.1  joerg   }
    608      1.1  joerg   if (NamePtr == TokenPtr || TokenPtr == CommentEnd ||
    609      1.1  joerg       *TokenPtr != ';') {
    610      1.1  joerg     formTextToken(T, TokenPtr);
    611      1.1  joerg     return;
    612      1.1  joerg   }
    613      1.1  joerg   StringRef Name(NamePtr, TokenPtr - NamePtr);
    614      1.1  joerg   TokenPtr++; // Skip semicolon.
    615      1.1  joerg   StringRef Resolved;
    616      1.1  joerg   if (isNamed)
    617      1.1  joerg     Resolved = resolveHTMLNamedCharacterReference(Name);
    618      1.1  joerg   else if (isDecimal)
    619      1.1  joerg     Resolved = resolveHTMLDecimalCharacterReference(Name);
    620      1.1  joerg   else
    621      1.1  joerg     Resolved = resolveHTMLHexCharacterReference(Name);
    622      1.1  joerg 
    623      1.1  joerg   if (Resolved.empty()) {
    624      1.1  joerg     formTextToken(T, TokenPtr);
    625      1.1  joerg     return;
    626      1.1  joerg   }
    627      1.1  joerg   formTokenWithChars(T, TokenPtr, tok::text);
    628      1.1  joerg   T.setText(Resolved);
    629      1.1  joerg }
    630      1.1  joerg 
    631      1.1  joerg void Lexer::setupAndLexHTMLStartTag(Token &T) {
    632      1.1  joerg   assert(BufferPtr[0] == '<' &&
    633      1.1  joerg          isHTMLIdentifierStartingCharacter(BufferPtr[1]));
    634      1.1  joerg   const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
    635      1.1  joerg   StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
    636      1.1  joerg   if (!isHTMLTagName(Name)) {
    637      1.1  joerg     formTextToken(T, TagNameEnd);
    638      1.1  joerg     return;
    639      1.1  joerg   }
    640      1.1  joerg 
    641      1.1  joerg   formTokenWithChars(T, TagNameEnd, tok::html_start_tag);
    642      1.1  joerg   T.setHTMLTagStartName(Name);
    643      1.1  joerg 
    644      1.1  joerg   BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
    645      1.1  joerg 
    646      1.1  joerg   const char C = *BufferPtr;
    647      1.1  joerg   if (BufferPtr != CommentEnd &&
    648      1.1  joerg       (C == '>' || C == '/' || isHTMLIdentifierStartingCharacter(C)))
    649      1.1  joerg     State = LS_HTMLStartTag;
    650      1.1  joerg }
    651      1.1  joerg 
    652      1.1  joerg void Lexer::lexHTMLStartTag(Token &T) {
    653      1.1  joerg   assert(State == LS_HTMLStartTag);
    654      1.1  joerg 
    655      1.1  joerg   const char *TokenPtr = BufferPtr;
    656      1.1  joerg   char C = *TokenPtr;
    657      1.1  joerg   if (isHTMLIdentifierCharacter(C)) {
    658      1.1  joerg     TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
    659      1.1  joerg     StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
    660      1.1  joerg     formTokenWithChars(T, TokenPtr, tok::html_ident);
    661      1.1  joerg     T.setHTMLIdent(Ident);
    662      1.1  joerg   } else {
    663      1.1  joerg     switch (C) {
    664      1.1  joerg     case '=':
    665      1.1  joerg       TokenPtr++;
    666      1.1  joerg       formTokenWithChars(T, TokenPtr, tok::html_equals);
    667      1.1  joerg       break;
    668      1.1  joerg     case '\"':
    669      1.1  joerg     case '\'': {
    670      1.1  joerg       const char *OpenQuote = TokenPtr;
    671      1.1  joerg       TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
    672      1.1  joerg       const char *ClosingQuote = TokenPtr;
    673      1.1  joerg       if (TokenPtr != CommentEnd) // Skip closing quote.
    674      1.1  joerg         TokenPtr++;
    675      1.1  joerg       formTokenWithChars(T, TokenPtr, tok::html_quoted_string);
    676      1.1  joerg       T.setHTMLQuotedString(StringRef(OpenQuote + 1,
    677      1.1  joerg                                       ClosingQuote - (OpenQuote + 1)));
    678      1.1  joerg       break;
    679      1.1  joerg     }
    680      1.1  joerg     case '>':
    681      1.1  joerg       TokenPtr++;
    682      1.1  joerg       formTokenWithChars(T, TokenPtr, tok::html_greater);
    683      1.1  joerg       State = LS_Normal;
    684      1.1  joerg       return;
    685      1.1  joerg     case '/':
    686      1.1  joerg       TokenPtr++;
    687      1.1  joerg       if (TokenPtr != CommentEnd && *TokenPtr == '>') {
    688      1.1  joerg         TokenPtr++;
    689      1.1  joerg         formTokenWithChars(T, TokenPtr, tok::html_slash_greater);
    690      1.1  joerg       } else
    691      1.1  joerg         formTextToken(T, TokenPtr);
    692      1.1  joerg 
    693      1.1  joerg       State = LS_Normal;
    694      1.1  joerg       return;
    695      1.1  joerg     }
    696      1.1  joerg   }
    697      1.1  joerg 
    698      1.1  joerg   // Now look ahead and return to normal state if we don't see any HTML tokens
    699      1.1  joerg   // ahead.
    700      1.1  joerg   BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
    701      1.1  joerg   if (BufferPtr == CommentEnd) {
    702      1.1  joerg     State = LS_Normal;
    703      1.1  joerg     return;
    704      1.1  joerg   }
    705      1.1  joerg 
    706      1.1  joerg   C = *BufferPtr;
    707      1.1  joerg   if (!isHTMLIdentifierStartingCharacter(C) &&
    708      1.1  joerg       C != '=' && C != '\"' && C != '\'' && C != '>') {
    709      1.1  joerg     State = LS_Normal;
    710      1.1  joerg     return;
    711      1.1  joerg   }
    712      1.1  joerg }
    713      1.1  joerg 
    714      1.1  joerg void Lexer::setupAndLexHTMLEndTag(Token &T) {
    715      1.1  joerg   assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
    716      1.1  joerg 
    717      1.1  joerg   const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
    718      1.1  joerg   const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
    719      1.1  joerg   StringRef Name(TagNameBegin, TagNameEnd - TagNameBegin);
    720      1.1  joerg   if (!isHTMLTagName(Name)) {
    721      1.1  joerg     formTextToken(T, TagNameEnd);
    722      1.1  joerg     return;
    723      1.1  joerg   }
    724      1.1  joerg 
    725      1.1  joerg   const char *End = skipWhitespace(TagNameEnd, CommentEnd);
    726      1.1  joerg 
    727      1.1  joerg   formTokenWithChars(T, End, tok::html_end_tag);
    728      1.1  joerg   T.setHTMLTagEndName(Name);
    729      1.1  joerg 
    730      1.1  joerg   if (BufferPtr != CommentEnd && *BufferPtr == '>')
    731      1.1  joerg     State = LS_HTMLEndTag;
    732      1.1  joerg }
    733      1.1  joerg 
    734      1.1  joerg void Lexer::lexHTMLEndTag(Token &T) {
    735      1.1  joerg   assert(BufferPtr != CommentEnd && *BufferPtr == '>');
    736      1.1  joerg 
    737      1.1  joerg   formTokenWithChars(T, BufferPtr + 1, tok::html_greater);
    738      1.1  joerg   State = LS_Normal;
    739      1.1  joerg }
    740      1.1  joerg 
    741      1.1  joerg Lexer::Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
    742      1.1  joerg              const CommandTraits &Traits, SourceLocation FileLoc,
    743  1.1.1.2  joerg              const char *BufferStart, const char *BufferEnd, bool ParseCommands)
    744      1.1  joerg     : Allocator(Allocator), Diags(Diags), Traits(Traits),
    745  1.1.1.2  joerg       BufferStart(BufferStart), BufferEnd(BufferEnd), BufferPtr(BufferStart),
    746  1.1.1.2  joerg       FileLoc(FileLoc), ParseCommands(ParseCommands),
    747  1.1.1.2  joerg       CommentState(LCS_BeforeComment), State(LS_Normal) {}
    748      1.1  joerg 
    749      1.1  joerg void Lexer::lex(Token &T) {
    750      1.1  joerg again:
    751      1.1  joerg   switch (CommentState) {
    752      1.1  joerg   case LCS_BeforeComment:
    753      1.1  joerg     if (BufferPtr == BufferEnd) {
    754      1.1  joerg       formTokenWithChars(T, BufferPtr, tok::eof);
    755      1.1  joerg       return;
    756      1.1  joerg     }
    757      1.1  joerg 
    758      1.1  joerg     assert(*BufferPtr == '/');
    759      1.1  joerg     BufferPtr++; // Skip first slash.
    760      1.1  joerg     switch(*BufferPtr) {
    761      1.1  joerg     case '/': { // BCPL comment.
    762      1.1  joerg       BufferPtr++; // Skip second slash.
    763      1.1  joerg 
    764      1.1  joerg       if (BufferPtr != BufferEnd) {
    765      1.1  joerg         // Skip Doxygen magic marker, if it is present.
    766      1.1  joerg         // It might be missing because of a typo //< or /*<, or because we
    767      1.1  joerg         // merged this non-Doxygen comment into a bunch of Doxygen comments
    768      1.1  joerg         // around it: /** ... */ /* ... */ /** ... */
    769      1.1  joerg         const char C = *BufferPtr;
    770      1.1  joerg         if (C == '/' || C == '!')
    771      1.1  joerg           BufferPtr++;
    772      1.1  joerg       }
    773      1.1  joerg 
    774      1.1  joerg       // Skip less-than symbol that marks trailing comments.
    775      1.1  joerg       // Skip it even if the comment is not a Doxygen one, because //< and /*<
    776      1.1  joerg       // are frequent typos.
    777      1.1  joerg       if (BufferPtr != BufferEnd && *BufferPtr == '<')
    778      1.1  joerg         BufferPtr++;
    779      1.1  joerg 
    780      1.1  joerg       CommentState = LCS_InsideBCPLComment;
    781      1.1  joerg       if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine)
    782      1.1  joerg         State = LS_Normal;
    783      1.1  joerg       CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
    784      1.1  joerg       goto again;
    785      1.1  joerg     }
    786      1.1  joerg     case '*': { // C comment.
    787      1.1  joerg       BufferPtr++; // Skip star.
    788      1.1  joerg 
    789      1.1  joerg       // Skip Doxygen magic marker.
    790      1.1  joerg       const char C = *BufferPtr;
    791      1.1  joerg       if ((C == '*' && *(BufferPtr + 1) != '/') || C == '!')
    792      1.1  joerg         BufferPtr++;
    793      1.1  joerg 
    794      1.1  joerg       // Skip less-than symbol that marks trailing comments.
    795      1.1  joerg       if (BufferPtr != BufferEnd && *BufferPtr == '<')
    796      1.1  joerg         BufferPtr++;
    797      1.1  joerg 
    798      1.1  joerg       CommentState = LCS_InsideCComment;
    799      1.1  joerg       State = LS_Normal;
    800      1.1  joerg       CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
    801      1.1  joerg       goto again;
    802      1.1  joerg     }
    803      1.1  joerg     default:
    804      1.1  joerg       llvm_unreachable("second character of comment should be '/' or '*'");
    805      1.1  joerg     }
    806      1.1  joerg 
    807      1.1  joerg   case LCS_BetweenComments: {
    808      1.1  joerg     // Consecutive comments are extracted only if there is only whitespace
    809      1.1  joerg     // between them.  So we can search for the start of the next comment.
    810      1.1  joerg     const char *EndWhitespace = BufferPtr;
    811      1.1  joerg     while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
    812      1.1  joerg       EndWhitespace++;
    813      1.1  joerg 
    814      1.1  joerg     // Turn any whitespace between comments (and there is only whitespace
    815      1.1  joerg     // between them -- guaranteed by comment extraction) into a newline.  We
    816      1.1  joerg     // have two newlines between C comments in total (first one was synthesized
    817      1.1  joerg     // after a comment).
    818      1.1  joerg     formTokenWithChars(T, EndWhitespace, tok::newline);
    819      1.1  joerg 
    820      1.1  joerg     CommentState = LCS_BeforeComment;
    821      1.1  joerg     break;
    822      1.1  joerg   }
    823      1.1  joerg 
    824      1.1  joerg   case LCS_InsideBCPLComment:
    825      1.1  joerg   case LCS_InsideCComment:
    826      1.1  joerg     if (BufferPtr != CommentEnd) {
    827      1.1  joerg       lexCommentText(T);
    828      1.1  joerg       break;
    829      1.1  joerg     } else {
    830      1.1  joerg       // Skip C comment closing sequence.
    831      1.1  joerg       if (CommentState == LCS_InsideCComment) {
    832      1.1  joerg         assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
    833      1.1  joerg         BufferPtr += 2;
    834      1.1  joerg         assert(BufferPtr <= BufferEnd);
    835      1.1  joerg 
    836      1.1  joerg         // Synthenize newline just after the C comment, regardless if there is
    837      1.1  joerg         // actually a newline.
    838      1.1  joerg         formTokenWithChars(T, BufferPtr, tok::newline);
    839      1.1  joerg 
    840      1.1  joerg         CommentState = LCS_BetweenComments;
    841      1.1  joerg         break;
    842      1.1  joerg       } else {
    843      1.1  joerg         // Don't synthesized a newline after BCPL comment.
    844      1.1  joerg         CommentState = LCS_BetweenComments;
    845      1.1  joerg         goto again;
    846      1.1  joerg       }
    847      1.1  joerg     }
    848      1.1  joerg   }
    849      1.1  joerg }
    850      1.1  joerg 
    851      1.1  joerg StringRef Lexer::getSpelling(const Token &Tok,
    852      1.1  joerg                              const SourceManager &SourceMgr) const {
    853      1.1  joerg   SourceLocation Loc = Tok.getLocation();
    854      1.1  joerg   std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
    855      1.1  joerg 
    856      1.1  joerg   bool InvalidTemp = false;
    857      1.1  joerg   StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
    858      1.1  joerg   if (InvalidTemp)
    859      1.1  joerg     return StringRef();
    860      1.1  joerg 
    861      1.1  joerg   const char *Begin = File.data() + LocInfo.second;
    862      1.1  joerg   return StringRef(Begin, Tok.getLength());
    863      1.1  joerg }
    864      1.1  joerg 
    865      1.1  joerg } // end namespace comments
    866      1.1  joerg } // end namespace clang
    867