Home | History | Annotate | Line # | Download | only in AST
CommentLexer.cpp revision 1.1.1.1
      1  1.1  joerg //===--- CommentLexer.cpp -------------------------------------------------===//
      2  1.1  joerg //
      3  1.1  joerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4  1.1  joerg // See https://llvm.org/LICENSE.txt for license information.
      5  1.1  joerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6  1.1  joerg //
      7  1.1  joerg //===----------------------------------------------------------------------===//
      8  1.1  joerg 
      9  1.1  joerg #include "clang/AST/CommentLexer.h"
     10  1.1  joerg #include "clang/AST/CommentCommandTraits.h"
     11  1.1  joerg #include "clang/AST/CommentDiagnostic.h"
     12  1.1  joerg #include "clang/Basic/CharInfo.h"
     13  1.1  joerg #include "llvm/ADT/StringExtras.h"
     14  1.1  joerg #include "llvm/ADT/StringSwitch.h"
     15  1.1  joerg #include "llvm/Support/ConvertUTF.h"
     16  1.1  joerg #include "llvm/Support/ErrorHandling.h"
     17  1.1  joerg 
     18  1.1  joerg namespace clang {
     19  1.1  joerg namespace comments {
     20  1.1  joerg 
     21  1.1  joerg void Token::dump(const Lexer &L, const SourceManager &SM) const {
     22  1.1  joerg   llvm::errs() << "comments::Token Kind=" << Kind << " ";
     23  1.1  joerg   Loc.print(llvm::errs(), SM);
     24  1.1  joerg   llvm::errs() << " " << Length << " \"" << L.getSpelling(*this, SM) << "\"\n";
     25  1.1  joerg }
     26  1.1  joerg 
     27  1.1  joerg static inline bool isHTMLNamedCharacterReferenceCharacter(char C) {
     28  1.1  joerg   return isLetter(C);
     29  1.1  joerg }
     30  1.1  joerg 
     31  1.1  joerg static inline bool isHTMLDecimalCharacterReferenceCharacter(char C) {
     32  1.1  joerg   return isDigit(C);
     33  1.1  joerg }
     34  1.1  joerg 
     35  1.1  joerg static inline bool isHTMLHexCharacterReferenceCharacter(char C) {
     36  1.1  joerg   return isHexDigit(C);
     37  1.1  joerg }
     38  1.1  joerg 
     39  1.1  joerg static inline StringRef convertCodePointToUTF8(
     40  1.1  joerg                                       llvm::BumpPtrAllocator &Allocator,
     41  1.1  joerg                                       unsigned CodePoint) {
     42  1.1  joerg   char *Resolved = Allocator.Allocate<char>(UNI_MAX_UTF8_BYTES_PER_CODE_POINT);
     43  1.1  joerg   char *ResolvedPtr = Resolved;
     44  1.1  joerg   if (llvm::ConvertCodePointToUTF8(CodePoint, ResolvedPtr))
     45  1.1  joerg     return StringRef(Resolved, ResolvedPtr - Resolved);
     46  1.1  joerg   else
     47  1.1  joerg     return StringRef();
     48  1.1  joerg }
     49  1.1  joerg 
     50  1.1  joerg namespace {
     51  1.1  joerg 
     52  1.1  joerg #include "clang/AST/CommentHTMLTags.inc"
     53  1.1  joerg #include "clang/AST/CommentHTMLNamedCharacterReferences.inc"
     54  1.1  joerg 
     55  1.1  joerg } // end anonymous namespace
     56  1.1  joerg 
     57  1.1  joerg StringRef Lexer::resolveHTMLNamedCharacterReference(StringRef Name) const {
     58  1.1  joerg   // Fast path, first check a few most widely used named character references.
     59  1.1  joerg   return llvm::StringSwitch<StringRef>(Name)
     60  1.1  joerg       .Case("amp", "&")
     61  1.1  joerg       .Case("lt", "<")
     62  1.1  joerg       .Case("gt", ">")
     63  1.1  joerg       .Case("quot", "\"")
     64  1.1  joerg       .Case("apos", "\'")
     65  1.1  joerg       // Slow path.
     66  1.1  joerg       .Default(translateHTMLNamedCharacterReferenceToUTF8(Name));
     67  1.1  joerg }
     68  1.1  joerg 
     69  1.1  joerg StringRef Lexer::resolveHTMLDecimalCharacterReference(StringRef Name) const {
     70  1.1  joerg   unsigned CodePoint = 0;
     71  1.1  joerg   for (unsigned i = 0, e = Name.size(); i != e; ++i) {
     72  1.1  joerg     assert(isHTMLDecimalCharacterReferenceCharacter(Name[i]));
     73  1.1  joerg     CodePoint *= 10;
     74  1.1  joerg     CodePoint += Name[i] - '0';
     75  1.1  joerg   }
     76  1.1  joerg   return convertCodePointToUTF8(Allocator, CodePoint);
     77  1.1  joerg }
     78  1.1  joerg 
     79  1.1  joerg StringRef Lexer::resolveHTMLHexCharacterReference(StringRef Name) const {
     80  1.1  joerg   unsigned CodePoint = 0;
     81  1.1  joerg   for (unsigned i = 0, e = Name.size(); i != e; ++i) {
     82  1.1  joerg     CodePoint *= 16;
     83  1.1  joerg     const char C = Name[i];
     84  1.1  joerg     assert(isHTMLHexCharacterReferenceCharacter(C));
     85  1.1  joerg     CodePoint += llvm::hexDigitValue(C);
     86  1.1  joerg   }
     87  1.1  joerg   return convertCodePointToUTF8(Allocator, CodePoint);
     88  1.1  joerg }
     89  1.1  joerg 
     90  1.1  joerg void Lexer::skipLineStartingDecorations() {
     91  1.1  joerg   // This function should be called only for C comments
     92  1.1  joerg   assert(CommentState == LCS_InsideCComment);
     93  1.1  joerg 
     94  1.1  joerg   if (BufferPtr == CommentEnd)
     95  1.1  joerg     return;
     96  1.1  joerg 
     97  1.1  joerg   switch (*BufferPtr) {
     98  1.1  joerg   case ' ':
     99  1.1  joerg   case '\t':
    100  1.1  joerg   case '\f':
    101  1.1  joerg   case '\v': {
    102  1.1  joerg     const char *NewBufferPtr = BufferPtr;
    103  1.1  joerg     NewBufferPtr++;
    104  1.1  joerg     if (NewBufferPtr == CommentEnd)
    105  1.1  joerg       return;
    106  1.1  joerg 
    107  1.1  joerg     char C = *NewBufferPtr;
    108  1.1  joerg     while (isHorizontalWhitespace(C)) {
    109  1.1  joerg       NewBufferPtr++;
    110  1.1  joerg       if (NewBufferPtr == CommentEnd)
    111  1.1  joerg         return;
    112  1.1  joerg       C = *NewBufferPtr;
    113  1.1  joerg     }
    114  1.1  joerg     if (C == '*')
    115  1.1  joerg       BufferPtr = NewBufferPtr + 1;
    116  1.1  joerg     break;
    117  1.1  joerg   }
    118  1.1  joerg   case '*':
    119  1.1  joerg     BufferPtr++;
    120  1.1  joerg     break;
    121  1.1  joerg   }
    122  1.1  joerg }
    123  1.1  joerg 
    124  1.1  joerg namespace {
    125  1.1  joerg /// Returns pointer to the first newline character in the string.
    126  1.1  joerg const char *findNewline(const char *BufferPtr, const char *BufferEnd) {
    127  1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    128  1.1  joerg     if (isVerticalWhitespace(*BufferPtr))
    129  1.1  joerg       return BufferPtr;
    130  1.1  joerg   }
    131  1.1  joerg   return BufferEnd;
    132  1.1  joerg }
    133  1.1  joerg 
    134  1.1  joerg const char *skipNewline(const char *BufferPtr, const char *BufferEnd) {
    135  1.1  joerg   if (BufferPtr == BufferEnd)
    136  1.1  joerg     return BufferPtr;
    137  1.1  joerg 
    138  1.1  joerg   if (*BufferPtr == '\n')
    139  1.1  joerg     BufferPtr++;
    140  1.1  joerg   else {
    141  1.1  joerg     assert(*BufferPtr == '\r');
    142  1.1  joerg     BufferPtr++;
    143  1.1  joerg     if (BufferPtr != BufferEnd && *BufferPtr == '\n')
    144  1.1  joerg       BufferPtr++;
    145  1.1  joerg   }
    146  1.1  joerg   return BufferPtr;
    147  1.1  joerg }
    148  1.1  joerg 
    149  1.1  joerg const char *skipNamedCharacterReference(const char *BufferPtr,
    150  1.1  joerg                                         const char *BufferEnd) {
    151  1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    152  1.1  joerg     if (!isHTMLNamedCharacterReferenceCharacter(*BufferPtr))
    153  1.1  joerg       return BufferPtr;
    154  1.1  joerg   }
    155  1.1  joerg   return BufferEnd;
    156  1.1  joerg }
    157  1.1  joerg 
    158  1.1  joerg const char *skipDecimalCharacterReference(const char *BufferPtr,
    159  1.1  joerg                                           const char *BufferEnd) {
    160  1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    161  1.1  joerg     if (!isHTMLDecimalCharacterReferenceCharacter(*BufferPtr))
    162  1.1  joerg       return BufferPtr;
    163  1.1  joerg   }
    164  1.1  joerg   return BufferEnd;
    165  1.1  joerg }
    166  1.1  joerg 
    167  1.1  joerg const char *skipHexCharacterReference(const char *BufferPtr,
    168  1.1  joerg                                       const char *BufferEnd) {
    169  1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    170  1.1  joerg     if (!isHTMLHexCharacterReferenceCharacter(*BufferPtr))
    171  1.1  joerg       return BufferPtr;
    172  1.1  joerg   }
    173  1.1  joerg   return BufferEnd;
    174  1.1  joerg }
    175  1.1  joerg 
    176  1.1  joerg bool isHTMLIdentifierStartingCharacter(char C) {
    177  1.1  joerg   return isLetter(C);
    178  1.1  joerg }
    179  1.1  joerg 
    180  1.1  joerg bool isHTMLIdentifierCharacter(char C) {
    181  1.1  joerg   return isAlphanumeric(C);
    182  1.1  joerg }
    183  1.1  joerg 
    184  1.1  joerg const char *skipHTMLIdentifier(const char *BufferPtr, const char *BufferEnd) {
    185  1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    186  1.1  joerg     if (!isHTMLIdentifierCharacter(*BufferPtr))
    187  1.1  joerg       return BufferPtr;
    188  1.1  joerg   }
    189  1.1  joerg   return BufferEnd;
    190  1.1  joerg }
    191  1.1  joerg 
    192  1.1  joerg /// Skip HTML string quoted in single or double quotes.  Escaping quotes inside
    193  1.1  joerg /// string allowed.
    194  1.1  joerg ///
    195  1.1  joerg /// Returns pointer to closing quote.
    196  1.1  joerg const char *skipHTMLQuotedString(const char *BufferPtr, const char *BufferEnd)
    197  1.1  joerg {
    198  1.1  joerg   const char Quote = *BufferPtr;
    199  1.1  joerg   assert(Quote == '\"' || Quote == '\'');
    200  1.1  joerg 
    201  1.1  joerg   BufferPtr++;
    202  1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    203  1.1  joerg     const char C = *BufferPtr;
    204  1.1  joerg     if (C == Quote && BufferPtr[-1] != '\\')
    205  1.1  joerg       return BufferPtr;
    206  1.1  joerg   }
    207  1.1  joerg   return BufferEnd;
    208  1.1  joerg }
    209  1.1  joerg 
    210  1.1  joerg const char *skipWhitespace(const char *BufferPtr, const char *BufferEnd) {
    211  1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    212  1.1  joerg     if (!isWhitespace(*BufferPtr))
    213  1.1  joerg       return BufferPtr;
    214  1.1  joerg   }
    215  1.1  joerg   return BufferEnd;
    216  1.1  joerg }
    217  1.1  joerg 
    218  1.1  joerg bool isWhitespace(const char *BufferPtr, const char *BufferEnd) {
    219  1.1  joerg   return skipWhitespace(BufferPtr, BufferEnd) == BufferEnd;
    220  1.1  joerg }
    221  1.1  joerg 
    222  1.1  joerg bool isCommandNameStartCharacter(char C) {
    223  1.1  joerg   return isLetter(C);
    224  1.1  joerg }
    225  1.1  joerg 
    226  1.1  joerg bool isCommandNameCharacter(char C) {
    227  1.1  joerg   return isAlphanumeric(C);
    228  1.1  joerg }
    229  1.1  joerg 
    230  1.1  joerg const char *skipCommandName(const char *BufferPtr, const char *BufferEnd) {
    231  1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    232  1.1  joerg     if (!isCommandNameCharacter(*BufferPtr))
    233  1.1  joerg       return BufferPtr;
    234  1.1  joerg   }
    235  1.1  joerg   return BufferEnd;
    236  1.1  joerg }
    237  1.1  joerg 
    238  1.1  joerg /// Return the one past end pointer for BCPL comments.
    239  1.1  joerg /// Handles newlines escaped with backslash or trigraph for backslahs.
    240  1.1  joerg const char *findBCPLCommentEnd(const char *BufferPtr, const char *BufferEnd) {
    241  1.1  joerg   const char *CurPtr = BufferPtr;
    242  1.1  joerg   while (CurPtr != BufferEnd) {
    243  1.1  joerg     while (!isVerticalWhitespace(*CurPtr)) {
    244  1.1  joerg       CurPtr++;
    245  1.1  joerg       if (CurPtr == BufferEnd)
    246  1.1  joerg         return BufferEnd;
    247  1.1  joerg     }
    248  1.1  joerg     // We found a newline, check if it is escaped.
    249  1.1  joerg     const char *EscapePtr = CurPtr - 1;
    250  1.1  joerg     while(isHorizontalWhitespace(*EscapePtr))
    251  1.1  joerg       EscapePtr--;
    252  1.1  joerg 
    253  1.1  joerg     if (*EscapePtr == '\\' ||
    254  1.1  joerg         (EscapePtr - 2 >= BufferPtr && EscapePtr[0] == '/' &&
    255  1.1  joerg          EscapePtr[-1] == '?' && EscapePtr[-2] == '?')) {
    256  1.1  joerg       // We found an escaped newline.
    257  1.1  joerg       CurPtr = skipNewline(CurPtr, BufferEnd);
    258  1.1  joerg     } else
    259  1.1  joerg       return CurPtr; // Not an escaped newline.
    260  1.1  joerg   }
    261  1.1  joerg   return BufferEnd;
    262  1.1  joerg }
    263  1.1  joerg 
    264  1.1  joerg /// Return the one past end pointer for C comments.
    265  1.1  joerg /// Very dumb, does not handle escaped newlines or trigraphs.
    266  1.1  joerg const char *findCCommentEnd(const char *BufferPtr, const char *BufferEnd) {
    267  1.1  joerg   for ( ; BufferPtr != BufferEnd; ++BufferPtr) {
    268  1.1  joerg     if (*BufferPtr == '*') {
    269  1.1  joerg       assert(BufferPtr + 1 != BufferEnd);
    270  1.1  joerg       if (*(BufferPtr + 1) == '/')
    271  1.1  joerg         return BufferPtr;
    272  1.1  joerg     }
    273  1.1  joerg   }
    274  1.1  joerg   llvm_unreachable("buffer end hit before '*/' was seen");
    275  1.1  joerg }
    276  1.1  joerg 
    277  1.1  joerg } // end anonymous namespace
    278  1.1  joerg 
    279  1.1  joerg void Lexer::formTokenWithChars(Token &Result, const char *TokEnd,
    280  1.1  joerg                                tok::TokenKind Kind) {
    281  1.1  joerg   const unsigned TokLen = TokEnd - BufferPtr;
    282  1.1  joerg   Result.setLocation(getSourceLocation(BufferPtr));
    283  1.1  joerg   Result.setKind(Kind);
    284  1.1  joerg   Result.setLength(TokLen);
    285  1.1  joerg #ifndef NDEBUG
    286  1.1  joerg   Result.TextPtr = "<UNSET>";
    287  1.1  joerg   Result.IntVal = 7;
    288  1.1  joerg #endif
    289  1.1  joerg   BufferPtr = TokEnd;
    290  1.1  joerg }
    291  1.1  joerg 
    292  1.1  joerg void Lexer::lexCommentText(Token &T) {
    293  1.1  joerg   assert(CommentState == LCS_InsideBCPLComment ||
    294  1.1  joerg          CommentState == LCS_InsideCComment);
    295  1.1  joerg 
    296  1.1  joerg   // Handles lexing non-command text, i.e. text and newline.
    297  1.1  joerg   auto HandleNonCommandToken = [&]() -> void {
    298  1.1  joerg     assert(State == LS_Normal);
    299  1.1  joerg 
    300  1.1  joerg     const char *TokenPtr = BufferPtr;
    301  1.1  joerg     assert(TokenPtr < CommentEnd);
    302  1.1  joerg     switch (*TokenPtr) {
    303  1.1  joerg       case '\n':
    304  1.1  joerg       case '\r':
    305  1.1  joerg           TokenPtr = skipNewline(TokenPtr, CommentEnd);
    306  1.1  joerg           formTokenWithChars(T, TokenPtr, tok::newline);
    307  1.1  joerg 
    308  1.1  joerg           if (CommentState == LCS_InsideCComment)
    309  1.1  joerg             skipLineStartingDecorations();
    310  1.1  joerg           return;
    311  1.1  joerg 
    312  1.1  joerg       default: {
    313  1.1  joerg           StringRef TokStartSymbols = ParseCommands ? "\n\r\\@&<" : "\n\r";
    314  1.1  joerg           size_t End = StringRef(TokenPtr, CommentEnd - TokenPtr)
    315  1.1  joerg                            .find_first_of(TokStartSymbols);
    316  1.1  joerg           if (End != StringRef::npos)
    317  1.1  joerg             TokenPtr += End;
    318  1.1  joerg           else
    319  1.1  joerg             TokenPtr = CommentEnd;
    320  1.1  joerg           formTextToken(T, TokenPtr);
    321  1.1  joerg           return;
    322  1.1  joerg       }
    323  1.1  joerg     }
    324  1.1  joerg   };
    325  1.1  joerg 
    326  1.1  joerg   if (!ParseCommands)
    327  1.1  joerg     return HandleNonCommandToken();
    328  1.1  joerg 
    329  1.1  joerg   switch (State) {
    330  1.1  joerg   case LS_Normal:
    331  1.1  joerg     break;
    332  1.1  joerg   case LS_VerbatimBlockFirstLine:
    333  1.1  joerg     lexVerbatimBlockFirstLine(T);
    334  1.1  joerg     return;
    335  1.1  joerg   case LS_VerbatimBlockBody:
    336  1.1  joerg     lexVerbatimBlockBody(T);
    337  1.1  joerg     return;
    338  1.1  joerg   case LS_VerbatimLineText:
    339  1.1  joerg     lexVerbatimLineText(T);
    340  1.1  joerg     return;
    341  1.1  joerg   case LS_HTMLStartTag:
    342  1.1  joerg     lexHTMLStartTag(T);
    343  1.1  joerg     return;
    344  1.1  joerg   case LS_HTMLEndTag:
    345  1.1  joerg     lexHTMLEndTag(T);
    346  1.1  joerg     return;
    347  1.1  joerg   }
    348  1.1  joerg 
    349  1.1  joerg   assert(State == LS_Normal);
    350  1.1  joerg   const char *TokenPtr = BufferPtr;
    351  1.1  joerg   assert(TokenPtr < CommentEnd);
    352  1.1  joerg   switch(*TokenPtr) {
    353  1.1  joerg     case '\\':
    354  1.1  joerg     case '@': {
    355  1.1  joerg       // Commands that start with a backslash and commands that start with
    356  1.1  joerg       // 'at' have equivalent semantics.  But we keep information about the
    357  1.1  joerg       // exact syntax in AST for comments.
    358  1.1  joerg       tok::TokenKind CommandKind =
    359  1.1  joerg           (*TokenPtr == '@') ? tok::at_command : tok::backslash_command;
    360  1.1  joerg       TokenPtr++;
    361  1.1  joerg       if (TokenPtr == CommentEnd) {
    362  1.1  joerg         formTextToken(T, TokenPtr);
    363  1.1  joerg         return;
    364  1.1  joerg       }
    365  1.1  joerg       char C = *TokenPtr;
    366  1.1  joerg       switch (C) {
    367  1.1  joerg       default:
    368  1.1  joerg         break;
    369  1.1  joerg 
    370  1.1  joerg       case '\\': case '@': case '&': case '$':
    371  1.1  joerg       case '#':  case '<': case '>': case '%':
    372  1.1  joerg       case '\"': case '.': case ':':
    373  1.1  joerg         // This is one of \\ \@ \& \$ etc escape sequences.
    374  1.1  joerg         TokenPtr++;
    375  1.1  joerg         if (C == ':' && TokenPtr != CommentEnd && *TokenPtr == ':') {
    376  1.1  joerg           // This is the \:: escape sequence.
    377  1.1  joerg           TokenPtr++;
    378  1.1  joerg         }
    379  1.1  joerg         StringRef UnescapedText(BufferPtr + 1, TokenPtr - (BufferPtr + 1));
    380  1.1  joerg         formTokenWithChars(T, TokenPtr, tok::text);
    381  1.1  joerg         T.setText(UnescapedText);
    382  1.1  joerg         return;
    383  1.1  joerg       }
    384  1.1  joerg 
    385  1.1  joerg       // Don't make zero-length commands.
    386  1.1  joerg       if (!isCommandNameStartCharacter(*TokenPtr)) {
    387  1.1  joerg         formTextToken(T, TokenPtr);
    388  1.1  joerg         return;
    389  1.1  joerg       }
    390  1.1  joerg 
    391  1.1  joerg       TokenPtr = skipCommandName(TokenPtr, CommentEnd);
    392  1.1  joerg       unsigned Length = TokenPtr - (BufferPtr + 1);
    393  1.1  joerg 
    394  1.1  joerg       // Hardcoded support for lexing LaTeX formula commands
    395  1.1  joerg       // \f$ \f[ \f] \f{ \f} as a single command.
    396  1.1  joerg       if (Length == 1 && TokenPtr[-1] == 'f' && TokenPtr != CommentEnd) {
    397  1.1  joerg         C = *TokenPtr;
    398  1.1  joerg         if (C == '$' || C == '[' || C == ']' || C == '{' || C == '}') {
    399  1.1  joerg           TokenPtr++;
    400  1.1  joerg           Length++;
    401  1.1  joerg         }
    402  1.1  joerg       }
    403  1.1  joerg 
    404  1.1  joerg       StringRef CommandName(BufferPtr + 1, Length);
    405  1.1  joerg 
    406  1.1  joerg       const CommandInfo *Info = Traits.getCommandInfoOrNULL(CommandName);
    407  1.1  joerg       if (!Info) {
    408  1.1  joerg         if ((Info = Traits.getTypoCorrectCommandInfo(CommandName))) {
    409  1.1  joerg           StringRef CorrectedName = Info->Name;
    410  1.1  joerg           SourceLocation Loc = getSourceLocation(BufferPtr);
    411  1.1  joerg           SourceLocation EndLoc = getSourceLocation(TokenPtr);
    412  1.1  joerg           SourceRange FullRange = SourceRange(Loc, EndLoc);
    413  1.1  joerg           SourceRange CommandRange(Loc.getLocWithOffset(1), EndLoc);
    414  1.1  joerg           Diag(Loc, diag::warn_correct_comment_command_name)
    415  1.1  joerg             << FullRange << CommandName << CorrectedName
    416  1.1  joerg             << FixItHint::CreateReplacement(CommandRange, CorrectedName);
    417  1.1  joerg         } else {
    418  1.1  joerg           formTokenWithChars(T, TokenPtr, tok::unknown_command);
    419  1.1  joerg           T.setUnknownCommandName(CommandName);
    420  1.1  joerg           Diag(T.getLocation(), diag::warn_unknown_comment_command_name)
    421  1.1  joerg               << SourceRange(T.getLocation(), T.getEndLocation());
    422  1.1  joerg           return;
    423  1.1  joerg         }
    424  1.1  joerg       }
    425  1.1  joerg       if (Info->IsVerbatimBlockCommand) {
    426  1.1  joerg         setupAndLexVerbatimBlock(T, TokenPtr, *BufferPtr, Info);
    427  1.1  joerg         return;
    428  1.1  joerg       }
    429  1.1  joerg       if (Info->IsVerbatimLineCommand) {
    430  1.1  joerg         setupAndLexVerbatimLine(T, TokenPtr, Info);
    431  1.1  joerg         return;
    432  1.1  joerg       }
    433  1.1  joerg       formTokenWithChars(T, TokenPtr, CommandKind);
    434  1.1  joerg       T.setCommandID(Info->getID());
    435  1.1  joerg       return;
    436  1.1  joerg     }
    437  1.1  joerg 
    438  1.1  joerg     case '&':
    439  1.1  joerg       lexHTMLCharacterReference(T);
    440  1.1  joerg       return;
    441  1.1  joerg 
    442  1.1  joerg     case '<': {
    443  1.1  joerg       TokenPtr++;
    444  1.1  joerg       if (TokenPtr == CommentEnd) {
    445  1.1  joerg         formTextToken(T, TokenPtr);
    446  1.1  joerg         return;
    447  1.1  joerg       }
    448  1.1  joerg       const char C = *TokenPtr;
    449  1.1  joerg       if (isHTMLIdentifierStartingCharacter(C))
    450  1.1  joerg         setupAndLexHTMLStartTag(T);
    451  1.1  joerg       else if (C == '/')
    452  1.1  joerg         setupAndLexHTMLEndTag(T);
    453  1.1  joerg       else
    454  1.1  joerg         formTextToken(T, TokenPtr);
    455  1.1  joerg       return;
    456  1.1  joerg     }
    457  1.1  joerg 
    458  1.1  joerg     default:
    459  1.1  joerg       return HandleNonCommandToken();
    460  1.1  joerg   }
    461  1.1  joerg }
    462  1.1  joerg 
    463  1.1  joerg void Lexer::setupAndLexVerbatimBlock(Token &T,
    464  1.1  joerg                                      const char *TextBegin,
    465  1.1  joerg                                      char Marker, const CommandInfo *Info) {
    466  1.1  joerg   assert(Info->IsVerbatimBlockCommand);
    467  1.1  joerg 
    468  1.1  joerg   VerbatimBlockEndCommandName.clear();
    469  1.1  joerg   VerbatimBlockEndCommandName.append(Marker == '\\' ? "\\" : "@");
    470  1.1  joerg   VerbatimBlockEndCommandName.append(Info->EndCommandName);
    471  1.1  joerg 
    472  1.1  joerg   formTokenWithChars(T, TextBegin, tok::verbatim_block_begin);
    473  1.1  joerg   T.setVerbatimBlockID(Info->getID());
    474  1.1  joerg 
    475  1.1  joerg   // If there is a newline following the verbatim opening command, skip the
    476  1.1  joerg   // newline so that we don't create an tok::verbatim_block_line with empty
    477  1.1  joerg   // text content.
    478  1.1  joerg   if (BufferPtr != CommentEnd &&
    479  1.1  joerg       isVerticalWhitespace(*BufferPtr)) {
    480  1.1  joerg     BufferPtr = skipNewline(BufferPtr, CommentEnd);
    481  1.1  joerg     State = LS_VerbatimBlockBody;
    482  1.1  joerg     return;
    483  1.1  joerg   }
    484  1.1  joerg 
    485  1.1  joerg   State = LS_VerbatimBlockFirstLine;
    486  1.1  joerg }
    487  1.1  joerg 
    488  1.1  joerg void Lexer::lexVerbatimBlockFirstLine(Token &T) {
    489  1.1  joerg again:
    490  1.1  joerg   assert(BufferPtr < CommentEnd);
    491  1.1  joerg 
    492  1.1  joerg   // FIXME: It would be better to scan the text once, finding either the block
    493  1.1  joerg   // end command or newline.
    494  1.1  joerg   //
    495  1.1  joerg   // Extract current line.
    496  1.1  joerg   const char *Newline = findNewline(BufferPtr, CommentEnd);
    497  1.1  joerg   StringRef Line(BufferPtr, Newline - BufferPtr);
    498  1.1  joerg 
    499  1.1  joerg   // Look for end command in current line.
    500  1.1  joerg   size_t Pos = Line.find(VerbatimBlockEndCommandName);
    501  1.1  joerg   const char *TextEnd;
    502  1.1  joerg   const char *NextLine;
    503  1.1  joerg   if (Pos == StringRef::npos) {
    504  1.1  joerg     // Current line is completely verbatim.
    505  1.1  joerg     TextEnd = Newline;
    506  1.1  joerg     NextLine = skipNewline(Newline, CommentEnd);
    507  1.1  joerg   } else if (Pos == 0) {
    508  1.1  joerg     // Current line contains just an end command.
    509  1.1  joerg     const char *End = BufferPtr + VerbatimBlockEndCommandName.size();
    510  1.1  joerg     StringRef Name(BufferPtr + 1, End - (BufferPtr + 1));
    511  1.1  joerg     formTokenWithChars(T, End, tok::verbatim_block_end);
    512  1.1  joerg     T.setVerbatimBlockID(Traits.getCommandInfo(Name)->getID());
    513  1.1  joerg     State = LS_Normal;
    514  1.1  joerg     return;
    515  1.1  joerg   } else {
    516  1.1  joerg     // There is some text, followed by end command.  Extract text first.
    517  1.1  joerg     TextEnd = BufferPtr + Pos;
    518  1.1  joerg     NextLine = TextEnd;
    519  1.1  joerg     // If there is only whitespace before end command, skip whitespace.
    520  1.1  joerg     if (isWhitespace(BufferPtr, TextEnd)) {
    521  1.1  joerg       BufferPtr = TextEnd;
    522  1.1  joerg       goto again;
    523  1.1  joerg     }
    524  1.1  joerg   }
    525  1.1  joerg 
    526  1.1  joerg   StringRef Text(BufferPtr, TextEnd - BufferPtr);
    527  1.1  joerg   formTokenWithChars(T, NextLine, tok::verbatim_block_line);
    528  1.1  joerg   T.setVerbatimBlockText(Text);
    529  1.1  joerg 
    530  1.1  joerg   State = LS_VerbatimBlockBody;
    531  1.1  joerg }
    532  1.1  joerg 
    533  1.1  joerg void Lexer::lexVerbatimBlockBody(Token &T) {
    534  1.1  joerg   assert(State == LS_VerbatimBlockBody);
    535  1.1  joerg 
    536  1.1  joerg   if (CommentState == LCS_InsideCComment)
    537  1.1  joerg     skipLineStartingDecorations();
    538  1.1  joerg 
    539  1.1  joerg   if (BufferPtr == CommentEnd) {
    540  1.1  joerg     formTokenWithChars(T, BufferPtr, tok::verbatim_block_line);
    541  1.1  joerg     T.setVerbatimBlockText("");
    542  1.1  joerg     return;
    543  1.1  joerg   }
    544  1.1  joerg 
    545  1.1  joerg   lexVerbatimBlockFirstLine(T);
    546  1.1  joerg }
    547  1.1  joerg 
    548  1.1  joerg void Lexer::setupAndLexVerbatimLine(Token &T, const char *TextBegin,
    549  1.1  joerg                                     const CommandInfo *Info) {
    550  1.1  joerg   assert(Info->IsVerbatimLineCommand);
    551  1.1  joerg   formTokenWithChars(T, TextBegin, tok::verbatim_line_name);
    552  1.1  joerg   T.setVerbatimLineID(Info->getID());
    553  1.1  joerg 
    554  1.1  joerg   State = LS_VerbatimLineText;
    555  1.1  joerg }
    556  1.1  joerg 
    557  1.1  joerg void Lexer::lexVerbatimLineText(Token &T) {
    558  1.1  joerg   assert(State == LS_VerbatimLineText);
    559  1.1  joerg 
    560  1.1  joerg   // Extract current line.
    561  1.1  joerg   const char *Newline = findNewline(BufferPtr, CommentEnd);
    562  1.1  joerg   StringRef Text(BufferPtr, Newline - BufferPtr);
    563  1.1  joerg   formTokenWithChars(T, Newline, tok::verbatim_line_text);
    564  1.1  joerg   T.setVerbatimLineText(Text);
    565  1.1  joerg 
    566  1.1  joerg   State = LS_Normal;
    567  1.1  joerg }
    568  1.1  joerg 
    569  1.1  joerg void Lexer::lexHTMLCharacterReference(Token &T) {
    570  1.1  joerg   const char *TokenPtr = BufferPtr;
    571  1.1  joerg   assert(*TokenPtr == '&');
    572  1.1  joerg   TokenPtr++;
    573  1.1  joerg   if (TokenPtr == CommentEnd) {
    574  1.1  joerg     formTextToken(T, TokenPtr);
    575  1.1  joerg     return;
    576  1.1  joerg   }
    577  1.1  joerg   const char *NamePtr;
    578  1.1  joerg   bool isNamed = false;
    579  1.1  joerg   bool isDecimal = false;
    580  1.1  joerg   char C = *TokenPtr;
    581  1.1  joerg   if (isHTMLNamedCharacterReferenceCharacter(C)) {
    582  1.1  joerg     NamePtr = TokenPtr;
    583  1.1  joerg     TokenPtr = skipNamedCharacterReference(TokenPtr, CommentEnd);
    584  1.1  joerg     isNamed = true;
    585  1.1  joerg   } else if (C == '#') {
    586  1.1  joerg     TokenPtr++;
    587  1.1  joerg     if (TokenPtr == CommentEnd) {
    588  1.1  joerg       formTextToken(T, TokenPtr);
    589  1.1  joerg       return;
    590  1.1  joerg     }
    591  1.1  joerg     C = *TokenPtr;
    592  1.1  joerg     if (isHTMLDecimalCharacterReferenceCharacter(C)) {
    593  1.1  joerg       NamePtr = TokenPtr;
    594  1.1  joerg       TokenPtr = skipDecimalCharacterReference(TokenPtr, CommentEnd);
    595  1.1  joerg       isDecimal = true;
    596  1.1  joerg     } else if (C == 'x' || C == 'X') {
    597  1.1  joerg       TokenPtr++;
    598  1.1  joerg       NamePtr = TokenPtr;
    599  1.1  joerg       TokenPtr = skipHexCharacterReference(TokenPtr, CommentEnd);
    600  1.1  joerg     } else {
    601  1.1  joerg       formTextToken(T, TokenPtr);
    602  1.1  joerg       return;
    603  1.1  joerg     }
    604  1.1  joerg   } else {
    605  1.1  joerg     formTextToken(T, TokenPtr);
    606  1.1  joerg     return;
    607  1.1  joerg   }
    608  1.1  joerg   if (NamePtr == TokenPtr || TokenPtr == CommentEnd ||
    609  1.1  joerg       *TokenPtr != ';') {
    610  1.1  joerg     formTextToken(T, TokenPtr);
    611  1.1  joerg     return;
    612  1.1  joerg   }
    613  1.1  joerg   StringRef Name(NamePtr, TokenPtr - NamePtr);
    614  1.1  joerg   TokenPtr++; // Skip semicolon.
    615  1.1  joerg   StringRef Resolved;
    616  1.1  joerg   if (isNamed)
    617  1.1  joerg     Resolved = resolveHTMLNamedCharacterReference(Name);
    618  1.1  joerg   else if (isDecimal)
    619  1.1  joerg     Resolved = resolveHTMLDecimalCharacterReference(Name);
    620  1.1  joerg   else
    621  1.1  joerg     Resolved = resolveHTMLHexCharacterReference(Name);
    622  1.1  joerg 
    623  1.1  joerg   if (Resolved.empty()) {
    624  1.1  joerg     formTextToken(T, TokenPtr);
    625  1.1  joerg     return;
    626  1.1  joerg   }
    627  1.1  joerg   formTokenWithChars(T, TokenPtr, tok::text);
    628  1.1  joerg   T.setText(Resolved);
    629  1.1  joerg }
    630  1.1  joerg 
    631  1.1  joerg void Lexer::setupAndLexHTMLStartTag(Token &T) {
    632  1.1  joerg   assert(BufferPtr[0] == '<' &&
    633  1.1  joerg          isHTMLIdentifierStartingCharacter(BufferPtr[1]));
    634  1.1  joerg   const char *TagNameEnd = skipHTMLIdentifier(BufferPtr + 2, CommentEnd);
    635  1.1  joerg   StringRef Name(BufferPtr + 1, TagNameEnd - (BufferPtr + 1));
    636  1.1  joerg   if (!isHTMLTagName(Name)) {
    637  1.1  joerg     formTextToken(T, TagNameEnd);
    638  1.1  joerg     return;
    639  1.1  joerg   }
    640  1.1  joerg 
    641  1.1  joerg   formTokenWithChars(T, TagNameEnd, tok::html_start_tag);
    642  1.1  joerg   T.setHTMLTagStartName(Name);
    643  1.1  joerg 
    644  1.1  joerg   BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
    645  1.1  joerg 
    646  1.1  joerg   const char C = *BufferPtr;
    647  1.1  joerg   if (BufferPtr != CommentEnd &&
    648  1.1  joerg       (C == '>' || C == '/' || isHTMLIdentifierStartingCharacter(C)))
    649  1.1  joerg     State = LS_HTMLStartTag;
    650  1.1  joerg }
    651  1.1  joerg 
    652  1.1  joerg void Lexer::lexHTMLStartTag(Token &T) {
    653  1.1  joerg   assert(State == LS_HTMLStartTag);
    654  1.1  joerg 
    655  1.1  joerg   const char *TokenPtr = BufferPtr;
    656  1.1  joerg   char C = *TokenPtr;
    657  1.1  joerg   if (isHTMLIdentifierCharacter(C)) {
    658  1.1  joerg     TokenPtr = skipHTMLIdentifier(TokenPtr, CommentEnd);
    659  1.1  joerg     StringRef Ident(BufferPtr, TokenPtr - BufferPtr);
    660  1.1  joerg     formTokenWithChars(T, TokenPtr, tok::html_ident);
    661  1.1  joerg     T.setHTMLIdent(Ident);
    662  1.1  joerg   } else {
    663  1.1  joerg     switch (C) {
    664  1.1  joerg     case '=':
    665  1.1  joerg       TokenPtr++;
    666  1.1  joerg       formTokenWithChars(T, TokenPtr, tok::html_equals);
    667  1.1  joerg       break;
    668  1.1  joerg     case '\"':
    669  1.1  joerg     case '\'': {
    670  1.1  joerg       const char *OpenQuote = TokenPtr;
    671  1.1  joerg       TokenPtr = skipHTMLQuotedString(TokenPtr, CommentEnd);
    672  1.1  joerg       const char *ClosingQuote = TokenPtr;
    673  1.1  joerg       if (TokenPtr != CommentEnd) // Skip closing quote.
    674  1.1  joerg         TokenPtr++;
    675  1.1  joerg       formTokenWithChars(T, TokenPtr, tok::html_quoted_string);
    676  1.1  joerg       T.setHTMLQuotedString(StringRef(OpenQuote + 1,
    677  1.1  joerg                                       ClosingQuote - (OpenQuote + 1)));
    678  1.1  joerg       break;
    679  1.1  joerg     }
    680  1.1  joerg     case '>':
    681  1.1  joerg       TokenPtr++;
    682  1.1  joerg       formTokenWithChars(T, TokenPtr, tok::html_greater);
    683  1.1  joerg       State = LS_Normal;
    684  1.1  joerg       return;
    685  1.1  joerg     case '/':
    686  1.1  joerg       TokenPtr++;
    687  1.1  joerg       if (TokenPtr != CommentEnd && *TokenPtr == '>') {
    688  1.1  joerg         TokenPtr++;
    689  1.1  joerg         formTokenWithChars(T, TokenPtr, tok::html_slash_greater);
    690  1.1  joerg       } else
    691  1.1  joerg         formTextToken(T, TokenPtr);
    692  1.1  joerg 
    693  1.1  joerg       State = LS_Normal;
    694  1.1  joerg       return;
    695  1.1  joerg     }
    696  1.1  joerg   }
    697  1.1  joerg 
    698  1.1  joerg   // Now look ahead and return to normal state if we don't see any HTML tokens
    699  1.1  joerg   // ahead.
    700  1.1  joerg   BufferPtr = skipWhitespace(BufferPtr, CommentEnd);
    701  1.1  joerg   if (BufferPtr == CommentEnd) {
    702  1.1  joerg     State = LS_Normal;
    703  1.1  joerg     return;
    704  1.1  joerg   }
    705  1.1  joerg 
    706  1.1  joerg   C = *BufferPtr;
    707  1.1  joerg   if (!isHTMLIdentifierStartingCharacter(C) &&
    708  1.1  joerg       C != '=' && C != '\"' && C != '\'' && C != '>') {
    709  1.1  joerg     State = LS_Normal;
    710  1.1  joerg     return;
    711  1.1  joerg   }
    712  1.1  joerg }
    713  1.1  joerg 
    714  1.1  joerg void Lexer::setupAndLexHTMLEndTag(Token &T) {
    715  1.1  joerg   assert(BufferPtr[0] == '<' && BufferPtr[1] == '/');
    716  1.1  joerg 
    717  1.1  joerg   const char *TagNameBegin = skipWhitespace(BufferPtr + 2, CommentEnd);
    718  1.1  joerg   const char *TagNameEnd = skipHTMLIdentifier(TagNameBegin, CommentEnd);
    719  1.1  joerg   StringRef Name(TagNameBegin, TagNameEnd - TagNameBegin);
    720  1.1  joerg   if (!isHTMLTagName(Name)) {
    721  1.1  joerg     formTextToken(T, TagNameEnd);
    722  1.1  joerg     return;
    723  1.1  joerg   }
    724  1.1  joerg 
    725  1.1  joerg   const char *End = skipWhitespace(TagNameEnd, CommentEnd);
    726  1.1  joerg 
    727  1.1  joerg   formTokenWithChars(T, End, tok::html_end_tag);
    728  1.1  joerg   T.setHTMLTagEndName(Name);
    729  1.1  joerg 
    730  1.1  joerg   if (BufferPtr != CommentEnd && *BufferPtr == '>')
    731  1.1  joerg     State = LS_HTMLEndTag;
    732  1.1  joerg }
    733  1.1  joerg 
    734  1.1  joerg void Lexer::lexHTMLEndTag(Token &T) {
    735  1.1  joerg   assert(BufferPtr != CommentEnd && *BufferPtr == '>');
    736  1.1  joerg 
    737  1.1  joerg   formTokenWithChars(T, BufferPtr + 1, tok::html_greater);
    738  1.1  joerg   State = LS_Normal;
    739  1.1  joerg }
    740  1.1  joerg 
    741  1.1  joerg Lexer::Lexer(llvm::BumpPtrAllocator &Allocator, DiagnosticsEngine &Diags,
    742  1.1  joerg              const CommandTraits &Traits, SourceLocation FileLoc,
    743  1.1  joerg              const char *BufferStart, const char *BufferEnd,
    744  1.1  joerg              bool ParseCommands)
    745  1.1  joerg     : Allocator(Allocator), Diags(Diags), Traits(Traits),
    746  1.1  joerg       BufferStart(BufferStart), BufferEnd(BufferEnd), FileLoc(FileLoc),
    747  1.1  joerg       BufferPtr(BufferStart), CommentState(LCS_BeforeComment), State(LS_Normal),
    748  1.1  joerg       ParseCommands(ParseCommands) {}
    749  1.1  joerg 
    750  1.1  joerg void Lexer::lex(Token &T) {
    751  1.1  joerg again:
    752  1.1  joerg   switch (CommentState) {
    753  1.1  joerg   case LCS_BeforeComment:
    754  1.1  joerg     if (BufferPtr == BufferEnd) {
    755  1.1  joerg       formTokenWithChars(T, BufferPtr, tok::eof);
    756  1.1  joerg       return;
    757  1.1  joerg     }
    758  1.1  joerg 
    759  1.1  joerg     assert(*BufferPtr == '/');
    760  1.1  joerg     BufferPtr++; // Skip first slash.
    761  1.1  joerg     switch(*BufferPtr) {
    762  1.1  joerg     case '/': { // BCPL comment.
    763  1.1  joerg       BufferPtr++; // Skip second slash.
    764  1.1  joerg 
    765  1.1  joerg       if (BufferPtr != BufferEnd) {
    766  1.1  joerg         // Skip Doxygen magic marker, if it is present.
    767  1.1  joerg         // It might be missing because of a typo //< or /*<, or because we
    768  1.1  joerg         // merged this non-Doxygen comment into a bunch of Doxygen comments
    769  1.1  joerg         // around it: /** ... */ /* ... */ /** ... */
    770  1.1  joerg         const char C = *BufferPtr;
    771  1.1  joerg         if (C == '/' || C == '!')
    772  1.1  joerg           BufferPtr++;
    773  1.1  joerg       }
    774  1.1  joerg 
    775  1.1  joerg       // Skip less-than symbol that marks trailing comments.
    776  1.1  joerg       // Skip it even if the comment is not a Doxygen one, because //< and /*<
    777  1.1  joerg       // are frequent typos.
    778  1.1  joerg       if (BufferPtr != BufferEnd && *BufferPtr == '<')
    779  1.1  joerg         BufferPtr++;
    780  1.1  joerg 
    781  1.1  joerg       CommentState = LCS_InsideBCPLComment;
    782  1.1  joerg       if (State != LS_VerbatimBlockBody && State != LS_VerbatimBlockFirstLine)
    783  1.1  joerg         State = LS_Normal;
    784  1.1  joerg       CommentEnd = findBCPLCommentEnd(BufferPtr, BufferEnd);
    785  1.1  joerg       goto again;
    786  1.1  joerg     }
    787  1.1  joerg     case '*': { // C comment.
    788  1.1  joerg       BufferPtr++; // Skip star.
    789  1.1  joerg 
    790  1.1  joerg       // Skip Doxygen magic marker.
    791  1.1  joerg       const char C = *BufferPtr;
    792  1.1  joerg       if ((C == '*' && *(BufferPtr + 1) != '/') || C == '!')
    793  1.1  joerg         BufferPtr++;
    794  1.1  joerg 
    795  1.1  joerg       // Skip less-than symbol that marks trailing comments.
    796  1.1  joerg       if (BufferPtr != BufferEnd && *BufferPtr == '<')
    797  1.1  joerg         BufferPtr++;
    798  1.1  joerg 
    799  1.1  joerg       CommentState = LCS_InsideCComment;
    800  1.1  joerg       State = LS_Normal;
    801  1.1  joerg       CommentEnd = findCCommentEnd(BufferPtr, BufferEnd);
    802  1.1  joerg       goto again;
    803  1.1  joerg     }
    804  1.1  joerg     default:
    805  1.1  joerg       llvm_unreachable("second character of comment should be '/' or '*'");
    806  1.1  joerg     }
    807  1.1  joerg 
    808  1.1  joerg   case LCS_BetweenComments: {
    809  1.1  joerg     // Consecutive comments are extracted only if there is only whitespace
    810  1.1  joerg     // between them.  So we can search for the start of the next comment.
    811  1.1  joerg     const char *EndWhitespace = BufferPtr;
    812  1.1  joerg     while(EndWhitespace != BufferEnd && *EndWhitespace != '/')
    813  1.1  joerg       EndWhitespace++;
    814  1.1  joerg 
    815  1.1  joerg     // Turn any whitespace between comments (and there is only whitespace
    816  1.1  joerg     // between them -- guaranteed by comment extraction) into a newline.  We
    817  1.1  joerg     // have two newlines between C comments in total (first one was synthesized
    818  1.1  joerg     // after a comment).
    819  1.1  joerg     formTokenWithChars(T, EndWhitespace, tok::newline);
    820  1.1  joerg 
    821  1.1  joerg     CommentState = LCS_BeforeComment;
    822  1.1  joerg     break;
    823  1.1  joerg   }
    824  1.1  joerg 
    825  1.1  joerg   case LCS_InsideBCPLComment:
    826  1.1  joerg   case LCS_InsideCComment:
    827  1.1  joerg     if (BufferPtr != CommentEnd) {
    828  1.1  joerg       lexCommentText(T);
    829  1.1  joerg       break;
    830  1.1  joerg     } else {
    831  1.1  joerg       // Skip C comment closing sequence.
    832  1.1  joerg       if (CommentState == LCS_InsideCComment) {
    833  1.1  joerg         assert(BufferPtr[0] == '*' && BufferPtr[1] == '/');
    834  1.1  joerg         BufferPtr += 2;
    835  1.1  joerg         assert(BufferPtr <= BufferEnd);
    836  1.1  joerg 
    837  1.1  joerg         // Synthenize newline just after the C comment, regardless if there is
    838  1.1  joerg         // actually a newline.
    839  1.1  joerg         formTokenWithChars(T, BufferPtr, tok::newline);
    840  1.1  joerg 
    841  1.1  joerg         CommentState = LCS_BetweenComments;
    842  1.1  joerg         break;
    843  1.1  joerg       } else {
    844  1.1  joerg         // Don't synthesized a newline after BCPL comment.
    845  1.1  joerg         CommentState = LCS_BetweenComments;
    846  1.1  joerg         goto again;
    847  1.1  joerg       }
    848  1.1  joerg     }
    849  1.1  joerg   }
    850  1.1  joerg }
    851  1.1  joerg 
    852  1.1  joerg StringRef Lexer::getSpelling(const Token &Tok,
    853  1.1  joerg                              const SourceManager &SourceMgr) const {
    854  1.1  joerg   SourceLocation Loc = Tok.getLocation();
    855  1.1  joerg   std::pair<FileID, unsigned> LocInfo = SourceMgr.getDecomposedLoc(Loc);
    856  1.1  joerg 
    857  1.1  joerg   bool InvalidTemp = false;
    858  1.1  joerg   StringRef File = SourceMgr.getBufferData(LocInfo.first, &InvalidTemp);
    859  1.1  joerg   if (InvalidTemp)
    860  1.1  joerg     return StringRef();
    861  1.1  joerg 
    862  1.1  joerg   const char *Begin = File.data() + LocInfo.second;
    863  1.1  joerg   return StringRef(Begin, Tok.getLength());
    864  1.1  joerg }
    865  1.1  joerg 
    866  1.1  joerg } // end namespace comments
    867  1.1  joerg } // end namespace clang
    868