1 1.1 joerg //===- TokenLexer.cpp - Lex from a token stream ---------------------------===// 2 1.1 joerg // 3 1.1 joerg // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 1.1 joerg // See https://llvm.org/LICENSE.txt for license information. 5 1.1 joerg // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 1.1 joerg // 7 1.1 joerg //===----------------------------------------------------------------------===// 8 1.1 joerg // 9 1.1 joerg // This file implements the TokenLexer interface. 10 1.1 joerg // 11 1.1 joerg //===----------------------------------------------------------------------===// 12 1.1 joerg 13 1.1 joerg #include "clang/Lex/TokenLexer.h" 14 1.1 joerg #include "clang/Basic/Diagnostic.h" 15 1.1 joerg #include "clang/Basic/IdentifierTable.h" 16 1.1 joerg #include "clang/Basic/LangOptions.h" 17 1.1 joerg #include "clang/Basic/SourceLocation.h" 18 1.1 joerg #include "clang/Basic/SourceManager.h" 19 1.1 joerg #include "clang/Basic/TokenKinds.h" 20 1.1 joerg #include "clang/Lex/LexDiagnostic.h" 21 1.1 joerg #include "clang/Lex/Lexer.h" 22 1.1 joerg #include "clang/Lex/MacroArgs.h" 23 1.1 joerg #include "clang/Lex/MacroInfo.h" 24 1.1 joerg #include "clang/Lex/Preprocessor.h" 25 1.1 joerg #include "clang/Lex/Token.h" 26 1.1 joerg #include "clang/Lex/VariadicMacroSupport.h" 27 1.1 joerg #include "llvm/ADT/ArrayRef.h" 28 1.1 joerg #include "llvm/ADT/SmallString.h" 29 1.1 joerg #include "llvm/ADT/SmallVector.h" 30 1.1 joerg #include "llvm/ADT/iterator_range.h" 31 1.1 joerg #include <cassert> 32 1.1 joerg #include <cstring> 33 1.1 joerg 34 1.1 joerg using namespace clang; 35 1.1 joerg 36 1.1 joerg /// Create a TokenLexer for the specified macro with the specified actual 37 1.1 joerg /// arguments. Note that this ctor takes ownership of the ActualArgs pointer. 38 1.1 joerg void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI, 39 1.1 joerg MacroArgs *Actuals) { 40 1.1 joerg // If the client is reusing a TokenLexer, make sure to free any memory 41 1.1 joerg // associated with it. 42 1.1 joerg destroy(); 43 1.1 joerg 44 1.1 joerg Macro = MI; 45 1.1 joerg ActualArgs = Actuals; 46 1.1 joerg CurTokenIdx = 0; 47 1.1 joerg 48 1.1 joerg ExpandLocStart = Tok.getLocation(); 49 1.1 joerg ExpandLocEnd = ELEnd; 50 1.1 joerg AtStartOfLine = Tok.isAtStartOfLine(); 51 1.1 joerg HasLeadingSpace = Tok.hasLeadingSpace(); 52 1.1 joerg NextTokGetsSpace = false; 53 1.1 joerg Tokens = &*Macro->tokens_begin(); 54 1.1 joerg OwnsTokens = false; 55 1.1 joerg DisableMacroExpansion = false; 56 1.1 joerg IsReinject = false; 57 1.1 joerg NumTokens = Macro->tokens_end()-Macro->tokens_begin(); 58 1.1 joerg MacroExpansionStart = SourceLocation(); 59 1.1 joerg 60 1.1 joerg SourceManager &SM = PP.getSourceManager(); 61 1.1 joerg MacroStartSLocOffset = SM.getNextLocalOffset(); 62 1.1 joerg 63 1.1 joerg if (NumTokens > 0) { 64 1.1 joerg assert(Tokens[0].getLocation().isValid()); 65 1.1 joerg assert((Tokens[0].getLocation().isFileID() || Tokens[0].is(tok::comment)) && 66 1.1 joerg "Macro defined in macro?"); 67 1.1 joerg assert(ExpandLocStart.isValid()); 68 1.1 joerg 69 1.1 joerg // Reserve a source location entry chunk for the length of the macro 70 1.1 joerg // definition. Tokens that get lexed directly from the definition will 71 1.1 joerg // have their locations pointing inside this chunk. This is to avoid 72 1.1 joerg // creating separate source location entries for each token. 73 1.1 joerg MacroDefStart = SM.getExpansionLoc(Tokens[0].getLocation()); 74 1.1 joerg MacroDefLength = Macro->getDefinitionLength(SM); 75 1.1 joerg MacroExpansionStart = SM.createExpansionLoc(MacroDefStart, 76 1.1 joerg ExpandLocStart, 77 1.1 joerg ExpandLocEnd, 78 1.1 joerg MacroDefLength); 79 1.1 joerg } 80 1.1 joerg 81 1.1 joerg // If this is a function-like macro, expand the arguments and change 82 1.1 joerg // Tokens to point to the expanded tokens. 83 1.1 joerg if (Macro->isFunctionLike() && Macro->getNumParams()) 84 1.1 joerg ExpandFunctionArguments(); 85 1.1 joerg 86 1.1 joerg // Mark the macro as currently disabled, so that it is not recursively 87 1.1 joerg // expanded. The macro must be disabled only after argument pre-expansion of 88 1.1 joerg // function-like macro arguments occurs. 89 1.1 joerg Macro->DisableMacro(); 90 1.1 joerg } 91 1.1 joerg 92 1.1 joerg /// Create a TokenLexer for the specified token stream. This does not 93 1.1 joerg /// take ownership of the specified token vector. 94 1.1 joerg void TokenLexer::Init(const Token *TokArray, unsigned NumToks, 95 1.1 joerg bool disableMacroExpansion, bool ownsTokens, 96 1.1 joerg bool isReinject) { 97 1.1 joerg assert(!isReinject || disableMacroExpansion); 98 1.1 joerg // If the client is reusing a TokenLexer, make sure to free any memory 99 1.1 joerg // associated with it. 100 1.1 joerg destroy(); 101 1.1 joerg 102 1.1 joerg Macro = nullptr; 103 1.1 joerg ActualArgs = nullptr; 104 1.1 joerg Tokens = TokArray; 105 1.1 joerg OwnsTokens = ownsTokens; 106 1.1 joerg DisableMacroExpansion = disableMacroExpansion; 107 1.1 joerg IsReinject = isReinject; 108 1.1 joerg NumTokens = NumToks; 109 1.1 joerg CurTokenIdx = 0; 110 1.1 joerg ExpandLocStart = ExpandLocEnd = SourceLocation(); 111 1.1 joerg AtStartOfLine = false; 112 1.1 joerg HasLeadingSpace = false; 113 1.1 joerg NextTokGetsSpace = false; 114 1.1 joerg MacroExpansionStart = SourceLocation(); 115 1.1 joerg 116 1.1 joerg // Set HasLeadingSpace/AtStartOfLine so that the first token will be 117 1.1 joerg // returned unmodified. 118 1.1 joerg if (NumToks != 0) { 119 1.1 joerg AtStartOfLine = TokArray[0].isAtStartOfLine(); 120 1.1 joerg HasLeadingSpace = TokArray[0].hasLeadingSpace(); 121 1.1 joerg } 122 1.1 joerg } 123 1.1 joerg 124 1.1 joerg void TokenLexer::destroy() { 125 1.1 joerg // If this was a function-like macro that actually uses its arguments, delete 126 1.1 joerg // the expanded tokens. 127 1.1 joerg if (OwnsTokens) { 128 1.1 joerg delete [] Tokens; 129 1.1 joerg Tokens = nullptr; 130 1.1 joerg OwnsTokens = false; 131 1.1 joerg } 132 1.1 joerg 133 1.1 joerg // TokenLexer owns its formal arguments. 134 1.1 joerg if (ActualArgs) ActualArgs->destroy(PP); 135 1.1 joerg } 136 1.1 joerg 137 1.1 joerg bool TokenLexer::MaybeRemoveCommaBeforeVaArgs( 138 1.1 joerg SmallVectorImpl<Token> &ResultToks, bool HasPasteOperator, MacroInfo *Macro, 139 1.1 joerg unsigned MacroArgNo, Preprocessor &PP) { 140 1.1 joerg // Is the macro argument __VA_ARGS__? 141 1.1 joerg if (!Macro->isVariadic() || MacroArgNo != Macro->getNumParams()-1) 142 1.1 joerg return false; 143 1.1 joerg 144 1.1 joerg // In Microsoft-compatibility mode, a comma is removed in the expansion 145 1.1 joerg // of " ... , __VA_ARGS__ " if __VA_ARGS__ is empty. This extension is 146 1.1 joerg // not supported by gcc. 147 1.1 joerg if (!HasPasteOperator && !PP.getLangOpts().MSVCCompat) 148 1.1 joerg return false; 149 1.1 joerg 150 1.1 joerg // GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if 151 1.1 joerg // __VA_ARGS__ is empty, but not in strict C99 mode where there are no 152 1.1 joerg // named arguments, where it remains. In all other modes, including C99 153 1.1 joerg // with GNU extensions, it is removed regardless of named arguments. 154 1.1 joerg // Microsoft also appears to support this extension, unofficially. 155 1.1 joerg if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode 156 1.1 joerg && Macro->getNumParams() < 2) 157 1.1 joerg return false; 158 1.1 joerg 159 1.1 joerg // Is a comma available to be removed? 160 1.1 joerg if (ResultToks.empty() || !ResultToks.back().is(tok::comma)) 161 1.1 joerg return false; 162 1.1 joerg 163 1.1 joerg // Issue an extension diagnostic for the paste operator. 164 1.1 joerg if (HasPasteOperator) 165 1.1 joerg PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma); 166 1.1 joerg 167 1.1 joerg // Remove the comma. 168 1.1 joerg ResultToks.pop_back(); 169 1.1 joerg 170 1.1 joerg if (!ResultToks.empty()) { 171 1.1 joerg // If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"), 172 1.1 joerg // then removal of the comma should produce a placemarker token (in C99 173 1.1 joerg // terms) which we model by popping off the previous ##, giving us a plain 174 1.1 joerg // "X" when __VA_ARGS__ is empty. 175 1.1 joerg if (ResultToks.back().is(tok::hashhash)) 176 1.1 joerg ResultToks.pop_back(); 177 1.1 joerg 178 1.1 joerg // Remember that this comma was elided. 179 1.1 joerg ResultToks.back().setFlag(Token::CommaAfterElided); 180 1.1 joerg } 181 1.1 joerg 182 1.1 joerg // Never add a space, even if the comma, ##, or arg had a space. 183 1.1 joerg NextTokGetsSpace = false; 184 1.1 joerg return true; 185 1.1 joerg } 186 1.1 joerg 187 1.1 joerg void TokenLexer::stringifyVAOPTContents( 188 1.1 joerg SmallVectorImpl<Token> &ResultToks, const VAOptExpansionContext &VCtx, 189 1.1 joerg const SourceLocation VAOPTClosingParenLoc) { 190 1.1 joerg const int NumToksPriorToVAOpt = VCtx.getNumberOfTokensPriorToVAOpt(); 191 1.1 joerg const unsigned int NumVAOptTokens = ResultToks.size() - NumToksPriorToVAOpt; 192 1.1 joerg Token *const VAOPTTokens = 193 1.1 joerg NumVAOptTokens ? &ResultToks[NumToksPriorToVAOpt] : nullptr; 194 1.1 joerg 195 1.1 joerg SmallVector<Token, 64> ConcatenatedVAOPTResultToks; 196 1.1 joerg // FIXME: Should we keep track within VCtx that we did or didnot 197 1.1 joerg // encounter pasting - and only then perform this loop. 198 1.1 joerg 199 1.1 joerg // Perform token pasting (concatenation) prior to stringization. 200 1.1 joerg for (unsigned int CurTokenIdx = 0; CurTokenIdx != NumVAOptTokens; 201 1.1 joerg ++CurTokenIdx) { 202 1.1 joerg if (VAOPTTokens[CurTokenIdx].is(tok::hashhash)) { 203 1.1 joerg assert(CurTokenIdx != 0 && 204 1.1 joerg "Can not have __VAOPT__ contents begin with a ##"); 205 1.1 joerg Token &LHS = VAOPTTokens[CurTokenIdx - 1]; 206 1.1 joerg pasteTokens(LHS, llvm::makeArrayRef(VAOPTTokens, NumVAOptTokens), 207 1.1 joerg CurTokenIdx); 208 1.1 joerg // Replace the token prior to the first ## in this iteration. 209 1.1 joerg ConcatenatedVAOPTResultToks.back() = LHS; 210 1.1 joerg if (CurTokenIdx == NumVAOptTokens) 211 1.1 joerg break; 212 1.1 joerg } 213 1.1 joerg ConcatenatedVAOPTResultToks.push_back(VAOPTTokens[CurTokenIdx]); 214 1.1 joerg } 215 1.1 joerg 216 1.1 joerg ConcatenatedVAOPTResultToks.push_back(VCtx.getEOFTok()); 217 1.1 joerg // Get the SourceLocation that represents the start location within 218 1.1 joerg // the macro definition that marks where this string is substituted 219 1.1 joerg // into: i.e. the __VA_OPT__ and the ')' within the spelling of the 220 1.1 joerg // macro definition, and use it to indicate that the stringified token 221 1.1 joerg // was generated from that location. 222 1.1 joerg const SourceLocation ExpansionLocStartWithinMacro = 223 1.1 joerg getExpansionLocForMacroDefLoc(VCtx.getVAOptLoc()); 224 1.1 joerg const SourceLocation ExpansionLocEndWithinMacro = 225 1.1 joerg getExpansionLocForMacroDefLoc(VAOPTClosingParenLoc); 226 1.1 joerg 227 1.1 joerg Token StringifiedVAOPT = MacroArgs::StringifyArgument( 228 1.1 joerg &ConcatenatedVAOPTResultToks[0], PP, VCtx.hasCharifyBefore() /*Charify*/, 229 1.1 joerg ExpansionLocStartWithinMacro, ExpansionLocEndWithinMacro); 230 1.1 joerg 231 1.1 joerg if (VCtx.getLeadingSpaceForStringifiedToken()) 232 1.1 joerg StringifiedVAOPT.setFlag(Token::LeadingSpace); 233 1.1 joerg 234 1.1 joerg StringifiedVAOPT.setFlag(Token::StringifiedInMacro); 235 1.1 joerg // Resize (shrink) the token stream to just capture this stringified token. 236 1.1 joerg ResultToks.resize(NumToksPriorToVAOpt + 1); 237 1.1 joerg ResultToks.back() = StringifiedVAOPT; 238 1.1 joerg } 239 1.1 joerg 240 1.1 joerg /// Expand the arguments of a function-like macro so that we can quickly 241 1.1 joerg /// return preexpanded tokens from Tokens. 242 1.1 joerg void TokenLexer::ExpandFunctionArguments() { 243 1.1 joerg SmallVector<Token, 128> ResultToks; 244 1.1 joerg 245 1.1 joerg // Loop through 'Tokens', expanding them into ResultToks. Keep 246 1.1 joerg // track of whether we change anything. If not, no need to keep them. If so, 247 1.1 joerg // we install the newly expanded sequence as the new 'Tokens' list. 248 1.1 joerg bool MadeChange = false; 249 1.1 joerg 250 1.1 joerg Optional<bool> CalledWithVariadicArguments; 251 1.1 joerg 252 1.1 joerg VAOptExpansionContext VCtx(PP); 253 1.1 joerg 254 1.1 joerg for (unsigned I = 0, E = NumTokens; I != E; ++I) { 255 1.1 joerg const Token &CurTok = Tokens[I]; 256 1.1 joerg // We don't want a space for the next token after a paste 257 1.1 joerg // operator. In valid code, the token will get smooshed onto the 258 1.1 joerg // preceding one anyway. In assembler-with-cpp mode, invalid 259 1.1 joerg // pastes are allowed through: in this case, we do not want the 260 1.1 joerg // extra whitespace to be added. For example, we want ". ## foo" 261 1.1 joerg // -> ".foo" not ". foo". 262 1.1 joerg if (I != 0 && !Tokens[I-1].is(tok::hashhash) && CurTok.hasLeadingSpace()) 263 1.1 joerg NextTokGetsSpace = true; 264 1.1 joerg 265 1.1 joerg if (VCtx.isVAOptToken(CurTok)) { 266 1.1 joerg MadeChange = true; 267 1.1 joerg assert(Tokens[I + 1].is(tok::l_paren) && 268 1.1 joerg "__VA_OPT__ must be followed by '('"); 269 1.1 joerg 270 1.1 joerg ++I; // Skip the l_paren 271 1.1 joerg VCtx.sawVAOptFollowedByOpeningParens(CurTok.getLocation(), 272 1.1 joerg ResultToks.size()); 273 1.1 joerg 274 1.1 joerg continue; 275 1.1 joerg } 276 1.1 joerg 277 1.1 joerg // We have entered into the __VA_OPT__ context, so handle tokens 278 1.1 joerg // appropriately. 279 1.1 joerg if (VCtx.isInVAOpt()) { 280 1.1 joerg // If we are about to process a token that is either an argument to 281 1.1 joerg // __VA_OPT__ or its closing rparen, then: 282 1.1 joerg // 1) If the token is the closing rparen that exits us out of __VA_OPT__, 283 1.1 joerg // perform any necessary stringification or placemarker processing, 284 1.1 joerg // and/or skip to the next token. 285 1.1 joerg // 2) else if macro was invoked without variadic arguments skip this 286 1.1 joerg // token. 287 1.1 joerg // 3) else (macro was invoked with variadic arguments) process the token 288 1.1 joerg // normally. 289 1.1 joerg 290 1.1 joerg if (Tokens[I].is(tok::l_paren)) 291 1.1 joerg VCtx.sawOpeningParen(Tokens[I].getLocation()); 292 1.1 joerg // Continue skipping tokens within __VA_OPT__ if the macro was not 293 1.1 joerg // called with variadic arguments, else let the rest of the loop handle 294 1.1 joerg // this token. Note sawClosingParen() returns true only if the r_paren matches 295 1.1 joerg // the closing r_paren of the __VA_OPT__. 296 1.1 joerg if (!Tokens[I].is(tok::r_paren) || !VCtx.sawClosingParen()) { 297 1.1 joerg // Lazily expand __VA_ARGS__ when we see the first __VA_OPT__. 298 1.1 joerg if (!CalledWithVariadicArguments.hasValue()) { 299 1.1 joerg CalledWithVariadicArguments = 300 1.1 joerg ActualArgs->invokedWithVariadicArgument(Macro, PP); 301 1.1 joerg } 302 1.1 joerg if (!*CalledWithVariadicArguments) { 303 1.1 joerg // Skip this token. 304 1.1 joerg continue; 305 1.1 joerg } 306 1.1 joerg // ... else the macro was called with variadic arguments, and we do not 307 1.1 joerg // have a closing rparen - so process this token normally. 308 1.1 joerg } else { 309 1.1 joerg // Current token is the closing r_paren which marks the end of the 310 1.1 joerg // __VA_OPT__ invocation, so handle any place-marker pasting (if 311 1.1 joerg // empty) by removing hashhash either before (if exists) or after. And 312 1.1 joerg // also stringify the entire contents if VAOPT was preceded by a hash, 313 1.1 joerg // but do so only after any token concatenation that needs to occur 314 1.1 joerg // within the contents of VAOPT. 315 1.1 joerg 316 1.1 joerg if (VCtx.hasStringifyOrCharifyBefore()) { 317 1.1 joerg // Replace all the tokens just added from within VAOPT into a single 318 1.1 joerg // stringified token. This requires token-pasting to eagerly occur 319 1.1 joerg // within these tokens. If either the contents of VAOPT were empty 320 1.1 joerg // or the macro wasn't called with any variadic arguments, the result 321 1.1 joerg // is a token that represents an empty string. 322 1.1 joerg stringifyVAOPTContents(ResultToks, VCtx, 323 1.1 joerg /*ClosingParenLoc*/ Tokens[I].getLocation()); 324 1.1 joerg 325 1.1 joerg } else if (/*No tokens within VAOPT*/ 326 1.1 joerg ResultToks.size() == VCtx.getNumberOfTokensPriorToVAOpt()) { 327 1.1 joerg // Treat VAOPT as a placemarker token. Eat either the '##' before the 328 1.1 joerg // RHS/VAOPT (if one exists, suggesting that the LHS (if any) to that 329 1.1 joerg // hashhash was not a placemarker) or the '##' 330 1.1 joerg // after VAOPT, but not both. 331 1.1 joerg 332 1.1 joerg if (ResultToks.size() && ResultToks.back().is(tok::hashhash)) { 333 1.1 joerg ResultToks.pop_back(); 334 1.1 joerg } else if ((I + 1 != E) && Tokens[I + 1].is(tok::hashhash)) { 335 1.1 joerg ++I; // Skip the following hashhash. 336 1.1 joerg } 337 1.1 joerg } else { 338 1.1 joerg // If there's a ## before the __VA_OPT__, we might have discovered 339 1.1 joerg // that the __VA_OPT__ begins with a placeholder. We delay action on 340 1.1 joerg // that to now to avoid messing up our stashed count of tokens before 341 1.1 joerg // __VA_OPT__. 342 1.1 joerg if (VCtx.beginsWithPlaceholder()) { 343 1.1 joerg assert(VCtx.getNumberOfTokensPriorToVAOpt() > 0 && 344 1.1 joerg ResultToks.size() >= VCtx.getNumberOfTokensPriorToVAOpt() && 345 1.1 joerg ResultToks[VCtx.getNumberOfTokensPriorToVAOpt() - 1].is( 346 1.1 joerg tok::hashhash) && 347 1.1 joerg "no token paste before __VA_OPT__"); 348 1.1 joerg ResultToks.erase(ResultToks.begin() + 349 1.1 joerg VCtx.getNumberOfTokensPriorToVAOpt() - 1); 350 1.1 joerg } 351 1.1 joerg // If the expansion of __VA_OPT__ ends with a placeholder, eat any 352 1.1 joerg // following '##' token. 353 1.1 joerg if (VCtx.endsWithPlaceholder() && I + 1 != E && 354 1.1 joerg Tokens[I + 1].is(tok::hashhash)) { 355 1.1 joerg ++I; 356 1.1 joerg } 357 1.1 joerg } 358 1.1 joerg VCtx.reset(); 359 1.1 joerg // We processed __VA_OPT__'s closing paren (and the exit out of 360 1.1 joerg // __VA_OPT__), so skip to the next token. 361 1.1 joerg continue; 362 1.1 joerg } 363 1.1 joerg } 364 1.1 joerg 365 1.1 joerg // If we found the stringify operator, get the argument stringified. The 366 1.1 joerg // preprocessor already verified that the following token is a macro 367 1.1 joerg // parameter or __VA_OPT__ when the #define was lexed. 368 1.1 joerg 369 1.1 joerg if (CurTok.isOneOf(tok::hash, tok::hashat)) { 370 1.1 joerg int ArgNo = Macro->getParameterNum(Tokens[I+1].getIdentifierInfo()); 371 1.1 joerg assert((ArgNo != -1 || VCtx.isVAOptToken(Tokens[I + 1])) && 372 1.1 joerg "Token following # is not an argument or __VA_OPT__!"); 373 1.1 joerg 374 1.1 joerg if (ArgNo == -1) { 375 1.1 joerg // Handle the __VA_OPT__ case. 376 1.1 joerg VCtx.sawHashOrHashAtBefore(NextTokGetsSpace, 377 1.1 joerg CurTok.is(tok::hashat)); 378 1.1 joerg continue; 379 1.1 joerg } 380 1.1 joerg // Else handle the simple argument case. 381 1.1 joerg SourceLocation ExpansionLocStart = 382 1.1 joerg getExpansionLocForMacroDefLoc(CurTok.getLocation()); 383 1.1 joerg SourceLocation ExpansionLocEnd = 384 1.1 joerg getExpansionLocForMacroDefLoc(Tokens[I+1].getLocation()); 385 1.1 joerg 386 1.1 joerg bool Charify = CurTok.is(tok::hashat); 387 1.1 joerg const Token *UnexpArg = ActualArgs->getUnexpArgument(ArgNo); 388 1.1 joerg Token Res = MacroArgs::StringifyArgument( 389 1.1 joerg UnexpArg, PP, Charify, ExpansionLocStart, ExpansionLocEnd); 390 1.1 joerg Res.setFlag(Token::StringifiedInMacro); 391 1.1 joerg 392 1.1 joerg // The stringified/charified string leading space flag gets set to match 393 1.1 joerg // the #/#@ operator. 394 1.1 joerg if (NextTokGetsSpace) 395 1.1 joerg Res.setFlag(Token::LeadingSpace); 396 1.1 joerg 397 1.1 joerg ResultToks.push_back(Res); 398 1.1 joerg MadeChange = true; 399 1.1 joerg ++I; // Skip arg name. 400 1.1 joerg NextTokGetsSpace = false; 401 1.1 joerg continue; 402 1.1 joerg } 403 1.1 joerg 404 1.1 joerg // Find out if there is a paste (##) operator before or after the token. 405 1.1 joerg bool NonEmptyPasteBefore = 406 1.1 joerg !ResultToks.empty() && ResultToks.back().is(tok::hashhash); 407 1.1 joerg bool PasteBefore = I != 0 && Tokens[I-1].is(tok::hashhash); 408 1.1 joerg bool PasteAfter = I+1 != E && Tokens[I+1].is(tok::hashhash); 409 1.1 joerg bool RParenAfter = I+1 != E && Tokens[I+1].is(tok::r_paren); 410 1.1 joerg 411 1.1 joerg assert((!NonEmptyPasteBefore || PasteBefore || VCtx.isInVAOpt()) && 412 1.1 joerg "unexpected ## in ResultToks"); 413 1.1 joerg 414 1.1 joerg // Otherwise, if this is not an argument token, just add the token to the 415 1.1 joerg // output buffer. 416 1.1 joerg IdentifierInfo *II = CurTok.getIdentifierInfo(); 417 1.1 joerg int ArgNo = II ? Macro->getParameterNum(II) : -1; 418 1.1 joerg if (ArgNo == -1) { 419 1.1 joerg // This isn't an argument, just add it. 420 1.1 joerg ResultToks.push_back(CurTok); 421 1.1 joerg 422 1.1 joerg if (NextTokGetsSpace) { 423 1.1 joerg ResultToks.back().setFlag(Token::LeadingSpace); 424 1.1 joerg NextTokGetsSpace = false; 425 1.1 joerg } else if (PasteBefore && !NonEmptyPasteBefore) 426 1.1 joerg ResultToks.back().clearFlag(Token::LeadingSpace); 427 1.1 joerg 428 1.1 joerg continue; 429 1.1 joerg } 430 1.1 joerg 431 1.1 joerg // An argument is expanded somehow, the result is different than the 432 1.1 joerg // input. 433 1.1 joerg MadeChange = true; 434 1.1 joerg 435 1.1 joerg // Otherwise, this is a use of the argument. 436 1.1 joerg 437 1.1 joerg // In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there 438 1.1 joerg // are no trailing commas if __VA_ARGS__ is empty. 439 1.1 joerg if (!PasteBefore && ActualArgs->isVarargsElidedUse() && 440 1.1 joerg MaybeRemoveCommaBeforeVaArgs(ResultToks, 441 1.1 joerg /*HasPasteOperator=*/false, 442 1.1 joerg Macro, ArgNo, PP)) 443 1.1 joerg continue; 444 1.1 joerg 445 1.1 joerg // If it is not the LHS/RHS of a ## operator, we must pre-expand the 446 1.1 joerg // argument and substitute the expanded tokens into the result. This is 447 1.1 joerg // C99 6.10.3.1p1. 448 1.1 joerg if (!PasteBefore && !PasteAfter) { 449 1.1 joerg const Token *ResultArgToks; 450 1.1 joerg 451 1.1 joerg // Only preexpand the argument if it could possibly need it. This 452 1.1 joerg // avoids some work in common cases. 453 1.1 joerg const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo); 454 1.1 joerg if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP)) 455 1.1 joerg ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0]; 456 1.1 joerg else 457 1.1 joerg ResultArgToks = ArgTok; // Use non-preexpanded tokens. 458 1.1 joerg 459 1.1 joerg // If the arg token expanded into anything, append it. 460 1.1 joerg if (ResultArgToks->isNot(tok::eof)) { 461 1.1 joerg size_t FirstResult = ResultToks.size(); 462 1.1 joerg unsigned NumToks = MacroArgs::getArgLength(ResultArgToks); 463 1.1 joerg ResultToks.append(ResultArgToks, ResultArgToks+NumToks); 464 1.1 joerg 465 1.1 joerg // In Microsoft-compatibility mode, we follow MSVC's preprocessing 466 1.1 joerg // behavior by not considering single commas from nested macro 467 1.1 joerg // expansions as argument separators. Set a flag on the token so we can 468 1.1 joerg // test for this later when the macro expansion is processed. 469 1.1 joerg if (PP.getLangOpts().MSVCCompat && NumToks == 1 && 470 1.1 joerg ResultToks.back().is(tok::comma)) 471 1.1 joerg ResultToks.back().setFlag(Token::IgnoredComma); 472 1.1 joerg 473 1.1 joerg // If the '##' came from expanding an argument, turn it into 'unknown' 474 1.1 joerg // to avoid pasting. 475 1.1 joerg for (Token &Tok : llvm::make_range(ResultToks.begin() + FirstResult, 476 1.1 joerg ResultToks.end())) { 477 1.1 joerg if (Tok.is(tok::hashhash)) 478 1.1 joerg Tok.setKind(tok::unknown); 479 1.1 joerg } 480 1.1 joerg 481 1.1 joerg if(ExpandLocStart.isValid()) { 482 1.1 joerg updateLocForMacroArgTokens(CurTok.getLocation(), 483 1.1 joerg ResultToks.begin()+FirstResult, 484 1.1 joerg ResultToks.end()); 485 1.1 joerg } 486 1.1 joerg 487 1.1 joerg // If any tokens were substituted from the argument, the whitespace 488 1.1 joerg // before the first token should match the whitespace of the arg 489 1.1 joerg // identifier. 490 1.1 joerg ResultToks[FirstResult].setFlagValue(Token::LeadingSpace, 491 1.1 joerg NextTokGetsSpace); 492 1.1 joerg ResultToks[FirstResult].setFlagValue(Token::StartOfLine, false); 493 1.1 joerg NextTokGetsSpace = false; 494 1.1 joerg } else { 495 1.1 joerg // We're creating a placeholder token. Usually this doesn't matter, 496 1.1 joerg // but it can affect paste behavior when at the start or end of a 497 1.1 joerg // __VA_OPT__. 498 1.1 joerg if (NonEmptyPasteBefore) { 499 1.1 joerg // We're imagining a placeholder token is inserted here. If this is 500 1.1 joerg // the first token in a __VA_OPT__ after a ##, delete the ##. 501 1.1 joerg assert(VCtx.isInVAOpt() && "should only happen inside a __VA_OPT__"); 502 1.1 joerg VCtx.hasPlaceholderAfterHashhashAtStart(); 503 1.1 joerg } 504 1.1 joerg if (RParenAfter) 505 1.1 joerg VCtx.hasPlaceholderBeforeRParen(); 506 1.1 joerg } 507 1.1 joerg continue; 508 1.1 joerg } 509 1.1 joerg 510 1.1 joerg // Okay, we have a token that is either the LHS or RHS of a paste (##) 511 1.1 joerg // argument. It gets substituted as its non-pre-expanded tokens. 512 1.1 joerg const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo); 513 1.1 joerg unsigned NumToks = MacroArgs::getArgLength(ArgToks); 514 1.1 joerg if (NumToks) { // Not an empty argument? 515 1.1 joerg bool VaArgsPseudoPaste = false; 516 1.1 joerg // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned 517 1.1 joerg // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when 518 1.1 joerg // the expander tries to paste ',' with the first token of the __VA_ARGS__ 519 1.1 joerg // expansion. 520 1.1 joerg if (NonEmptyPasteBefore && ResultToks.size() >= 2 && 521 1.1 joerg ResultToks[ResultToks.size()-2].is(tok::comma) && 522 1.1 joerg (unsigned)ArgNo == Macro->getNumParams()-1 && 523 1.1 joerg Macro->isVariadic()) { 524 1.1 joerg VaArgsPseudoPaste = true; 525 1.1 joerg // Remove the paste operator, report use of the extension. 526 1.1 joerg PP.Diag(ResultToks.pop_back_val().getLocation(), diag::ext_paste_comma); 527 1.1 joerg } 528 1.1 joerg 529 1.1 joerg ResultToks.append(ArgToks, ArgToks+NumToks); 530 1.1 joerg 531 1.1 joerg // If the '##' came from expanding an argument, turn it into 'unknown' 532 1.1 joerg // to avoid pasting. 533 1.1 joerg for (Token &Tok : llvm::make_range(ResultToks.end() - NumToks, 534 1.1 joerg ResultToks.end())) { 535 1.1 joerg if (Tok.is(tok::hashhash)) 536 1.1 joerg Tok.setKind(tok::unknown); 537 1.1 joerg } 538 1.1 joerg 539 1.1 joerg if (ExpandLocStart.isValid()) { 540 1.1 joerg updateLocForMacroArgTokens(CurTok.getLocation(), 541 1.1 joerg ResultToks.end()-NumToks, ResultToks.end()); 542 1.1 joerg } 543 1.1 joerg 544 1.1 joerg // Transfer the leading whitespace information from the token 545 1.1 joerg // (the macro argument) onto the first token of the 546 1.1 joerg // expansion. Note that we don't do this for the GNU 547 1.1 joerg // pseudo-paste extension ", ## __VA_ARGS__". 548 1.1 joerg if (!VaArgsPseudoPaste) { 549 1.1 joerg ResultToks[ResultToks.size() - NumToks].setFlagValue(Token::StartOfLine, 550 1.1 joerg false); 551 1.1 joerg ResultToks[ResultToks.size() - NumToks].setFlagValue( 552 1.1 joerg Token::LeadingSpace, NextTokGetsSpace); 553 1.1 joerg } 554 1.1 joerg 555 1.1 joerg NextTokGetsSpace = false; 556 1.1 joerg continue; 557 1.1 joerg } 558 1.1 joerg 559 1.1 joerg // If an empty argument is on the LHS or RHS of a paste, the standard (C99 560 1.1 joerg // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We 561 1.1 joerg // implement this by eating ## operators when a LHS or RHS expands to 562 1.1 joerg // empty. 563 1.1 joerg if (PasteAfter) { 564 1.1 joerg // Discard the argument token and skip (don't copy to the expansion 565 1.1 joerg // buffer) the paste operator after it. 566 1.1 joerg ++I; 567 1.1 joerg continue; 568 1.1 joerg } 569 1.1 joerg 570 1.1 joerg if (RParenAfter) 571 1.1 joerg VCtx.hasPlaceholderBeforeRParen(); 572 1.1 joerg 573 1.1 joerg // If this is on the RHS of a paste operator, we've already copied the 574 1.1 joerg // paste operator to the ResultToks list, unless the LHS was empty too. 575 1.1 joerg // Remove it. 576 1.1 joerg assert(PasteBefore); 577 1.1 joerg if (NonEmptyPasteBefore) { 578 1.1 joerg assert(ResultToks.back().is(tok::hashhash)); 579 1.1 joerg // Do not remove the paste operator if it is the one before __VA_OPT__ 580 1.1 joerg // (and we are still processing tokens within VA_OPT). We handle the case 581 1.1 joerg // of removing the paste operator if __VA_OPT__ reduces to the notional 582 1.1 joerg // placemarker above when we encounter the closing paren of VA_OPT. 583 1.1 joerg if (!VCtx.isInVAOpt() || 584 1.1 joerg ResultToks.size() > VCtx.getNumberOfTokensPriorToVAOpt()) 585 1.1 joerg ResultToks.pop_back(); 586 1.1 joerg else 587 1.1 joerg VCtx.hasPlaceholderAfterHashhashAtStart(); 588 1.1 joerg } 589 1.1 joerg 590 1.1 joerg // If this is the __VA_ARGS__ token, and if the argument wasn't provided, 591 1.1 joerg // and if the macro had at least one real argument, and if the token before 592 1.1 joerg // the ## was a comma, remove the comma. This is a GCC extension which is 593 1.1 joerg // disabled when using -std=c99. 594 1.1 joerg if (ActualArgs->isVarargsElidedUse()) 595 1.1 joerg MaybeRemoveCommaBeforeVaArgs(ResultToks, 596 1.1 joerg /*HasPasteOperator=*/true, 597 1.1 joerg Macro, ArgNo, PP); 598 1.1 joerg } 599 1.1 joerg 600 1.1 joerg // If anything changed, install this as the new Tokens list. 601 1.1 joerg if (MadeChange) { 602 1.1 joerg assert(!OwnsTokens && "This would leak if we already own the token list"); 603 1.1 joerg // This is deleted in the dtor. 604 1.1 joerg NumTokens = ResultToks.size(); 605 1.1 joerg // The tokens will be added to Preprocessor's cache and will be removed 606 1.1 joerg // when this TokenLexer finishes lexing them. 607 1.1 joerg Tokens = PP.cacheMacroExpandedTokens(this, ResultToks); 608 1.1 joerg 609 1.1 joerg // The preprocessor cache of macro expanded tokens owns these tokens,not us. 610 1.1 joerg OwnsTokens = false; 611 1.1 joerg } 612 1.1 joerg } 613 1.1 joerg 614 1.1 joerg /// Checks if two tokens form wide string literal. 615 1.1 joerg static bool isWideStringLiteralFromMacro(const Token &FirstTok, 616 1.1 joerg const Token &SecondTok) { 617 1.1 joerg return FirstTok.is(tok::identifier) && 618 1.1 joerg FirstTok.getIdentifierInfo()->isStr("L") && SecondTok.isLiteral() && 619 1.1 joerg SecondTok.stringifiedInMacro(); 620 1.1 joerg } 621 1.1 joerg 622 1.1 joerg /// Lex - Lex and return a token from this macro stream. 623 1.1 joerg bool TokenLexer::Lex(Token &Tok) { 624 1.1 joerg // Lexing off the end of the macro, pop this macro off the expansion stack. 625 1.1 joerg if (isAtEnd()) { 626 1.1 joerg // If this is a macro (not a token stream), mark the macro enabled now 627 1.1 joerg // that it is no longer being expanded. 628 1.1 joerg if (Macro) Macro->EnableMacro(); 629 1.1 joerg 630 1.1 joerg Tok.startToken(); 631 1.1 joerg Tok.setFlagValue(Token::StartOfLine , AtStartOfLine); 632 1.1 joerg Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace || NextTokGetsSpace); 633 1.1 joerg if (CurTokenIdx == 0) 634 1.1 joerg Tok.setFlag(Token::LeadingEmptyMacro); 635 1.1 joerg return PP.HandleEndOfTokenLexer(Tok); 636 1.1 joerg } 637 1.1 joerg 638 1.1 joerg SourceManager &SM = PP.getSourceManager(); 639 1.1 joerg 640 1.1 joerg // If this is the first token of the expanded result, we inherit spacing 641 1.1 joerg // properties later. 642 1.1 joerg bool isFirstToken = CurTokenIdx == 0; 643 1.1 joerg 644 1.1 joerg // Get the next token to return. 645 1.1 joerg Tok = Tokens[CurTokenIdx++]; 646 1.1 joerg if (IsReinject) 647 1.1 joerg Tok.setFlag(Token::IsReinjected); 648 1.1 joerg 649 1.1 joerg bool TokenIsFromPaste = false; 650 1.1 joerg 651 1.1 joerg // If this token is followed by a token paste (##) operator, paste the tokens! 652 1.1 joerg // Note that ## is a normal token when not expanding a macro. 653 1.1 joerg if (!isAtEnd() && Macro && 654 1.1 joerg (Tokens[CurTokenIdx].is(tok::hashhash) || 655 1.1 joerg // Special processing of L#x macros in -fms-compatibility mode. 656 1.1 joerg // Microsoft compiler is able to form a wide string literal from 657 1.1 joerg // 'L#macro_arg' construct in a function-like macro. 658 1.1 joerg (PP.getLangOpts().MSVCCompat && 659 1.1 joerg isWideStringLiteralFromMacro(Tok, Tokens[CurTokenIdx])))) { 660 1.1 joerg // When handling the microsoft /##/ extension, the final token is 661 1.1 joerg // returned by pasteTokens, not the pasted token. 662 1.1 joerg if (pasteTokens(Tok)) 663 1.1 joerg return true; 664 1.1 joerg 665 1.1 joerg TokenIsFromPaste = true; 666 1.1 joerg } 667 1.1 joerg 668 1.1 joerg // The token's current location indicate where the token was lexed from. We 669 1.1 joerg // need this information to compute the spelling of the token, but any 670 1.1 joerg // diagnostics for the expanded token should appear as if they came from 671 1.1 joerg // ExpansionLoc. Pull this information together into a new SourceLocation 672 1.1 joerg // that captures all of this. 673 1.1 joerg if (ExpandLocStart.isValid() && // Don't do this for token streams. 674 1.1 joerg // Check that the token's location was not already set properly. 675 1.1 joerg SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) { 676 1.1 joerg SourceLocation instLoc; 677 1.1 joerg if (Tok.is(tok::comment)) { 678 1.1 joerg instLoc = SM.createExpansionLoc(Tok.getLocation(), 679 1.1 joerg ExpandLocStart, 680 1.1 joerg ExpandLocEnd, 681 1.1 joerg Tok.getLength()); 682 1.1 joerg } else { 683 1.1 joerg instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation()); 684 1.1 joerg } 685 1.1 joerg 686 1.1 joerg Tok.setLocation(instLoc); 687 1.1 joerg } 688 1.1 joerg 689 1.1 joerg // If this is the first token, set the lexical properties of the token to 690 1.1 joerg // match the lexical properties of the macro identifier. 691 1.1 joerg if (isFirstToken) { 692 1.1 joerg Tok.setFlagValue(Token::StartOfLine , AtStartOfLine); 693 1.1 joerg Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace); 694 1.1 joerg } else { 695 1.1 joerg // If this is not the first token, we may still need to pass through 696 1.1 joerg // leading whitespace if we've expanded a macro. 697 1.1 joerg if (AtStartOfLine) Tok.setFlag(Token::StartOfLine); 698 1.1 joerg if (HasLeadingSpace) Tok.setFlag(Token::LeadingSpace); 699 1.1 joerg } 700 1.1 joerg AtStartOfLine = false; 701 1.1 joerg HasLeadingSpace = false; 702 1.1 joerg 703 1.1 joerg // Handle recursive expansion! 704 1.1 joerg if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) { 705 1.1 joerg // Change the kind of this identifier to the appropriate token kind, e.g. 706 1.1 joerg // turning "for" into a keyword. 707 1.1 joerg IdentifierInfo *II = Tok.getIdentifierInfo(); 708 1.1 joerg Tok.setKind(II->getTokenID()); 709 1.1 joerg 710 1.1 joerg // If this identifier was poisoned and from a paste, emit an error. This 711 1.1 joerg // won't be handled by Preprocessor::HandleIdentifier because this is coming 712 1.1 joerg // from a macro expansion. 713 1.1 joerg if (II->isPoisoned() && TokenIsFromPaste) { 714 1.1 joerg PP.HandlePoisonedIdentifier(Tok); 715 1.1 joerg } 716 1.1 joerg 717 1.1 joerg if (!DisableMacroExpansion && II->isHandleIdentifierCase()) 718 1.1 joerg return PP.HandleIdentifier(Tok); 719 1.1 joerg } 720 1.1 joerg 721 1.1 joerg // Otherwise, return a normal token. 722 1.1 joerg return true; 723 1.1 joerg } 724 1.1 joerg 725 1.1 joerg bool TokenLexer::pasteTokens(Token &Tok) { 726 1.1 joerg return pasteTokens(Tok, llvm::makeArrayRef(Tokens, NumTokens), CurTokenIdx); 727 1.1 joerg } 728 1.1 joerg 729 1.1 joerg /// LHSTok is the LHS of a ## operator, and CurTokenIdx is the ## 730 1.1 joerg /// operator. Read the ## and RHS, and paste the LHS/RHS together. If there 731 1.1 joerg /// are more ## after it, chomp them iteratively. Return the result as LHSTok. 732 1.1 joerg /// If this returns true, the caller should immediately return the token. 733 1.1 joerg bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream, 734 1.1 joerg unsigned int &CurIdx) { 735 1.1 joerg assert(CurIdx > 0 && "## can not be the first token within tokens"); 736 1.1 joerg assert((TokenStream[CurIdx].is(tok::hashhash) || 737 1.1 joerg (PP.getLangOpts().MSVCCompat && 738 1.1 joerg isWideStringLiteralFromMacro(LHSTok, TokenStream[CurIdx]))) && 739 1.1 joerg "Token at this Index must be ## or part of the MSVC 'L " 740 1.1 joerg "#macro-arg' pasting pair"); 741 1.1 joerg 742 1.1 joerg // MSVC: If previous token was pasted, this must be a recovery from an invalid 743 1.1 joerg // paste operation. Ignore spaces before this token to mimic MSVC output. 744 1.1 joerg // Required for generating valid UUID strings in some MS headers. 745 1.1 joerg if (PP.getLangOpts().MicrosoftExt && (CurIdx >= 2) && 746 1.1 joerg TokenStream[CurIdx - 2].is(tok::hashhash)) 747 1.1 joerg LHSTok.clearFlag(Token::LeadingSpace); 748 1.1 joerg 749 1.1 joerg SmallString<128> Buffer; 750 1.1 joerg const char *ResultTokStrPtr = nullptr; 751 1.1 joerg SourceLocation StartLoc = LHSTok.getLocation(); 752 1.1 joerg SourceLocation PasteOpLoc; 753 1.1 joerg 754 1.1 joerg auto IsAtEnd = [&TokenStream, &CurIdx] { 755 1.1 joerg return TokenStream.size() == CurIdx; 756 1.1 joerg }; 757 1.1 joerg 758 1.1 joerg do { 759 1.1 joerg // Consume the ## operator if any. 760 1.1 joerg PasteOpLoc = TokenStream[CurIdx].getLocation(); 761 1.1 joerg if (TokenStream[CurIdx].is(tok::hashhash)) 762 1.1 joerg ++CurIdx; 763 1.1 joerg assert(!IsAtEnd() && "No token on the RHS of a paste operator!"); 764 1.1 joerg 765 1.1 joerg // Get the RHS token. 766 1.1 joerg const Token &RHS = TokenStream[CurIdx]; 767 1.1 joerg 768 1.1 joerg // Allocate space for the result token. This is guaranteed to be enough for 769 1.1 joerg // the two tokens. 770 1.1 joerg Buffer.resize(LHSTok.getLength() + RHS.getLength()); 771 1.1 joerg 772 1.1 joerg // Get the spelling of the LHS token in Buffer. 773 1.1 joerg const char *BufPtr = &Buffer[0]; 774 1.1 joerg bool Invalid = false; 775 1.1 joerg unsigned LHSLen = PP.getSpelling(LHSTok, BufPtr, &Invalid); 776 1.1 joerg if (BufPtr != &Buffer[0]) // Really, we want the chars in Buffer! 777 1.1 joerg memcpy(&Buffer[0], BufPtr, LHSLen); 778 1.1 joerg if (Invalid) 779 1.1 joerg return true; 780 1.1 joerg 781 1.1 joerg BufPtr = Buffer.data() + LHSLen; 782 1.1 joerg unsigned RHSLen = PP.getSpelling(RHS, BufPtr, &Invalid); 783 1.1 joerg if (Invalid) 784 1.1 joerg return true; 785 1.1 joerg if (RHSLen && BufPtr != &Buffer[LHSLen]) 786 1.1 joerg // Really, we want the chars in Buffer! 787 1.1 joerg memcpy(&Buffer[LHSLen], BufPtr, RHSLen); 788 1.1 joerg 789 1.1 joerg // Trim excess space. 790 1.1 joerg Buffer.resize(LHSLen+RHSLen); 791 1.1 joerg 792 1.1 joerg // Plop the pasted result (including the trailing newline and null) into a 793 1.1 joerg // scratch buffer where we can lex it. 794 1.1 joerg Token ResultTokTmp; 795 1.1 joerg ResultTokTmp.startToken(); 796 1.1 joerg 797 1.1 joerg // Claim that the tmp token is a string_literal so that we can get the 798 1.1 joerg // character pointer back from CreateString in getLiteralData(). 799 1.1 joerg ResultTokTmp.setKind(tok::string_literal); 800 1.1 joerg PP.CreateString(Buffer, ResultTokTmp); 801 1.1 joerg SourceLocation ResultTokLoc = ResultTokTmp.getLocation(); 802 1.1 joerg ResultTokStrPtr = ResultTokTmp.getLiteralData(); 803 1.1 joerg 804 1.1 joerg // Lex the resultant pasted token into Result. 805 1.1 joerg Token Result; 806 1.1 joerg 807 1.1 joerg if (LHSTok.isAnyIdentifier() && RHS.isAnyIdentifier()) { 808 1.1 joerg // Common paste case: identifier+identifier = identifier. Avoid creating 809 1.1 joerg // a lexer and other overhead. 810 1.1 joerg PP.IncrementPasteCounter(true); 811 1.1 joerg Result.startToken(); 812 1.1 joerg Result.setKind(tok::raw_identifier); 813 1.1 joerg Result.setRawIdentifierData(ResultTokStrPtr); 814 1.1 joerg Result.setLocation(ResultTokLoc); 815 1.1 joerg Result.setLength(LHSLen+RHSLen); 816 1.1 joerg } else { 817 1.1 joerg PP.IncrementPasteCounter(false); 818 1.1 joerg 819 1.1 joerg assert(ResultTokLoc.isFileID() && 820 1.1 joerg "Should be a raw location into scratch buffer"); 821 1.1 joerg SourceManager &SourceMgr = PP.getSourceManager(); 822 1.1 joerg FileID LocFileID = SourceMgr.getFileID(ResultTokLoc); 823 1.1 joerg 824 1.1 joerg bool Invalid = false; 825 1.1 joerg const char *ScratchBufStart 826 1.1 joerg = SourceMgr.getBufferData(LocFileID, &Invalid).data(); 827 1.1 joerg if (Invalid) 828 1.1 joerg return false; 829 1.1 joerg 830 1.1 joerg // Make a lexer to lex this string from. Lex just this one token. 831 1.1 joerg // Make a lexer object so that we lex and expand the paste result. 832 1.1 joerg Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID), 833 1.1 joerg PP.getLangOpts(), ScratchBufStart, 834 1.1 joerg ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen); 835 1.1 joerg 836 1.1 joerg // Lex a token in raw mode. This way it won't look up identifiers 837 1.1 joerg // automatically, lexing off the end will return an eof token, and 838 1.1 joerg // warnings are disabled. This returns true if the result token is the 839 1.1 joerg // entire buffer. 840 1.1 joerg bool isInvalid = !TL.LexFromRawLexer(Result); 841 1.1 joerg 842 1.1 joerg // If we got an EOF token, we didn't form even ONE token. For example, we 843 1.1 joerg // did "/ ## /" to get "//". 844 1.1 joerg isInvalid |= Result.is(tok::eof); 845 1.1 joerg 846 1.1 joerg // If pasting the two tokens didn't form a full new token, this is an 847 1.1 joerg // error. This occurs with "x ## +" and other stuff. Return with LHSTok 848 1.1 joerg // unmodified and with RHS as the next token to lex. 849 1.1 joerg if (isInvalid) { 850 1.1 joerg // Explicitly convert the token location to have proper expansion 851 1.1 joerg // information so that the user knows where it came from. 852 1.1 joerg SourceManager &SM = PP.getSourceManager(); 853 1.1 joerg SourceLocation Loc = 854 1.1 joerg SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2); 855 1.1 joerg 856 1.1 joerg // Test for the Microsoft extension of /##/ turning into // here on the 857 1.1 joerg // error path. 858 1.1 joerg if (PP.getLangOpts().MicrosoftExt && LHSTok.is(tok::slash) && 859 1.1 joerg RHS.is(tok::slash)) { 860 1.1 joerg HandleMicrosoftCommentPaste(LHSTok, Loc); 861 1.1 joerg return true; 862 1.1 joerg } 863 1.1 joerg 864 1.1 joerg // Do not emit the error when preprocessing assembler code. 865 1.1 joerg if (!PP.getLangOpts().AsmPreprocessor) { 866 1.1 joerg // If we're in microsoft extensions mode, downgrade this from a hard 867 1.1 joerg // error to an extension that defaults to an error. This allows 868 1.1 joerg // disabling it. 869 1.1 joerg PP.Diag(Loc, PP.getLangOpts().MicrosoftExt ? diag::ext_pp_bad_paste_ms 870 1.1 joerg : diag::err_pp_bad_paste) 871 1.1 joerg << Buffer; 872 1.1 joerg } 873 1.1 joerg 874 1.1 joerg // An error has occurred so exit loop. 875 1.1 joerg break; 876 1.1 joerg } 877 1.1 joerg 878 1.1 joerg // Turn ## into 'unknown' to avoid # ## # from looking like a paste 879 1.1 joerg // operator. 880 1.1 joerg if (Result.is(tok::hashhash)) 881 1.1 joerg Result.setKind(tok::unknown); 882 1.1 joerg } 883 1.1 joerg 884 1.1 joerg // Transfer properties of the LHS over the Result. 885 1.1 joerg Result.setFlagValue(Token::StartOfLine , LHSTok.isAtStartOfLine()); 886 1.1 joerg Result.setFlagValue(Token::LeadingSpace, LHSTok.hasLeadingSpace()); 887 1.1 joerg 888 1.1 joerg // Finally, replace LHS with the result, consume the RHS, and iterate. 889 1.1 joerg ++CurIdx; 890 1.1 joerg LHSTok = Result; 891 1.1 joerg } while (!IsAtEnd() && TokenStream[CurIdx].is(tok::hashhash)); 892 1.1 joerg 893 1.1 joerg SourceLocation EndLoc = TokenStream[CurIdx - 1].getLocation(); 894 1.1 joerg 895 1.1 joerg // The token's current location indicate where the token was lexed from. We 896 1.1 joerg // need this information to compute the spelling of the token, but any 897 1.1 joerg // diagnostics for the expanded token should appear as if the token was 898 1.1 joerg // expanded from the full ## expression. Pull this information together into 899 1.1 joerg // a new SourceLocation that captures all of this. 900 1.1 joerg SourceManager &SM = PP.getSourceManager(); 901 1.1 joerg if (StartLoc.isFileID()) 902 1.1 joerg StartLoc = getExpansionLocForMacroDefLoc(StartLoc); 903 1.1 joerg if (EndLoc.isFileID()) 904 1.1 joerg EndLoc = getExpansionLocForMacroDefLoc(EndLoc); 905 1.1 joerg FileID MacroFID = SM.getFileID(MacroExpansionStart); 906 1.1 joerg while (SM.getFileID(StartLoc) != MacroFID) 907 1.1 joerg StartLoc = SM.getImmediateExpansionRange(StartLoc).getBegin(); 908 1.1 joerg while (SM.getFileID(EndLoc) != MacroFID) 909 1.1 joerg EndLoc = SM.getImmediateExpansionRange(EndLoc).getEnd(); 910 1.1 joerg 911 1.1 joerg LHSTok.setLocation(SM.createExpansionLoc(LHSTok.getLocation(), StartLoc, EndLoc, 912 1.1 joerg LHSTok.getLength())); 913 1.1 joerg 914 1.1 joerg // Now that we got the result token, it will be subject to expansion. Since 915 1.1 joerg // token pasting re-lexes the result token in raw mode, identifier information 916 1.1 joerg // isn't looked up. As such, if the result is an identifier, look up id info. 917 1.1 joerg if (LHSTok.is(tok::raw_identifier)) { 918 1.1 joerg // Look up the identifier info for the token. We disabled identifier lookup 919 1.1 joerg // by saying we're skipping contents, so we need to do this manually. 920 1.1 joerg PP.LookUpIdentifierInfo(LHSTok); 921 1.1 joerg } 922 1.1 joerg return false; 923 1.1 joerg } 924 1.1 joerg 925 1.1 joerg /// isNextTokenLParen - If the next token lexed will pop this macro off the 926 1.1 joerg /// expansion stack, return 2. If the next unexpanded token is a '(', return 927 1.1 joerg /// 1, otherwise return 0. 928 1.1 joerg unsigned TokenLexer::isNextTokenLParen() const { 929 1.1 joerg // Out of tokens? 930 1.1 joerg if (isAtEnd()) 931 1.1 joerg return 2; 932 1.1 joerg return Tokens[CurTokenIdx].is(tok::l_paren); 933 1.1 joerg } 934 1.1 joerg 935 1.1 joerg /// isParsingPreprocessorDirective - Return true if we are in the middle of a 936 1.1 joerg /// preprocessor directive. 937 1.1 joerg bool TokenLexer::isParsingPreprocessorDirective() const { 938 1.1 joerg return Tokens[NumTokens-1].is(tok::eod) && !isAtEnd(); 939 1.1 joerg } 940 1.1 joerg 941 1.1 joerg /// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes 942 1.1 joerg /// together to form a comment that comments out everything in the current 943 1.1 joerg /// macro, other active macros, and anything left on the current physical 944 1.1 joerg /// source line of the expanded buffer. Handle this by returning the 945 1.1 joerg /// first token on the next line. 946 1.1 joerg void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok, SourceLocation OpLoc) { 947 1.1 joerg PP.Diag(OpLoc, diag::ext_comment_paste_microsoft); 948 1.1 joerg 949 1.1 joerg // We 'comment out' the rest of this macro by just ignoring the rest of the 950 1.1 joerg // tokens that have not been lexed yet, if any. 951 1.1 joerg 952 1.1 joerg // Since this must be a macro, mark the macro enabled now that it is no longer 953 1.1 joerg // being expanded. 954 1.1 joerg assert(Macro && "Token streams can't paste comments"); 955 1.1 joerg Macro->EnableMacro(); 956 1.1 joerg 957 1.1 joerg PP.HandleMicrosoftCommentPaste(Tok); 958 1.1 joerg } 959 1.1 joerg 960 1.1 joerg /// If \arg loc is a file ID and points inside the current macro 961 1.1 joerg /// definition, returns the appropriate source location pointing at the 962 1.1 joerg /// macro expansion source location entry, otherwise it returns an invalid 963 1.1 joerg /// SourceLocation. 964 1.1 joerg SourceLocation 965 1.1 joerg TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const { 966 1.1 joerg assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() && 967 1.1 joerg "Not appropriate for token streams"); 968 1.1 joerg assert(loc.isValid() && loc.isFileID()); 969 1.1 joerg 970 1.1 joerg SourceManager &SM = PP.getSourceManager(); 971 1.1 joerg assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) && 972 1.1 joerg "Expected loc to come from the macro definition"); 973 1.1 joerg 974 1.1 joerg unsigned relativeOffset = 0; 975 1.1 joerg SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset); 976 1.1 joerg return MacroExpansionStart.getLocWithOffset(relativeOffset); 977 1.1 joerg } 978 1.1 joerg 979 1.1 joerg /// Finds the tokens that are consecutive (from the same FileID) 980 1.1 joerg /// creates a single SLocEntry, and assigns SourceLocations to each token that 981 1.1 joerg /// point to that SLocEntry. e.g for 982 1.1 joerg /// assert(foo == bar); 983 1.1 joerg /// There will be a single SLocEntry for the "foo == bar" chunk and locations 984 1.1 joerg /// for the 'foo', '==', 'bar' tokens will point inside that chunk. 985 1.1 joerg /// 986 1.1 joerg /// \arg begin_tokens will be updated to a position past all the found 987 1.1 joerg /// consecutive tokens. 988 1.1 joerg static void updateConsecutiveMacroArgTokens(SourceManager &SM, 989 1.1 joerg SourceLocation InstLoc, 990 1.1 joerg Token *&begin_tokens, 991 1.1 joerg Token * end_tokens) { 992 1.1 joerg assert(begin_tokens < end_tokens); 993 1.1 joerg 994 1.1 joerg SourceLocation FirstLoc = begin_tokens->getLocation(); 995 1.1 joerg SourceLocation CurLoc = FirstLoc; 996 1.1 joerg 997 1.1 joerg // Compare the source location offset of tokens and group together tokens that 998 1.1 joerg // are close, even if their locations point to different FileIDs. e.g. 999 1.1 joerg // 1000 1.1 joerg // |bar | foo | cake | (3 tokens from 3 consecutive FileIDs) 1001 1.1 joerg // ^ ^ 1002 1.1 joerg // |bar foo cake| (one SLocEntry chunk for all tokens) 1003 1.1 joerg // 1004 1.1 joerg // we can perform this "merge" since the token's spelling location depends 1005 1.1 joerg // on the relative offset. 1006 1.1 joerg 1007 1.1 joerg Token *NextTok = begin_tokens + 1; 1008 1.1 joerg for (; NextTok < end_tokens; ++NextTok) { 1009 1.1 joerg SourceLocation NextLoc = NextTok->getLocation(); 1010 1.1 joerg if (CurLoc.isFileID() != NextLoc.isFileID()) 1011 1.1 joerg break; // Token from different kind of FileID. 1012 1.1 joerg 1013 1.1 joerg int RelOffs; 1014 1.1 joerg if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs)) 1015 1.1 joerg break; // Token from different local/loaded location. 1016 1.1 joerg // Check that token is not before the previous token or more than 50 1017 1.1 joerg // "characters" away. 1018 1.1 joerg if (RelOffs < 0 || RelOffs > 50) 1019 1.1 joerg break; 1020 1.1 joerg 1021 1.1 joerg if (CurLoc.isMacroID() && !SM.isWrittenInSameFile(CurLoc, NextLoc)) 1022 1.1 joerg break; // Token from a different macro. 1023 1.1 joerg 1024 1.1 joerg CurLoc = NextLoc; 1025 1.1 joerg } 1026 1.1 joerg 1027 1.1 joerg // For the consecutive tokens, find the length of the SLocEntry to contain 1028 1.1 joerg // all of them. 1029 1.1 joerg Token &LastConsecutiveTok = *(NextTok-1); 1030 1.1 joerg int LastRelOffs = 0; 1031 1.1 joerg SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(), 1032 1.1 joerg &LastRelOffs); 1033 1.1 joerg unsigned FullLength = LastRelOffs + LastConsecutiveTok.getLength(); 1034 1.1 joerg 1035 1.1 joerg // Create a macro expansion SLocEntry that will "contain" all of the tokens. 1036 1.1 joerg SourceLocation Expansion = 1037 1.1 joerg SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength); 1038 1.1 joerg 1039 1.1 joerg // Change the location of the tokens from the spelling location to the new 1040 1.1 joerg // expanded location. 1041 1.1 joerg for (; begin_tokens < NextTok; ++begin_tokens) { 1042 1.1 joerg Token &Tok = *begin_tokens; 1043 1.1 joerg int RelOffs = 0; 1044 1.1 joerg SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs); 1045 1.1 joerg Tok.setLocation(Expansion.getLocWithOffset(RelOffs)); 1046 1.1 joerg } 1047 1.1 joerg } 1048 1.1 joerg 1049 1.1 joerg /// Creates SLocEntries and updates the locations of macro argument 1050 1.1 joerg /// tokens to their new expanded locations. 1051 1.1 joerg /// 1052 1.1 joerg /// \param ArgIdSpellLoc the location of the macro argument id inside the macro 1053 1.1 joerg /// definition. 1054 1.1 joerg void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc, 1055 1.1 joerg Token *begin_tokens, 1056 1.1 joerg Token *end_tokens) { 1057 1.1 joerg SourceManager &SM = PP.getSourceManager(); 1058 1.1 joerg 1059 1.1 joerg SourceLocation InstLoc = 1060 1.1 joerg getExpansionLocForMacroDefLoc(ArgIdSpellLoc); 1061 1.1 joerg 1062 1.1 joerg while (begin_tokens < end_tokens) { 1063 1.1 joerg // If there's only one token just create a SLocEntry for it. 1064 1.1 joerg if (end_tokens - begin_tokens == 1) { 1065 1.1 joerg Token &Tok = *begin_tokens; 1066 1.1 joerg Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(), 1067 1.1 joerg InstLoc, 1068 1.1 joerg Tok.getLength())); 1069 1.1 joerg return; 1070 1.1 joerg } 1071 1.1 joerg 1072 1.1 joerg updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens); 1073 1.1 joerg } 1074 1.1 joerg } 1075 1.1 joerg 1076 1.1 joerg void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) { 1077 1.1 joerg AtStartOfLine = Result.isAtStartOfLine(); 1078 1.1 joerg HasLeadingSpace = Result.hasLeadingSpace(); 1079 1.1 joerg } 1080