clang: TokenLexer.cpp Source File

Go to the documentation of this file.
00001 //===--- TokenLexer.cpp - Lex from a token stream -------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements the TokenLexer interface.
00011 //
00012 //===----------------------------------------------------------------------===//
00013 
00014 #include "clang/Lex/TokenLexer.h"
00015 #include "clang/Basic/SourceManager.h"
00016 #include "clang/Lex/LexDiagnostic.h"
00017 #include "clang/Lex/MacroArgs.h"
00018 #include "clang/Lex/MacroInfo.h"
00019 #include "clang/Lex/Preprocessor.h"
00020 #include "llvm/ADT/SmallString.h"
00021 using namespace clang;
00022 
00023 
00024 /// Create a TokenLexer for the specified macro with the specified actual
00025 /// arguments.  Note that this ctor takes ownership of the ActualArgs pointer.
00026 void TokenLexer::Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI,
00027                       MacroArgs *Actuals) {
00028   // If the client is reusing a TokenLexer, make sure to free any memory
00029   // associated with it.
00030   destroy();
00031 
00032   Macro = MI;
00033   ActualArgs = Actuals;
00034   CurToken = 0;
00035 
00036   ExpandLocStart = Tok.getLocation();
00037   ExpandLocEnd = ELEnd;
00038   AtStartOfLine = Tok.isAtStartOfLine();
00039   HasLeadingSpace = Tok.hasLeadingSpace();
00040   NextTokGetsSpace = false;
00041   Tokens = &*Macro->tokens_begin();
00042   OwnsTokens = false;
00043   DisableMacroExpansion = false;
00044   NumTokens = Macro->tokens_end()-Macro->tokens_begin();
00045   MacroExpansionStart = SourceLocation();
00046 
00047   SourceManager &SM = PP.getSourceManager();
00048   MacroStartSLocOffset = SM.getNextLocalOffset();
00049 
00050   if (NumTokens > 0) {
00051     assert(Tokens[0].getLocation().isValid());
00052     assert((Tokens[0].getLocation().isFileID() || Tokens[0].is(tok::comment)) &&
00053            "Macro defined in macro?");
00054     assert(ExpandLocStart.isValid());
00055 
00056     // Reserve a source location entry chunk for the length of the macro
00057     // definition. Tokens that get lexed directly from the definition will
00058     // have their locations pointing inside this chunk. This is to avoid
00059     // creating separate source location entries for each token.
00060     MacroDefStart = SM.getExpansionLoc(Tokens[0].getLocation());
00061     MacroDefLength = Macro->getDefinitionLength(SM);
00062     MacroExpansionStart = SM.createExpansionLoc(MacroDefStart,
00063                                                 ExpandLocStart,
00064                                                 ExpandLocEnd,
00065                                                 MacroDefLength);
00066   }
00067 
00068   // If this is a function-like macro, expand the arguments and change
00069   // Tokens to point to the expanded tokens.
00070   if (Macro->isFunctionLike() && Macro->getNumArgs())
00071     ExpandFunctionArguments();
00072 
00073   // Mark the macro as currently disabled, so that it is not recursively
00074   // expanded.  The macro must be disabled only after argument pre-expansion of
00075   // function-like macro arguments occurs.
00076   Macro->DisableMacro();
00077 }
00078 
00079 
00080 
00081 /// Create a TokenLexer for the specified token stream.  This does not
00082 /// take ownership of the specified token vector.
00083 void TokenLexer::Init(const Token *TokArray, unsigned NumToks,
00084                       bool disableMacroExpansion, bool ownsTokens) {
00085   // If the client is reusing a TokenLexer, make sure to free any memory
00086   // associated with it.
00087   destroy();
00088 
00089   Macro = nullptr;
00090   ActualArgs = nullptr;
00091   Tokens = TokArray;
00092   OwnsTokens = ownsTokens;
00093   DisableMacroExpansion = disableMacroExpansion;
00094   NumTokens = NumToks;
00095   CurToken = 0;
00096   ExpandLocStart = ExpandLocEnd = SourceLocation();
00097   AtStartOfLine = false;
00098   HasLeadingSpace = false;
00099   NextTokGetsSpace = false;
00100   MacroExpansionStart = SourceLocation();
00101 
00102   // Set HasLeadingSpace/AtStartOfLine so that the first token will be
00103   // returned unmodified.
00104   if (NumToks != 0) {
00105     AtStartOfLine   = TokArray[0].isAtStartOfLine();
00106     HasLeadingSpace = TokArray[0].hasLeadingSpace();
00107   }
00108 }
00109 
00110 
00111 void TokenLexer::destroy() {
00112   // If this was a function-like macro that actually uses its arguments, delete
00113   // the expanded tokens.
00114   if (OwnsTokens) {
00115     delete [] Tokens;
00116     Tokens = nullptr;
00117     OwnsTokens = false;
00118   }
00119 
00120   // TokenLexer owns its formal arguments.
00121   if (ActualArgs) ActualArgs->destroy(PP);
00122 }
00123 
00124 bool TokenLexer::MaybeRemoveCommaBeforeVaArgs(
00125     SmallVectorImpl<Token> &ResultToks, bool HasPasteOperator, MacroInfo *Macro,
00126     unsigned MacroArgNo, Preprocessor &PP) {
00127   // Is the macro argument __VA_ARGS__?
00128   if (!Macro->isVariadic() || MacroArgNo != Macro->getNumArgs()-1)
00129     return false;
00130 
00131   // In Microsoft-compatibility mode, a comma is removed in the expansion
00132   // of " ... , __VA_ARGS__ " if __VA_ARGS__ is empty.  This extension is
00133   // not supported by gcc.
00134   if (!HasPasteOperator && !PP.getLangOpts().MSVCCompat)
00135     return false;
00136 
00137   // GCC removes the comma in the expansion of " ... , ## __VA_ARGS__ " if
00138   // __VA_ARGS__ is empty, but not in strict C99 mode where there are no
00139   // named arguments, where it remains.  In all other modes, including C99
00140   // with GNU extensions, it is removed regardless of named arguments.
00141   // Microsoft also appears to support this extension, unofficially.
00142   if (PP.getLangOpts().C99 && !PP.getLangOpts().GNUMode
00143         && Macro->getNumArgs() < 2)
00144     return false;
00145 
00146   // Is a comma available to be removed?
00147   if (ResultToks.empty() || !ResultToks.back().is(tok::comma))
00148     return false;
00149 
00150   // Issue an extension diagnostic for the paste operator.
00151   if (HasPasteOperator)
00152     PP.Diag(ResultToks.back().getLocation(), diag::ext_paste_comma);
00153 
00154   // Remove the comma.
00155   ResultToks.pop_back();
00156 
00157   // If the comma was right after another paste (e.g. "X##,##__VA_ARGS__"),
00158   // then removal of the comma should produce a placemarker token (in C99
00159   // terms) which we model by popping off the previous ##, giving us a plain
00160   // "X" when __VA_ARGS__ is empty.
00161   if (!ResultToks.empty() && ResultToks.back().is(tok::hashhash))
00162     ResultToks.pop_back();
00163 
00164   // Never add a space, even if the comma, ##, or arg had a space.
00165   NextTokGetsSpace = false;
00166   return true;
00167 }
00168 
00169 /// Expand the arguments of a function-like macro so that we can quickly
00170 /// return preexpanded tokens from Tokens.
      ///
      /// Walks the macro's replacement list (Tokens / NumTokens), handling the
      /// '#' and '#@' operators, operands of '##', and ordinary parameter uses,
      /// and collects the output in a local buffer.  If anything was substituted,
      /// the buffer is handed to PP.cacheMacroExpandedTokens() and Tokens /
      /// NumTokens are repointed at the result; otherwise Tokens is left alone.
00171 void TokenLexer::ExpandFunctionArguments() {
00172 
00173   SmallVector<Token, 128> ResultToks;
00174 
00175   // Loop through 'Tokens', expanding them into ResultToks.  Keep
00176   // track of whether we change anything.  If not, no need to keep them.  If so,
00177   // we install the newly expanded sequence as the new 'Tokens' list.
00178   bool MadeChange = false;
00179 
00180   for (unsigned i = 0, e = NumTokens; i != e; ++i) {
00181     // If we found the stringify operator, get the argument stringified.  The
00182     // preprocessor already verified that the following token is a macro
00183     // parameter name when the #define was parsed.
00184     const Token &CurTok = Tokens[i];
      // Leading whitespace on this token is remembered for whatever gets emitted
      // next, unless the token is the right-hand operand of a '##'.
00185     if (i != 0 && !Tokens[i-1].is(tok::hashhash) && CurTok.hasLeadingSpace())
00186       NextTokGetsSpace = true;
00187 
00188     if (CurTok.is(tok::hash) || CurTok.is(tok::hashat)) {
00189       int ArgNo = Macro->getArgumentNum(Tokens[i+1].getIdentifierInfo());
00190       assert(ArgNo != -1 && "Token following # is not an argument?");
00191 
      // The result token's expansion range spans from the operator to the
      // parameter name that follows it.
00192       SourceLocation ExpansionLocStart =
00193           getExpansionLocForMacroDefLoc(CurTok.getLocation());
00194       SourceLocation ExpansionLocEnd =
00195           getExpansionLocForMacroDefLoc(Tokens[i+1].getLocation());
00196 
00197       Token Res;
00198       if (CurTok.is(tok::hash))  // Stringify
00199         Res = ActualArgs->getStringifiedArgument(ArgNo, PP,
00200                                                  ExpansionLocStart,
00201                                                  ExpansionLocEnd);
00202       else {
00203         // 'charify': don't bother caching these.
00204         Res = MacroArgs::StringifyArgument(ActualArgs->getUnexpArgument(ArgNo),
00205                                            PP, true,
00206                                            ExpansionLocStart,
00207                                            ExpansionLocEnd);
00208       }
00209 
00210       // The stringified/charified string leading space flag gets set to match
00211       // the #/#@ operator.
00212       if (NextTokGetsSpace)
00213         Res.setFlag(Token::LeadingSpace);
00214 
00215       ResultToks.push_back(Res);
00216       MadeChange = true;
00217       ++i;  // Skip arg name.
00218       NextTokGetsSpace = false;
00219       continue;
00220     }
00221 
00222     // Find out if there is a paste (##) operator before or after the token.
      // NonEmptyPasteBefore: the '##' is still the last token in ResultToks.
      // PasteBefore / PasteAfter: the '##' appears in the replacement list.
00223     bool NonEmptyPasteBefore =
00224       !ResultToks.empty() && ResultToks.back().is(tok::hashhash);
00225     bool PasteBefore = i != 0 && Tokens[i-1].is(tok::hashhash);
00226     bool PasteAfter = i+1 != e && Tokens[i+1].is(tok::hashhash);
00227     assert(!NonEmptyPasteBefore || PasteBefore);
00228 
00229     // Otherwise, if this is not an argument token, just add the token to the
00230     // output buffer.
00231     IdentifierInfo *II = CurTok.getIdentifierInfo();
00232     int ArgNo = II ? Macro->getArgumentNum(II) : -1;
00233     if (ArgNo == -1) {
00234       // This isn't an argument, just add it.
00235       ResultToks.push_back(CurTok);
00236 
00237       if (NextTokGetsSpace) {
00238         ResultToks.back().setFlag(Token::LeadingSpace);
00239         NextTokGetsSpace = false;
00240       } else if (PasteBefore && !NonEmptyPasteBefore)
      // The '##' that preceded this token is no longer in the output, so
      // drop the leading space as well.
00241         ResultToks.back().clearFlag(Token::LeadingSpace);
00242 
00243       continue;
00244     }
00245 
00246     // An argument is expanded somehow, the result is different than the
00247     // input.
00248     MadeChange = true;
00249 
00250     // Otherwise, this is a use of the argument.
00251 
00252     // In Microsoft mode, remove the comma before __VA_ARGS__ to ensure there
00253     // are no trailing commas if __VA_ARGS__ is empty.
00254     if (!PasteBefore && ActualArgs->isVarargsElidedUse() &&
00255         MaybeRemoveCommaBeforeVaArgs(ResultToks,
00256                                      /*HasPasteOperator=*/false,
00257                                      Macro, ArgNo, PP))
00258       continue;
00259 
00260     // If it is not the LHS/RHS of a ## operator, we must pre-expand the
00261     // argument and substitute the expanded tokens into the result.  This is
00262     // C99 6.10.3.1p1.
00263     if (!PasteBefore && !PasteAfter) {
00264       const Token *ResultArgToks;
00265 
00266       // Only preexpand the argument if it could possibly need it.  This
00267       // avoids some work in common cases.
00268       const Token *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
00269       if (ActualArgs->ArgNeedsPreexpansion(ArgTok, PP))
00270         ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, Macro, PP)[0];
00271       else
00272         ResultArgToks = ArgTok;  // Use non-preexpanded tokens.
00273 
00274       // If the arg token expanded into anything, append it.
00275       if (ResultArgToks->isNot(tok::eof)) {
00276         unsigned FirstResult = ResultToks.size();
00277         unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
00278         ResultToks.append(ResultArgToks, ResultArgToks+NumToks);
00279 
00280         // In Microsoft-compatibility mode, we follow MSVC's preprocessing
00281         // behavior by not considering single commas from nested macro
00282         // expansions as argument separators. Set a flag on the token so we can
00283         // test for this later when the macro expansion is processed.
00284         if (PP.getLangOpts().MSVCCompat && NumToks == 1 &&
00285             ResultToks.back().is(tok::comma))
00286           ResultToks.back().setFlag(Token::IgnoredComma);
00287 
00288         // If the '##' came from expanding an argument, turn it into 'unknown'
00289         // to avoid pasting.  (Note: this loop's 'i' and 'e' shadow the outer
00290         // loop's variables; only the appended tokens are visited.)
00290         for (unsigned i = FirstResult, e = ResultToks.size(); i != e; ++i) {
00291           Token &Tok = ResultToks[i];
00292           if (Tok.is(tok::hashhash))
00293             Tok.setKind(tok::unknown);
00294         }
00295 
      // Only macro expansions (not plain token streams) have a valid
      // ExpandLocStart and need their argument token locations updated.
00296         if(ExpandLocStart.isValid()) {
00297           updateLocForMacroArgTokens(CurTok.getLocation(),
00298                                      ResultToks.begin()+FirstResult,
00299                                      ResultToks.end());
00300         }
00301 
00302         // If any tokens were substituted from the argument, the whitespace
00303         // before the first token should match the whitespace of the arg
00304         // identifier.
00305         ResultToks[FirstResult].setFlagValue(Token::LeadingSpace,
00306                                              NextTokGetsSpace);
00307         NextTokGetsSpace = false;
00308       }
00309       continue;
00310     }
00311 
00312     // Okay, we have a token that is either the LHS or RHS of a paste (##)
00313     // argument.  It gets substituted as its non-pre-expanded tokens.
00314     const Token *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
00315     unsigned NumToks = MacroArgs::getArgLength(ArgToks);
00316     if (NumToks) {  // Not an empty argument?
00317       // If this is the GNU ", ## __VA_ARGS__" extension, and we just learned
00318       // that __VA_ARGS__ expands to multiple tokens, avoid a pasting error when
00319       // the expander tries to paste ',' with the first token of the __VA_ARGS__
00320       // expansion.
00321       if (NonEmptyPasteBefore && ResultToks.size() >= 2 &&
00322           ResultToks[ResultToks.size()-2].is(tok::comma) &&
00323           (unsigned)ArgNo == Macro->getNumArgs()-1 &&
00324           Macro->isVariadic()) {
00325         // Remove the paste operator, report use of the extension.
00326         PP.Diag(ResultToks.pop_back_val().getLocation(), diag::ext_paste_comma);
00327       }
00328 
00329       ResultToks.append(ArgToks, ArgToks+NumToks);
00330 
00331       // If the '##' came from expanding an argument, turn it into 'unknown'
00332       // to avoid pasting.  (This loop's 'i' and 'e' shadow the outer loop's.)
00333       for (unsigned i = ResultToks.size() - NumToks, e = ResultToks.size();
00334              i != e; ++i) {
00335         Token &Tok = ResultToks[i];
00336         if (Tok.is(tok::hashhash))
00337           Tok.setKind(tok::unknown);
00338       }
00339 
00340       if (ExpandLocStart.isValid()) {
00341         updateLocForMacroArgTokens(CurTok.getLocation(),
00342                                    ResultToks.end()-NumToks, ResultToks.end());
00343       }
00344 
00345       // If this token (the macro argument) was supposed to get leading
00346       // whitespace, transfer this information onto the first token of the
00347       // expansion.
00348       //
00349       // Do not do this if the paste operator occurs before the macro argument,
00350       // as in "A ## MACROARG".  In valid code, the first token will get
00351       // smooshed onto the preceding one anyway (forming AMACROARG).  In
00352       // assembler-with-cpp mode, invalid pastes are allowed through: in this
00353       // case, we do not want the extra whitespace to be added.  For example,
00354       // we want ". ## foo" -> ".foo" not ". foo".
00355       if (NextTokGetsSpace)
00356         ResultToks[ResultToks.size()-NumToks].setFlag(Token::LeadingSpace);
00357 
00358       NextTokGetsSpace = false;
00359       continue;
00360     }
00361 
00362     // If an empty argument is on the LHS or RHS of a paste, the standard (C99
00363     // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur.  We
00364     // implement this by eating ## operators when a LHS or RHS expands to
00365     // empty.
00366     if (PasteAfter) {
00367       // Discard the argument token and skip (don't copy to the expansion
00368       // buffer) the paste operator after it.
00369       ++i;
00370       continue;
00371     }
00372 
00373     // If this is on the RHS of a paste operator, we've already copied the
00374     // paste operator to the ResultToks list, unless the LHS was empty too.
00375     // Remove it.
00376     assert(PasteBefore);
00377     if (NonEmptyPasteBefore) {
00378       assert(ResultToks.back().is(tok::hashhash));
00379       ResultToks.pop_back();
00380     }
00381 
00382     // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
00383     // and if the macro had at least one real argument, and if the token before
00384     // the ## was a comma, remove the comma.  This is a GCC extension which is
00385     // disabled when using -std=c99.
00386     if (ActualArgs->isVarargsElidedUse())
00387       MaybeRemoveCommaBeforeVaArgs(ResultToks,
00388                                    /*HasPasteOperator=*/true,
00389                                    Macro, ArgNo, PP);
00390 
00391     continue;
00392   }
00393 
00394   // If anything changed, install this as the new Tokens list.
00395   if (MadeChange) {
00396     assert(!OwnsTokens && "This would leak if we already own the token list");
00397     // Record the length of the expanded sequence.
00398     NumTokens = ResultToks.size();
00399     // The tokens will be added to Preprocessor's cache and will be removed
00400     // when this TokenLexer finishes lexing them.
00401     Tokens = PP.cacheMacroExpandedTokens(this, ResultToks);
00402 
00403     // The preprocessor cache of macro expanded tokens owns these tokens, not us.
00404     OwnsTokens = false;
00405   }
00406 }
00407 
00408 /// Lex - Lex and return a token from this macro stream.
00409 ///
      /// The token is written into \p Tok.  Returns true for an ordinary token
      /// (and when PasteTokens() asks for an immediate return).  At the end of the
      /// stream the result of PP.HandleEndOfTokenLexer() is returned, and for
      /// identifiers that need extra handling the result of PP.HandleIdentifier().
00410 bool TokenLexer::Lex(Token &Tok) {
00411   // Lexing off the end of the macro, pop this macro off the expansion stack.
00412   if (isAtEnd()) {
00413     // If this is a macro (not a token stream), mark the macro enabled now
00414     // that it is no longer being expanded.
00415     if (Macro) Macro->EnableMacro();
00416 
      // Hand the preprocessor an empty token that carries the pending
      // whitespace / start-of-line state of this expansion.
00417     Tok.startToken();
00418     Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
00419     Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace || NextTokGetsSpace);
      // CurToken == 0 here means no token was ever produced by this lexer.
00420     if (CurToken == 0)
00421       Tok.setFlag(Token::LeadingEmptyMacro);
00422     return PP.HandleEndOfTokenLexer(Tok);
00423   }
00424 
00425   SourceManager &SM = PP.getSourceManager();
00426 
00427   // If this is the first token of the expanded result, we inherit spacing
00428   // properties later.
00429   bool isFirstToken = CurToken == 0;
00430 
00431   // Get the next token to return.
00432   Tok = Tokens[CurToken++];
00433 
00434   bool TokenIsFromPaste = false;
00435 
00436   // If this token is followed by a token paste (##) operator, paste the tokens!
00437   // Note that ## is a normal token when not expanding a macro.
00438   if (!isAtEnd() && Tokens[CurToken].is(tok::hashhash) && Macro) {
00439     // When handling the microsoft /##/ extension, the final token is
00440     // returned by PasteTokens, not the pasted token.
00441     if (PasteTokens(Tok))
00442       return true;
00443 
00444     TokenIsFromPaste = true;
00445   }
00446 
00447   // The token's current location indicates where the token was lexed from.  We
00448   // need this information to compute the spelling of the token, but any
00449   // diagnostics for the expanded token should appear as if they came from
00450   // ExpansionLoc.  Pull this information together into a new SourceLocation
00451   // that captures all of this.
00452   if (ExpandLocStart.isValid() &&   // Don't do this for token streams.
00453       // Check that the token's location was not already set properly.
00454       SM.isBeforeInSLocAddrSpace(Tok.getLocation(), MacroStartSLocOffset)) {
00455     SourceLocation instLoc;
00456     if (Tok.is(tok::comment)) {
      // Comment tokens get their own expansion entry rather than a location
      // derived from the macro definition entry.
00457       instLoc = SM.createExpansionLoc(Tok.getLocation(),
00458                                       ExpandLocStart,
00459                                       ExpandLocEnd,
00460                                       Tok.getLength());
00461     } else {
00462       instLoc = getExpansionLocForMacroDefLoc(Tok.getLocation());
00463     }
00464 
00465     Tok.setLocation(instLoc);
00466   }
00467 
00468   // If this is the first token, set the lexical properties of the token to
00469   // match the lexical properties of the macro identifier.
00470   if (isFirstToken) {
00471     Tok.setFlagValue(Token::StartOfLine , AtStartOfLine);
00472     Tok.setFlagValue(Token::LeadingSpace, HasLeadingSpace);
00473   } else {
00474     // If this is not the first token, we may still need to pass through
00475     // leading whitespace if we've expanded a macro.
00476     if (AtStartOfLine) Tok.setFlag(Token::StartOfLine);
00477     if (HasLeadingSpace) Tok.setFlag(Token::LeadingSpace);
00478   }
      // The pending flags have now been applied to a token; clear them.
00479   AtStartOfLine = false;
00480   HasLeadingSpace = false;
00481 
00482   // Handle recursive expansion!
00483   if (!Tok.isAnnotation() && Tok.getIdentifierInfo() != nullptr) {
00484     // Change the kind of this identifier to the appropriate token kind, e.g.
00485     // turning "for" into a keyword.
00486     IdentifierInfo *II = Tok.getIdentifierInfo();
00487     Tok.setKind(II->getTokenID());
00488 
00489     // If this identifier was poisoned and from a paste, emit an error.  This
00490     // won't be handled by Preprocessor::HandleIdentifier because this is coming
00491     // from a macro expansion.
00492     if (II->isPoisoned() && TokenIsFromPaste) {
00493       PP.HandlePoisonedIdentifier(Tok);
00494     }
00495 
00496     if (!DisableMacroExpansion && II->isHandleIdentifierCase())
00497       return PP.HandleIdentifier(Tok);
00498   }
00499 
00500   // Otherwise, return a normal token.
00501   return true;
00502 }
00503 
00504 /// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ##
00505 /// operator.  Read the ## and RHS, and paste the LHS/RHS together.  If there
00506 /// are more ## after it, chomp them iteratively.  Return the result as Tok.
00507 /// If this returns true, the caller should immediately return the token.
      ///
      /// Returns true when a spelling could not be obtained or when the Microsoft
      /// "/##/" comment paste was handled; returns false otherwise, including
      /// after a failed paste that was diagnosed (Tok is then left as the LHS and
      /// the RHS remains the next token to lex).
00508 bool TokenLexer::PasteTokens(Token &Tok) {
00509   SmallString<128> Buffer;
00510   const char *ResultTokStrPtr = nullptr;
00511   SourceLocation StartLoc = Tok.getLocation();
00512   SourceLocation PasteOpLoc;
00513   do {
00514     // Consume the ## operator.
00515     PasteOpLoc = Tokens[CurToken].getLocation();
00516     ++CurToken;
00517     assert(!isAtEnd() && "No token on the RHS of a paste operator!");
00518 
00519     // Get the RHS token.
00520     const Token &RHS = Tokens[CurToken];
00521 
00522     // Allocate space for the result token.  This is guaranteed to be enough for
00523     // the two tokens.
00524     Buffer.resize(Tok.getLength() + RHS.getLength());
00525 
00526     // Get the spelling of the LHS token in Buffer.
00527     const char *BufPtr = &Buffer[0];
00528     bool Invalid = false;
00529     unsigned LHSLen = PP.getSpelling(Tok, BufPtr, &Invalid);
00530     if (BufPtr != &Buffer[0])   // Really, we want the chars in Buffer!
00531       memcpy(&Buffer[0], BufPtr, LHSLen);
00532     if (Invalid)
00533       return true;
00534 
      // Append the spelling of the RHS token right after the LHS.
00535     BufPtr = Buffer.data() + LHSLen;
00536     unsigned RHSLen = PP.getSpelling(RHS, BufPtr, &Invalid);
00537     if (Invalid)
00538       return true;
00539     if (RHSLen && BufPtr != &Buffer[LHSLen])
00540       // Really, we want the chars in Buffer!
00541       memcpy(&Buffer[LHSLen], BufPtr, RHSLen);
00542 
00543     // Trim excess space.
00544     Buffer.resize(LHSLen+RHSLen);
00545 
00546     // Plop the pasted result (including the trailing newline and null) into a
00547     // scratch buffer where we can lex it.
00548     Token ResultTokTmp;
00549     ResultTokTmp.startToken();
00550 
00551     // Claim that the tmp token is a string_literal so that we can get the
00552     // character pointer back from CreateString in getLiteralData().
00553     ResultTokTmp.setKind(tok::string_literal);
00554     PP.CreateString(Buffer, ResultTokTmp);
00555     SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
00556     ResultTokStrPtr = ResultTokTmp.getLiteralData();
00557 
00558     // Lex the resultant pasted token into Result.
00559     Token Result;
00560 
00561     if (Tok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
00562       // Common paste case: identifier+identifier = identifier.  Avoid creating
00563       // a lexer and other overhead.
00564       PP.IncrementPasteCounter(true);
00565       Result.startToken();
00566       Result.setKind(tok::raw_identifier);
00567       Result.setRawIdentifierData(ResultTokStrPtr);
00568       Result.setLocation(ResultTokLoc);
00569       Result.setLength(LHSLen+RHSLen);
00570     } else {
00571       PP.IncrementPasteCounter(false);
00572 
00573       assert(ResultTokLoc.isFileID() &&
00574              "Should be a raw location into scratch buffer");
00575       SourceManager &SourceMgr = PP.getSourceManager();
00576       FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);
00577 
      // Note: this 'Invalid' shadows the one declared earlier in the loop body.
00578       bool Invalid = false;
00579       const char *ScratchBufStart
00580         = SourceMgr.getBufferData(LocFileID, &Invalid).data();
      // NOTE(review): unlike the spelling failures above, this failure path
      // returns false while Tok still holds the LHS - confirm this is intended.
00581       if (Invalid)
00582         return false;
00583 
00584       // Make a lexer to lex this string from.  Lex just this one token.
00585       // Make a lexer object so that we lex and expand the paste result.
00586       Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
00587                PP.getLangOpts(), ScratchBufStart,
00588                ResultTokStrPtr, ResultTokStrPtr+LHSLen+RHSLen);
00589 
00590       // Lex a token in raw mode.  This way it won't look up identifiers
00591       // automatically, lexing off the end will return an eof token, and
00592       // warnings are disabled.  This returns true if the result token is the
00593       // entire buffer.
00594       bool isInvalid = !TL.LexFromRawLexer(Result);
00595 
00596       // If we got an EOF token, we didn't form even ONE token.  For example, we
00597       // did "/ ## /" to get "//".
00598       isInvalid |= Result.is(tok::eof);
00599 
00600       // If pasting the two tokens didn't form a full new token, this is an
00601       // error.  This occurs with "x ## +"  and other stuff.  Return with Tok
00602       // unmodified and with RHS as the next token to lex.
00603       if (isInvalid) {
00604         // Test for the Microsoft extension of /##/ turning into // here on the
00605         // error path.
00606         if (PP.getLangOpts().MicrosoftExt && Tok.is(tok::slash) &&
00607             RHS.is(tok::slash)) {
00608           HandleMicrosoftCommentPaste(Tok);
00609           return true;
00610         }
00611 
00612         // Do not emit the error when preprocessing assembler code.
00613         if (!PP.getLangOpts().AsmPreprocessor) {
00614           // Explicitly convert the token location to have proper expansion
00615           // information so that the user knows where it came from.
00616           SourceManager &SM = PP.getSourceManager();
00617           SourceLocation Loc =
00618             SM.createExpansionLoc(PasteOpLoc, ExpandLocStart, ExpandLocEnd, 2);
00619           // If we're in microsoft extensions mode, downgrade this from a hard
00620           // error to an extension that defaults to an error.  This allows
00621           // disabling it.
00622           PP.Diag(Loc, PP.getLangOpts().MicrosoftExt ? diag::ext_pp_bad_paste_ms
00623                                                      : diag::err_pp_bad_paste)
00624               << Buffer.str();
00625         }
00626 
00627         // An error has occurred so exit loop.  CurToken still indexes the RHS
00628         // here, because the increment at the bottom of the loop is skipped.
00628         break;
00629       }
00630 
00631       // Turn ## into 'unknown' to avoid # ## # from looking like a paste
00632       // operator.
00633       if (Result.is(tok::hashhash))
00634         Result.setKind(tok::unknown);
00635     }
00636 
00637     // Transfer properties of the LHS over the Result.
00638     Result.setFlagValue(Token::StartOfLine , Tok.isAtStartOfLine());
00639     Result.setFlagValue(Token::LeadingSpace, Tok.hasLeadingSpace());
00640     
00641     // Finally, replace LHS with the result, consume the RHS, and iterate.
00642     ++CurToken;
00643     Tok = Result;
00644   } while (!isAtEnd() && Tokens[CurToken].is(tok::hashhash));
00645 
      // Location of the last token consumed: the final RHS after a successful
      // paste, or the '##' operator if the loop was left through 'break'.
00646   SourceLocation EndLoc = Tokens[CurToken - 1].getLocation();
00647 
00648   // The token's current location indicates where the token was lexed from.  We
00649   // need this information to compute the spelling of the token, but any
00650   // diagnostics for the expanded token should appear as if the token was
00651   // expanded from the full ## expression. Pull this information together into
00652   // a new SourceLocation that captures all of this.
00653   SourceManager &SM = PP.getSourceManager();
00654   if (StartLoc.isFileID())
00655     StartLoc = getExpansionLocForMacroDefLoc(StartLoc);
00656   if (EndLoc.isFileID())
00657     EndLoc = getExpansionLocForMacroDefLoc(EndLoc);
      // Walk both ends up their expansion chains until they lie in the FileID of
      // this macro's expansion entry.
00658   FileID MacroFID = SM.getFileID(MacroExpansionStart);
00659   while (SM.getFileID(StartLoc) != MacroFID)
00660     StartLoc = SM.getImmediateExpansionRange(StartLoc).first;
00661   while (SM.getFileID(EndLoc) != MacroFID)
00662     EndLoc = SM.getImmediateExpansionRange(EndLoc).second;
00663     
00664   Tok.setLocation(SM.createExpansionLoc(Tok.getLocation(), StartLoc, EndLoc,
00665                                         Tok.getLength()));
00666 
00667   // Now that we got the result token, it will be subject to expansion.  Since
00668   // token pasting re-lexes the result token in raw mode, identifier information
00669   // isn't looked up.  As such, if the result is an identifier, look up id info.
00670   if (Tok.is(tok::raw_identifier)) {
00671     // Look up the identifier info for the token.  We disabled identifier lookup
00672     // by saying we're skipping contents, so we need to do this manually.
00673     PP.LookUpIdentifierInfo(Tok);
00674   }
00675   return false;
00676 }
00677 
00678 /// isNextTokenLParen - If the next token lexed will pop this macro off the
00679 /// expansion stack, return 2.  If the next unexpanded token is a '(', return
00680 /// 1, otherwise return 0.
00681 unsigned TokenLexer::isNextTokenLParen() const {
00682   // Out of tokens?
00683   if (isAtEnd())
00684     return 2;
00685   return Tokens[CurToken].is(tok::l_paren);
00686 }
00687 
00688 /// isParsingPreprocessorDirective - Return true if we are in the middle of a
00689 /// preprocessor directive.
00690 bool TokenLexer::isParsingPreprocessorDirective() const {
00691   return Tokens[NumTokens-1].is(tok::eod) && !isAtEnd();
00692 }
00693 
00694 /// HandleMicrosoftCommentPaste - In microsoft compatibility mode, /##/ pastes
00695 /// together to form a comment that comments out everything in the current
00696 /// macro, other active macros, and anything left on the current physical
00697 /// source line of the expanded buffer.  Handle this by returning the
00698 /// first token on the next line.
00699 void TokenLexer::HandleMicrosoftCommentPaste(Token &Tok) {
00700   // We 'comment out' the rest of this macro by just ignoring the rest of the
00701   // tokens that have not been lexed yet, if any.
00702 
00703   // Since this must be a macro, mark the macro enabled now that it is no longer
00704   // being expanded.
00705   assert(Macro && "Token streams can't paste comments");
00706   Macro->EnableMacro();
00707 
00708   PP.HandleMicrosoftCommentPaste(Tok);
00709 }
00710 
00711 /// \brief If \arg loc is a file ID and points inside the current macro
00712 /// definition, returns the appropriate source location pointing at the
00713 /// macro expansion source location entry, otherwise it returns an invalid
00714 /// SourceLocation.
00715 SourceLocation
00716 TokenLexer::getExpansionLocForMacroDefLoc(SourceLocation loc) const {
00717   assert(ExpandLocStart.isValid() && MacroExpansionStart.isValid() &&
00718          "Not appropriate for token streams");
00719   assert(loc.isValid() && loc.isFileID());
00720   
00721   SourceManager &SM = PP.getSourceManager();
00722   assert(SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength) &&
00723          "Expected loc to come from the macro definition");
00724 
00725   unsigned relativeOffset = 0;
00726   SM.isInSLocAddrSpace(loc, MacroDefStart, MacroDefLength, &relativeOffset);
00727   return MacroExpansionStart.getLocWithOffset(relativeOffset);
00728 }
00729 
00730 /// \brief Finds the tokens that are consecutive (from the same FileID)
00731 /// creates a single SLocEntry, and assigns SourceLocations to each token that
00732 /// point to that SLocEntry. e.g for
00733 ///   assert(foo == bar);
00734 /// There will be a single SLocEntry for the "foo == bar" chunk and locations
00735 /// for the 'foo', '==', 'bar' tokens will point inside that chunk.
00736 ///
00737 /// \arg begin_tokens will be updated to a position past all the found
00738 /// consecutive tokens.
00739 static void updateConsecutiveMacroArgTokens(SourceManager &SM,
00740                                             SourceLocation InstLoc,
00741                                             Token *&begin_tokens,
00742                                             Token * end_tokens) {
00743   assert(begin_tokens < end_tokens);
00744 
00745   SourceLocation FirstLoc = begin_tokens->getLocation();
00746   SourceLocation CurLoc = FirstLoc;
00747 
00748   // Compare the source location offset of tokens and group together tokens that
00749   // are close, even if their locations point to different FileIDs. e.g.
00750   //
00751   //  |bar    |  foo | cake   |  (3 tokens from 3 consecutive FileIDs)
00752   //  ^                    ^
00753   //  |bar       foo   cake|     (one SLocEntry chunk for all tokens)
00754   //
00755   // we can perform this "merge" since the token's spelling location depends
00756   // on the relative offset.
00757 
00758   Token *NextTok = begin_tokens + 1;
00759   for (; NextTok < end_tokens; ++NextTok) {
00760     SourceLocation NextLoc = NextTok->getLocation();
00761     if (CurLoc.isFileID() != NextLoc.isFileID())
00762       break; // Token from different kind of FileID.
00763 
00764     int RelOffs;
00765     if (!SM.isInSameSLocAddrSpace(CurLoc, NextLoc, &RelOffs))
00766       break; // Token from different local/loaded location.
00767     // Check that token is not before the previous token or more than 50
00768     // "characters" away.
00769     if (RelOffs < 0 || RelOffs > 50)
00770       break;
00771     CurLoc = NextLoc;
00772   }
00773 
00774   // For the consecutive tokens, find the length of the SLocEntry to contain
00775   // all of them.
00776   Token &LastConsecutiveTok = *(NextTok-1);
00777   int LastRelOffs = 0;
00778   SM.isInSameSLocAddrSpace(FirstLoc, LastConsecutiveTok.getLocation(),
00779                            &LastRelOffs);
00780   unsigned FullLength = LastRelOffs + LastConsecutiveTok.getLength();
00781 
00782   // Create a macro expansion SLocEntry that will "contain" all of the tokens.
00783   SourceLocation Expansion =
00784       SM.createMacroArgExpansionLoc(FirstLoc, InstLoc,FullLength);
00785 
00786   // Change the location of the tokens from the spelling location to the new
00787   // expanded location.
00788   for (; begin_tokens < NextTok; ++begin_tokens) {
00789     Token &Tok = *begin_tokens;
00790     int RelOffs = 0;
00791     SM.isInSameSLocAddrSpace(FirstLoc, Tok.getLocation(), &RelOffs);
00792     Tok.setLocation(Expansion.getLocWithOffset(RelOffs));
00793   }
00794 }
00795 
00796 /// \brief Creates SLocEntries and updates the locations of macro argument
00797 /// tokens to their new expanded locations.
00798 ///
00799 /// \param ArgIdDefLoc the location of the macro argument id inside the macro
00800 /// definition.
00801 /// \param Tokens the macro argument tokens to update.
00802 void TokenLexer::updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
00803                                             Token *begin_tokens,
00804                                             Token *end_tokens) {
00805   SourceManager &SM = PP.getSourceManager();
00806 
00807   SourceLocation InstLoc =
00808       getExpansionLocForMacroDefLoc(ArgIdSpellLoc);
00809   
00810   while (begin_tokens < end_tokens) {
00811     // If there's only one token just create a SLocEntry for it.
00812     if (end_tokens - begin_tokens == 1) {
00813       Token &Tok = *begin_tokens;
00814       Tok.setLocation(SM.createMacroArgExpansionLoc(Tok.getLocation(),
00815                                                     InstLoc,
00816                                                     Tok.getLength()));
00817       return;
00818     }
00819 
00820     updateConsecutiveMacroArgTokens(SM, InstLoc, begin_tokens, end_tokens);
00821   }
00822 }
00823 
00824 void TokenLexer::PropagateLineStartLeadingSpaceInfo(Token &Result) {
00825   AtStartOfLine = Result.isAtStartOfLine();
00826   HasLeadingSpace = Result.hasLeadingSpace();
00827 }