// clang API Documentation
//===--- Preprocess.cpp - C Language Family Preprocessor Implementation ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//  This file implements the Preprocessor interface.
//
//===----------------------------------------------------------------------===//
//
// Options to support:
//   -H       - Print the name of each header file used.
//   -d[DNI] - Dump various things.
//   -fworking-directory - #line's with preprocessor's working dir.
//   -fpreprocessed
//   -dependency-file,-M,-MM,-MF,-MG,-MP,-MT,-MQ,-MD,-MMD
//   -W*
//   -w
//
// Messages to emit:
//   "Multiple include guards may be useful for:\n"
//
//===----------------------------------------------------------------------===//

#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/FileSystemStatCache.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TargetInfo.h"
#include "clang/Lex/CodeCompletionHandler.h"
#include "clang/Lex/ExternalPreprocessorSource.h"
#include "clang/Lex/HeaderSearch.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Lex/LiteralSupport.h"
#include "clang/Lex/MacroArgs.h"
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/ModuleLoader.h"
#include "clang/Lex/Pragma.h"
#include "clang/Lex/PreprocessingRecord.h"
#include "clang/Lex/PreprocessorOptions.h"
#include "clang/Lex/ScratchBuffer.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;

//===----------------------------------------------------------------------===//
ExternalPreprocessorSource::~ExternalPreprocessorSource() { }

/// Construct a preprocessor bound to the given diagnostics engine, language
/// options, source manager, header search and module loader.
///
/// The constructor zeroes all statistics counters, sets the default lexing
/// flags (comments discarded, macro expansion and pragmas enabled), poisons
/// __VA_ARGS__, registers the builtin pragmas and macros, and - only when
/// LangOpts.Borland is set - looks up the SEH-related identifiers.
///
/// \param OwnsHeaders if true, the destructor deletes \p Headers.
Preprocessor::Preprocessor(IntrusiveRefCntPtr<PreprocessorOptions> PPOpts,
                           DiagnosticsEngine &diags, LangOptions &opts,
                           SourceManager &SM, HeaderSearch &Headers,
                           ModuleLoader &TheModuleLoader,
                           IdentifierInfoLookup *IILookup, bool OwnsHeaders,
                           TranslationUnitKind TUKind)
    : PPOpts(PPOpts), Diags(&diags), LangOpts(opts), Target(nullptr),
      FileMgr(Headers.getFileMgr()), SourceMgr(SM),
      ScratchBuf(new ScratchBuffer(SourceMgr)), HeaderInfo(Headers),
      TheModuleLoader(TheModuleLoader), ExternalSource(nullptr),
      Identifiers(opts, IILookup),
      PragmaHandlers(new PragmaNamespace(StringRef())),
      IncrementalProcessing(false), TUKind(TUKind),
      CodeComplete(nullptr), CodeCompletionFile(nullptr),
      CodeCompletionOffset(0), LastTokenWasAt(false),
      ModuleImportExpectsIdentifier(false), CodeCompletionReached(0),
      SkipMainFilePreamble(0, true), CurPPLexer(nullptr),
      CurDirLookup(nullptr), CurLexerKind(CLK_Lexer), CurSubmodule(nullptr),
      Callbacks(nullptr), MacroArgCache(nullptr), Record(nullptr),
      MIChainHead(nullptr), DeserialMIChainHead(nullptr) {
  OwnsHeaderSearch = OwnsHeaders;

  CounterValue = 0; // __COUNTER__ starts at 0.

  // Clear stats.
  NumDirectives = NumDefined = NumUndefined = NumPragma = 0;
  NumIf = NumElse = NumEndif = 0;
  NumEnteredSourceFiles = 0;
  NumMacroExpanded = NumFnMacroExpanded = NumBuiltinMacroExpanded = 0;
  NumFastMacroExpanded = NumTokenPaste = NumFastTokenPaste = 0;
  MaxIncludeStackDepth = 0;
  NumSkipped = 0;

  // Default to discarding comments.
  KeepComments = false;
  KeepMacroComments = false;
  SuppressIncludeNotFoundError = false;

  // Macro expansion is enabled.
  DisableMacroExpansion = false;
  MacroExpansionInDirectivesOverride = false;
  InMacroArgs = false;
  InMacroArgPreExpansion = false;
  NumCachedTokenLexers = 0;
  PragmasEnabled = true;
  ParsingIfOrElifDirective = false;
  PreprocessedOutput = false;

  CachedLexPos = 0;

  // We haven't read anything from the external source.
  ReadMacrosFromExternalSource = false;

  // "Poison" __VA_ARGS__, which can only appear in the expansion of a macro.
  // This gets unpoisoned where it is allowed.
  (Ident__VA_ARGS__ = getIdentifierInfo("__VA_ARGS__"))->setIsPoisoned();
  SetPoisonReason(Ident__VA_ARGS__, diag::ext_pp_bad_vaargs_use);

  // Initialize the pragma handlers.
  RegisterBuiltinPragmas();

  // Initialize builtin macros like __LINE__ and friends.
  RegisterBuiltinMacros();

  if (LangOpts.Borland) {
    Ident__exception_info        = getIdentifierInfo("_exception_info");
    Ident___exception_info       = getIdentifierInfo("__exception_info");
    Ident_GetExceptionInfo       = getIdentifierInfo("GetExceptionInformation");
    Ident__exception_code        = getIdentifierInfo("_exception_code");
    Ident___exception_code       = getIdentifierInfo("__exception_code");
    Ident_GetExceptionCode       = getIdentifierInfo("GetExceptionCode");
    Ident__abnormal_termination  = getIdentifierInfo("_abnormal_termination");
    Ident___abnormal_termination = getIdentifierInfo("__abnormal_termination");
    Ident_AbnormalTermination    = getIdentifierInfo("AbnormalTermination");
  } else {
    // Without Borland extensions all nine SEH identifiers stay null.
    Ident__exception_info = Ident__exception_code = nullptr;
    Ident__abnormal_termination = Ident___exception_info = nullptr;
    Ident___exception_code = Ident___abnormal_termination = nullptr;
    Ident_GetExceptionInfo = Ident_GetExceptionCode = nullptr;
    Ident_AbnormalTermination = nullptr;
  }
}

/// Tear down the preprocessor: run the destructors of the chained macro
/// records, drop cached token lexers, release cached MacroArgs and, if owned,
/// delete the header search object.
Preprocessor::~Preprocessor() {
  assert(BacktrackPositions.empty() && "EnableBacktrack/Backtrack imbalance!");

  IncludeMacroStack.clear();

  // Destroy any macro definitions.
  while (MacroInfoChain *I = MIChainHead) {
    MIChainHead = I->Next;
    I->~MacroInfoChain();
  }

  // Free any cached macro expanders.
  // This populates MacroArgCache, so all TokenLexers need to be destroyed
  // before the code below that frees up the MacroArgCache list.
  std::fill(TokenLexerCache, TokenLexerCache + NumCachedTokenLexers, nullptr);
  CurTokenLexer.reset();

  while (DeserializedMacroInfoChain *I = DeserialMIChainHead) {
    DeserialMIChainHead = I->Next;
    I->~DeserializedMacroInfoChain();
  }

  // Free any cached MacroArgs.
  for (MacroArgs *ArgList = MacroArgCache; ArgList;)
    ArgList = ArgList->deallocate();

  // Delete the header search info, if we own it.
  if (OwnsHeaderSearch)
    delete &HeaderInfo;
}

/// Record the target and forward it to the builtin table and header search.
/// A target may only be set once, or re-set to the same object.
void Preprocessor::Initialize(const TargetInfo &Target) {
  assert((!this->Target || this->Target == &Target) &&
         "Invalid override of target information");
  this->Target = &Target;

  // Initialize information about built-ins.
  BuiltinInfo.InitializeTarget(Target);
  HeaderInfo.setTarget(Target);
}

/// Prepare for processing a model file: reset the entered-file counter, stash
/// the current pragma handlers in PragmaHandlersBackup and install a fresh
/// namespace holding only the builtin pragmas, and clear PredefinesFileID.
void Preprocessor::InitializeForModelFile() {
  NumEnteredSourceFiles = 0;

  // Reset pragmas
  PragmaHandlersBackup = std::move(PragmaHandlers);
  PragmaHandlers = llvm::make_unique<PragmaNamespace>(StringRef());
  RegisterBuiltinPragmas();

  // Reset PredefinesFileID
  PredefinesFileID = FileID();
}

/// Undo InitializeForModelFile(): set the entered-file counter to 1 and
/// restore the pragma handlers saved in PragmaHandlersBackup.
void Preprocessor::FinalizeForModelFile() {
  NumEnteredSourceFiles = 1;

  PragmaHandlers = std::move(PragmaHandlersBackup);
}

/// Install \p pm as the PTH manager and register the stat cache it creates
/// with the file manager. \p pm must be non-null because it is dereferenced
/// immediately after being stored.
void Preprocessor::setPTHManager(PTHManager* pm) {
  assert(pm && "setPTHManager requires a non-null PTHManager");
  PTH.reset(pm);
  FileMgr.addStatCache(PTH->createStatCache());
}

/// Print the token's kind name and spelling to llvm::errs(). When
/// \p DumpFlags is true, also print its flags, the uncleaned source text (if
/// the token needs cleaning) and its location.
void Preprocessor::DumpToken(const Token &Tok, bool DumpFlags) const {
  llvm::errs() << tok::getTokenName(Tok.getKind()) << " '"
               << getSpelling(Tok) << "'";

  if (!DumpFlags) return;

  llvm::errs() << "\t";
  if (Tok.isAtStartOfLine())
    llvm::errs() << " [StartOfLine]";
  if (Tok.hasLeadingSpace())
    llvm::errs() << " [LeadingSpace]";
  if (Tok.isExpandDisabled())
    llvm::errs() << " [ExpandDisabled]";
  if (Tok.needsCleaning()) {
    const char *Start = SourceMgr.getCharacterData(Tok.getLocation());
    llvm::errs() << " [UnClean='" << StringRef(Start, Tok.getLength())
                 << "']";
  }

  llvm::errs() << "\tLoc=<";
  DumpLocation(Tok.getLocation());
  llvm::errs() << ">";
}

/// Dump \p Loc using this preprocessor's source manager.
void Preprocessor::DumpLocation(SourceLocation Loc) const {
  Loc.dump(SourceMgr);
}

/// Print every replacement token of \p MI to llvm::errs() on one line.
void Preprocessor::DumpMacro(const MacroInfo &MI) const {
  llvm::errs() << "MACRO: ";
  for (unsigned i = 0, e = MI.getNumTokens(); i != e; ++i) {
    DumpToken(MI.getReplacementToken(i));
    llvm::errs() << "  ";
  }
  llvm::errs() << "\n";
}

/// Print the directive/macro counters and a memory-usage breakdown to
/// llvm::errs().
///
/// NOTE(review): whitespace inside the string literals below is reproduced as
/// it appears in this copy of the file; if the copy came from a
/// whitespace-collapsing extraction, confirm the intended indentation against
/// the upstream source.
void Preprocessor::PrintStats() {
  llvm::errs() << "\n*** Preprocessor Stats:\n";
  llvm::errs() << NumDirectives << " directives found:\n";
  llvm::errs() << " " << NumDefined << " #define.\n";
  llvm::errs() << " " << NumUndefined << " #undef.\n";
  llvm::errs() << " #include/#include_next/#import:\n";
  llvm::errs() << " " << NumEnteredSourceFiles << " source files entered.\n";
  llvm::errs() << " " << MaxIncludeStackDepth << " max include stack depth\n";
  llvm::errs() << " " << NumIf << " #if/#ifndef/#ifdef.\n";
  llvm::errs() << " " << NumElse << " #else/#elif.\n";
  llvm::errs() << " " << NumEndif << " #endif.\n";
  llvm::errs() << " " << NumPragma << " #pragma.\n";
  // The directive list previously read "#if/#ifndef#ifdef" (missing slash).
  llvm::errs() << NumSkipped << " #if/#ifndef/#ifdef regions skipped\n";

  llvm::errs() << NumMacroExpanded << "/" << NumFnMacroExpanded << "/"
               << NumBuiltinMacroExpanded << " obj/fn/builtin macros expanded, "
               << NumFastMacroExpanded << " on the fast path.\n";
  llvm::errs() << (NumFastTokenPaste+NumTokenPaste)
               << " token paste (##) operations performed, "
               << NumFastTokenPaste << " on the fast path.\n";

  llvm::errs() << "\nPreprocessor Memory: " << getTotalMemory() << "B total";

  llvm::errs() << "\n BumpPtr: " << BP.getTotalMemory();
  llvm::errs() << "\n Macro Expanded Tokens: "
               << llvm::capacity_in_bytes(MacroExpandedTokens);
  llvm::errs() << "\n Predefines Buffer: " << Predefines.capacity();
  llvm::errs() << "\n Macros: " << llvm::capacity_in_bytes(Macros);
  llvm::errs() << "\n #pragma push_macro Info: "
               << llvm::capacity_in_bytes(PragmaPushMacroInfo);
  llvm::errs() << "\n Poison Reasons: "
               << llvm::capacity_in_bytes(PoisonReasons);
  llvm::errs() << "\n Comment Handlers: "
               << llvm::capacity_in_bytes(CommentHandlers) << "\n";
}

/// Return an iterator to the first entry of the macro table. If
/// \p IncludeExternalMacros is set and an external source exists whose macros
/// have not been read yet, read them first (at most once).
Preprocessor::macro_iterator
Preprocessor::macro_begin(bool IncludeExternalMacros) const {
  if (IncludeExternalMacros && ExternalSource &&
      !ReadMacrosFromExternalSource) {
    ReadMacrosFromExternalSource = true;
    ExternalSource->ReadDefinedMacros();
  }

  return Macros.begin();
}

/// Sum the memory figures that PrintStats() reports individually.
size_t Preprocessor::getTotalMemory() const {
  return BP.getTotalMemory()
    + llvm::capacity_in_bytes(MacroExpandedTokens)
    + Predefines.capacity() /* Predefines buffer. */
    + llvm::capacity_in_bytes(Macros)
    + llvm::capacity_in_bytes(PragmaPushMacroInfo)
    + llvm::capacity_in_bytes(PoisonReasons)
    + llvm::capacity_in_bytes(CommentHandlers);
}

/// Return the past-the-end iterator of the macro table, loading externally
/// defined macros first under the same conditions as macro_begin().
Preprocessor::macro_iterator
Preprocessor::macro_end(bool IncludeExternalMacros) const {
  if (IncludeExternalMacros && ExternalSource &&
      !ReadMacrosFromExternalSource) {
    ReadMacrosFromExternalSource = true;
    ExternalSource->ReadDefinedMacros();
  }

  return Macros.end();
}

/// \brief Compares macro tokens with a specified token value sequence.
00311 static bool MacroDefinitionEquals(const MacroInfo *MI, 00312 ArrayRef<TokenValue> Tokens) { 00313 return Tokens.size() == MI->getNumTokens() && 00314 std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin()); 00315 } 00316 00317 StringRef Preprocessor::getLastMacroWithSpelling( 00318 SourceLocation Loc, 00319 ArrayRef<TokenValue> Tokens) const { 00320 SourceLocation BestLocation; 00321 StringRef BestSpelling; 00322 for (Preprocessor::macro_iterator I = macro_begin(), E = macro_end(); 00323 I != E; ++I) { 00324 if (!I->second->getMacroInfo()->isObjectLike()) 00325 continue; 00326 const MacroDirective::DefInfo 00327 Def = I->second->findDirectiveAtLoc(Loc, SourceMgr); 00328 if (!Def) 00329 continue; 00330 if (!MacroDefinitionEquals(Def.getMacroInfo(), Tokens)) 00331 continue; 00332 SourceLocation Location = Def.getLocation(); 00333 // Choose the macro defined latest. 00334 if (BestLocation.isInvalid() || 00335 (Location.isValid() && 00336 SourceMgr.isBeforeInTranslationUnit(BestLocation, Location))) { 00337 BestLocation = Location; 00338 BestSpelling = I->first->getName(); 00339 } 00340 } 00341 return BestSpelling; 00342 } 00343 00344 void Preprocessor::recomputeCurLexerKind() { 00345 if (CurLexer) 00346 CurLexerKind = CLK_Lexer; 00347 else if (CurPTHLexer) 00348 CurLexerKind = CLK_PTHLexer; 00349 else if (CurTokenLexer) 00350 CurLexerKind = CLK_TokenLexer; 00351 else 00352 CurLexerKind = CLK_CachingLexer; 00353 } 00354 00355 bool Preprocessor::SetCodeCompletionPoint(const FileEntry *File, 00356 unsigned CompleteLine, 00357 unsigned CompleteColumn) { 00358 assert(File); 00359 assert(CompleteLine && CompleteColumn && "Starts from 1:1"); 00360 assert(!CodeCompletionFile && "Already set"); 00361 00362 using llvm::MemoryBuffer; 00363 00364 // Load the actual file's contents. 
00365 bool Invalid = false; 00366 const MemoryBuffer *Buffer = SourceMgr.getMemoryBufferForFile(File, &Invalid); 00367 if (Invalid) 00368 return true; 00369 00370 // Find the byte position of the truncation point. 00371 const char *Position = Buffer->getBufferStart(); 00372 for (unsigned Line = 1; Line < CompleteLine; ++Line) { 00373 for (; *Position; ++Position) { 00374 if (*Position != '\r' && *Position != '\n') 00375 continue; 00376 00377 // Eat \r\n or \n\r as a single line. 00378 if ((Position[1] == '\r' || Position[1] == '\n') && 00379 Position[0] != Position[1]) 00380 ++Position; 00381 ++Position; 00382 break; 00383 } 00384 } 00385 00386 Position += CompleteColumn - 1; 00387 00388 // If pointing inside the preamble, adjust the position at the beginning of 00389 // the file after the preamble. 00390 if (SkipMainFilePreamble.first && 00391 SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()) == File) { 00392 if (Position - Buffer->getBufferStart() < SkipMainFilePreamble.first) 00393 Position = Buffer->getBufferStart() + SkipMainFilePreamble.first; 00394 } 00395 00396 if (Position > Buffer->getBufferEnd()) 00397 Position = Buffer->getBufferEnd(); 00398 00399 CodeCompletionFile = File; 00400 CodeCompletionOffset = Position - Buffer->getBufferStart(); 00401 00402 std::unique_ptr<MemoryBuffer> NewBuffer = 00403 MemoryBuffer::getNewUninitMemBuffer(Buffer->getBufferSize() + 1, 00404 Buffer->getBufferIdentifier()); 00405 char *NewBuf = const_cast<char*>(NewBuffer->getBufferStart()); 00406 char *NewPos = std::copy(Buffer->getBufferStart(), Position, NewBuf); 00407 *NewPos = '\0'; 00408 std::copy(Position, Buffer->getBufferEnd(), NewPos+1); 00409 SourceMgr.overrideFileContents(File, std::move(NewBuffer)); 00410 00411 return false; 00412 } 00413 00414 void Preprocessor::CodeCompleteNaturalLanguage() { 00415 if (CodeComplete) 00416 CodeComplete->CodeCompleteNaturalLanguage(); 00417 setCodeCompletionReached(); 00418 } 00419 00420 /// getSpelling - This method is used to 
get the spelling of a token into a 00421 /// SmallVector. Note that the returned StringRef may not point to the 00422 /// supplied buffer if a copy can be avoided. 00423 StringRef Preprocessor::getSpelling(const Token &Tok, 00424 SmallVectorImpl<char> &Buffer, 00425 bool *Invalid) const { 00426 // NOTE: this has to be checked *before* testing for an IdentifierInfo. 00427 if (Tok.isNot(tok::raw_identifier) && !Tok.hasUCN()) { 00428 // Try the fast path. 00429 if (const IdentifierInfo *II = Tok.getIdentifierInfo()) 00430 return II->getName(); 00431 } 00432 00433 // Resize the buffer if we need to copy into it. 00434 if (Tok.needsCleaning()) 00435 Buffer.resize(Tok.getLength()); 00436 00437 const char *Ptr = Buffer.data(); 00438 unsigned Len = getSpelling(Tok, Ptr, Invalid); 00439 return StringRef(Ptr, Len); 00440 } 00441 00442 /// CreateString - Plop the specified string into a scratch buffer and return a 00443 /// location for it. If specified, the source location provides a source 00444 /// location for the token. 00445 void Preprocessor::CreateString(StringRef Str, Token &Tok, 00446 SourceLocation ExpansionLocStart, 00447 SourceLocation ExpansionLocEnd) { 00448 Tok.setLength(Str.size()); 00449 00450 const char *DestPtr; 00451 SourceLocation Loc = ScratchBuf->getToken(Str.data(), Str.size(), DestPtr); 00452 00453 if (ExpansionLocStart.isValid()) 00454 Loc = SourceMgr.createExpansionLoc(Loc, ExpansionLocStart, 00455 ExpansionLocEnd, Str.size()); 00456 Tok.setLocation(Loc); 00457 00458 // If this is a raw identifier or a literal token, set the pointer data. 
00459 if (Tok.is(tok::raw_identifier)) 00460 Tok.setRawIdentifierData(DestPtr); 00461 else if (Tok.isLiteral()) 00462 Tok.setLiteralData(DestPtr); 00463 } 00464 00465 Module *Preprocessor::getCurrentModule() { 00466 if (getLangOpts().CurrentModule.empty()) 00467 return nullptr; 00468 00469 return getHeaderSearchInfo().lookupModule(getLangOpts().CurrentModule); 00470 } 00471 00472 //===----------------------------------------------------------------------===// 00473 // Preprocessor Initialization Methods 00474 //===----------------------------------------------------------------------===// 00475 00476 00477 /// EnterMainSourceFile - Enter the specified FileID as the main source file, 00478 /// which implicitly adds the builtin defines etc. 00479 void Preprocessor::EnterMainSourceFile() { 00480 // We do not allow the preprocessor to reenter the main file. Doing so will 00481 // cause FileID's to accumulate information from both runs (e.g. #line 00482 // information) and predefined macros aren't guaranteed to be set properly. 00483 assert(NumEnteredSourceFiles == 0 && "Cannot reenter the main file!"); 00484 FileID MainFileID = SourceMgr.getMainFileID(); 00485 00486 // If MainFileID is loaded it means we loaded an AST file, no need to enter 00487 // a main file. 00488 if (!SourceMgr.isLoadedFileID(MainFileID)) { 00489 // Enter the main file source buffer. 00490 EnterSourceFile(MainFileID, nullptr, SourceLocation()); 00491 00492 // If we've been asked to skip bytes in the main file (e.g., as part of a 00493 // precompiled preamble), do so now. 00494 if (SkipMainFilePreamble.first > 0) 00495 CurLexer->SkipBytes(SkipMainFilePreamble.first, 00496 SkipMainFilePreamble.second); 00497 00498 // Tell the header info that the main file was entered. If the file is later 00499 // #imported, it won't be re-entered. 
00500 if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID)) 00501 HeaderInfo.IncrementIncludeCount(FE); 00502 } 00503 00504 // Preprocess Predefines to populate the initial preprocessor state. 00505 std::unique_ptr<llvm::MemoryBuffer> SB = 00506 llvm::MemoryBuffer::getMemBufferCopy(Predefines, "<built-in>"); 00507 assert(SB && "Cannot create predefined source buffer"); 00508 FileID FID = SourceMgr.createFileID(std::move(SB)); 00509 assert(!FID.isInvalid() && "Could not create FileID for predefines?"); 00510 setPredefinesFileID(FID); 00511 00512 // Start parsing the predefines. 00513 EnterSourceFile(FID, nullptr, SourceLocation()); 00514 } 00515 00516 void Preprocessor::EndSourceFile() { 00517 // Notify the client that we reached the end of the source file. 00518 if (Callbacks) 00519 Callbacks->EndOfMainFile(); 00520 } 00521 00522 //===----------------------------------------------------------------------===// 00523 // Lexer Event Handling. 00524 //===----------------------------------------------------------------------===// 00525 00526 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the 00527 /// identifier information for the token and install it into the token, 00528 /// updating the token kind accordingly. 00529 IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const { 00530 assert(!Identifier.getRawIdentifier().empty() && "No raw identifier data!"); 00531 00532 // Look up this token, see if it is a macro, or if it is a language keyword. 00533 IdentifierInfo *II; 00534 if (!Identifier.needsCleaning() && !Identifier.hasUCN()) { 00535 // No cleaning needed, just use the characters from the lexed buffer. 00536 II = getIdentifierInfo(Identifier.getRawIdentifier()); 00537 } else { 00538 // Cleaning needed, alloca a buffer, clean into it, then use the buffer. 
00539 SmallString<64> IdentifierBuffer; 00540 StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer); 00541 00542 if (Identifier.hasUCN()) { 00543 SmallString<64> UCNIdentifierBuffer; 00544 expandUCNs(UCNIdentifierBuffer, CleanedStr); 00545 II = getIdentifierInfo(UCNIdentifierBuffer); 00546 } else { 00547 II = getIdentifierInfo(CleanedStr); 00548 } 00549 } 00550 00551 // Update the token info (identifier info and appropriate token kind). 00552 Identifier.setIdentifierInfo(II); 00553 Identifier.setKind(II->getTokenID()); 00554 00555 return II; 00556 } 00557 00558 void Preprocessor::SetPoisonReason(IdentifierInfo *II, unsigned DiagID) { 00559 PoisonReasons[II] = DiagID; 00560 } 00561 00562 void Preprocessor::PoisonSEHIdentifiers(bool Poison) { 00563 assert(Ident__exception_code && Ident__exception_info); 00564 assert(Ident___exception_code && Ident___exception_info); 00565 Ident__exception_code->setIsPoisoned(Poison); 00566 Ident___exception_code->setIsPoisoned(Poison); 00567 Ident_GetExceptionCode->setIsPoisoned(Poison); 00568 Ident__exception_info->setIsPoisoned(Poison); 00569 Ident___exception_info->setIsPoisoned(Poison); 00570 Ident_GetExceptionInfo->setIsPoisoned(Poison); 00571 Ident__abnormal_termination->setIsPoisoned(Poison); 00572 Ident___abnormal_termination->setIsPoisoned(Poison); 00573 Ident_AbnormalTermination->setIsPoisoned(Poison); 00574 } 00575 00576 void Preprocessor::HandlePoisonedIdentifier(Token & Identifier) { 00577 assert(Identifier.getIdentifierInfo() && 00578 "Can't handle identifiers without identifier info!"); 00579 llvm::DenseMap<IdentifierInfo*,unsigned>::const_iterator it = 00580 PoisonReasons.find(Identifier.getIdentifierInfo()); 00581 if(it == PoisonReasons.end()) 00582 Diag(Identifier, diag::err_pp_used_poisoned_id); 00583 else 00584 Diag(Identifier,it->second) << Identifier.getIdentifierInfo(); 00585 } 00586 00587 /// HandleIdentifier - This callback is invoked when the lexer reads an 00588 /// identifier. 
This callback looks up the identifier in the map and/or 00589 /// potentially macro expands it or turns it into a named token (like 'for'). 00590 /// 00591 /// Note that callers of this method are guarded by checking the 00592 /// IdentifierInfo's 'isHandleIdentifierCase' bit. If this method changes, the 00593 /// IdentifierInfo methods that compute these properties will need to change to 00594 /// match. 00595 bool Preprocessor::HandleIdentifier(Token &Identifier) { 00596 assert(Identifier.getIdentifierInfo() && 00597 "Can't handle identifiers without identifier info!"); 00598 00599 IdentifierInfo &II = *Identifier.getIdentifierInfo(); 00600 00601 // If the information about this identifier is out of date, update it from 00602 // the external source. 00603 // We have to treat __VA_ARGS__ in a special way, since it gets 00604 // serialized with isPoisoned = true, but our preprocessor may have 00605 // unpoisoned it if we're defining a C99 macro. 00606 if (II.isOutOfDate()) { 00607 bool CurrentIsPoisoned = false; 00608 if (&II == Ident__VA_ARGS__) 00609 CurrentIsPoisoned = Ident__VA_ARGS__->isPoisoned(); 00610 00611 ExternalSource->updateOutOfDateIdentifier(II); 00612 Identifier.setKind(II.getTokenID()); 00613 00614 if (&II == Ident__VA_ARGS__) 00615 II.setIsPoisoned(CurrentIsPoisoned); 00616 } 00617 00618 // If this identifier was poisoned, and if it was not produced from a macro 00619 // expansion, emit an error. 00620 if (II.isPoisoned() && CurPPLexer) { 00621 HandlePoisonedIdentifier(Identifier); 00622 } 00623 00624 // If this is a macro to be expanded, do it. 00625 if (MacroDirective *MD = getMacroDirective(&II)) { 00626 MacroInfo *MI = MD->getMacroInfo(); 00627 if (!DisableMacroExpansion) { 00628 if (!Identifier.isExpandDisabled() && MI->isEnabled()) { 00629 // C99 6.10.3p10: If the preprocessing token immediately after the 00630 // macro name isn't a '(', this macro should not be expanded. 
00631 if (!MI->isFunctionLike() || isNextPPTokenLParen()) 00632 return HandleMacroExpandedIdentifier(Identifier, MD); 00633 } else { 00634 // C99 6.10.3.4p2 says that a disabled macro may never again be 00635 // expanded, even if it's in a context where it could be expanded in the 00636 // future. 00637 Identifier.setFlag(Token::DisableExpand); 00638 if (MI->isObjectLike() || isNextPPTokenLParen()) 00639 Diag(Identifier, diag::pp_disabled_macro_expansion); 00640 } 00641 } 00642 } 00643 00644 // If this identifier is a keyword in C++11, produce a warning. Don't warn if 00645 // we're not considering macro expansion, since this identifier might be the 00646 // name of a macro. 00647 // FIXME: This warning is disabled in cases where it shouldn't be, like 00648 // "#define constexpr constexpr", "int constexpr;" 00649 if (II.isCXX11CompatKeyword() && !DisableMacroExpansion) { 00650 Diag(Identifier, diag::warn_cxx11_keyword) << II.getName(); 00651 // Don't diagnose this keyword again in this translation unit. 00652 II.setIsCXX11CompatKeyword(false); 00653 } 00654 00655 // C++ 2.11p2: If this is an alternative representation of a C++ operator, 00656 // then we act as if it is the actual operator and not the textual 00657 // representation of it. 00658 if (II.isCPlusPlusOperatorKeyword()) 00659 Identifier.setIdentifierInfo(nullptr); 00660 00661 // If this is an extension token, diagnose its use. 00662 // We avoid diagnosing tokens that originate from macro definitions. 00663 // FIXME: This warning is disabled in cases where it shouldn't be, 00664 // like "#define TY typeof", "TY(1) x". 00665 if (II.isExtensionToken() && !DisableMacroExpansion) 00666 Diag(Identifier, diag::ext_token_used); 00667 00668 // If this is the 'import' contextual keyword following an '@', note 00669 // that the next token indicates a module name. 
00670 // 00671 // Note that we do not treat 'import' as a contextual 00672 // keyword when we're in a caching lexer, because caching lexers only get 00673 // used in contexts where import declarations are disallowed. 00674 if (LastTokenWasAt && II.isModulesImport() && !InMacroArgs && 00675 !DisableMacroExpansion && getLangOpts().Modules && 00676 CurLexerKind != CLK_CachingLexer) { 00677 ModuleImportLoc = Identifier.getLocation(); 00678 ModuleImportPath.clear(); 00679 ModuleImportExpectsIdentifier = true; 00680 CurLexerKind = CLK_LexAfterModuleImport; 00681 } 00682 return true; 00683 } 00684 00685 void Preprocessor::Lex(Token &Result) { 00686 // We loop here until a lex function retuns a token; this avoids recursion. 00687 bool ReturnedToken; 00688 do { 00689 switch (CurLexerKind) { 00690 case CLK_Lexer: 00691 ReturnedToken = CurLexer->Lex(Result); 00692 break; 00693 case CLK_PTHLexer: 00694 ReturnedToken = CurPTHLexer->Lex(Result); 00695 break; 00696 case CLK_TokenLexer: 00697 ReturnedToken = CurTokenLexer->Lex(Result); 00698 break; 00699 case CLK_CachingLexer: 00700 CachingLex(Result); 00701 ReturnedToken = true; 00702 break; 00703 case CLK_LexAfterModuleImport: 00704 LexAfterModuleImport(Result); 00705 ReturnedToken = true; 00706 break; 00707 } 00708 } while (!ReturnedToken); 00709 00710 LastTokenWasAt = Result.is(tok::at); 00711 } 00712 00713 00714 /// \brief Lex a token following the 'import' contextual keyword. 00715 /// 00716 void Preprocessor::LexAfterModuleImport(Token &Result) { 00717 // Figure out what kind of lexer we actually have. 00718 recomputeCurLexerKind(); 00719 00720 // Lex the next token. 00721 Lex(Result); 00722 00723 // The token sequence 00724 // 00725 // import identifier (. identifier)* 00726 // 00727 // indicates a module import directive. We already saw the 'import' 00728 // contextual keyword, so now we're looking for the identifiers. 
00729 if (ModuleImportExpectsIdentifier && Result.getKind() == tok::identifier) { 00730 // We expected to see an identifier here, and we did; continue handling 00731 // identifiers. 00732 ModuleImportPath.push_back(std::make_pair(Result.getIdentifierInfo(), 00733 Result.getLocation())); 00734 ModuleImportExpectsIdentifier = false; 00735 CurLexerKind = CLK_LexAfterModuleImport; 00736 return; 00737 } 00738 00739 // If we're expecting a '.' or a ';', and we got a '.', then wait until we 00740 // see the next identifier. 00741 if (!ModuleImportExpectsIdentifier && Result.getKind() == tok::period) { 00742 ModuleImportExpectsIdentifier = true; 00743 CurLexerKind = CLK_LexAfterModuleImport; 00744 return; 00745 } 00746 00747 // If we have a non-empty module path, load the named module. 00748 if (!ModuleImportPath.empty() && getLangOpts().Modules) { 00749 Module *Imported = TheModuleLoader.loadModule(ModuleImportLoc, 00750 ModuleImportPath, 00751 Module::MacrosVisible, 00752 /*IsIncludeDirective=*/false); 00753 if (Callbacks) 00754 Callbacks->moduleImport(ModuleImportLoc, ModuleImportPath, Imported); 00755 } 00756 } 00757 00758 bool Preprocessor::FinishLexStringLiteral(Token &Result, std::string &String, 00759 const char *DiagnosticTag, 00760 bool AllowMacroExpansion) { 00761 // We need at least one string literal. 00762 if (Result.isNot(tok::string_literal)) { 00763 Diag(Result, diag::err_expected_string_literal) 00764 << /*Source='in...'*/0 << DiagnosticTag; 00765 return false; 00766 } 00767 00768 // Lex string literal tokens, optionally with macro expansion. 00769 SmallVector<Token, 4> StrToks; 00770 do { 00771 StrToks.push_back(Result); 00772 00773 if (Result.hasUDSuffix()) 00774 Diag(Result, diag::err_invalid_string_udl); 00775 00776 if (AllowMacroExpansion) 00777 Lex(Result); 00778 else 00779 LexUnexpandedToken(Result); 00780 } while (Result.is(tok::string_literal)); 00781 00782 // Concatenate and parse the strings. 
00783 StringLiteralParser Literal(StrToks, *this); 00784 assert(Literal.isAscii() && "Didn't allow wide strings in"); 00785 00786 if (Literal.hadError) 00787 return false; 00788 00789 if (Literal.Pascal) { 00790 Diag(StrToks[0].getLocation(), diag::err_expected_string_literal) 00791 << /*Source='in...'*/0 << DiagnosticTag; 00792 return false; 00793 } 00794 00795 String = Literal.GetString(); 00796 return true; 00797 } 00798 00799 bool Preprocessor::parseSimpleIntegerLiteral(Token &Tok, uint64_t &Value) { 00800 assert(Tok.is(tok::numeric_constant)); 00801 SmallString<8> IntegerBuffer; 00802 bool NumberInvalid = false; 00803 StringRef Spelling = getSpelling(Tok, IntegerBuffer, &NumberInvalid); 00804 if (NumberInvalid) 00805 return false; 00806 NumericLiteralParser Literal(Spelling, Tok.getLocation(), *this); 00807 if (Literal.hadError || !Literal.isIntegerLiteral() || Literal.hasUDSuffix()) 00808 return false; 00809 llvm::APInt APVal(64, 0); 00810 if (Literal.GetIntegerValue(APVal)) 00811 return false; 00812 Lex(Tok); 00813 Value = APVal.getLimitedValue(); 00814 return true; 00815 } 00816 00817 void Preprocessor::addCommentHandler(CommentHandler *Handler) { 00818 assert(Handler && "NULL comment handler"); 00819 assert(std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler) == 00820 CommentHandlers.end() && "Comment handler already registered"); 00821 CommentHandlers.push_back(Handler); 00822 } 00823 00824 void Preprocessor::removeCommentHandler(CommentHandler *Handler) { 00825 std::vector<CommentHandler *>::iterator Pos 00826 = std::find(CommentHandlers.begin(), CommentHandlers.end(), Handler); 00827 assert(Pos != CommentHandlers.end() && "Comment handler not registered"); 00828 CommentHandlers.erase(Pos); 00829 } 00830 00831 bool Preprocessor::HandleComment(Token &result, SourceRange Comment) { 00832 bool AnyPendingTokens = false; 00833 for (std::vector<CommentHandler *>::iterator H = CommentHandlers.begin(), 00834 HEnd = CommentHandlers.end(); 00835 H != 
HEnd; ++H) { 00836 if ((*H)->HandleComment(*this, Comment)) 00837 AnyPendingTokens = true; 00838 } 00839 if (!AnyPendingTokens || getCommentRetentionState()) 00840 return false; 00841 Lex(result); 00842 return true; 00843 } 00844 00845 ModuleLoader::~ModuleLoader() { } 00846 00847 CommentHandler::~CommentHandler() { } 00848 00849 CodeCompletionHandler::~CodeCompletionHandler() { } 00850 00851 void Preprocessor::createPreprocessingRecord() { 00852 if (Record) 00853 return; 00854 00855 Record = new PreprocessingRecord(getSourceManager()); 00856 addPPCallbacks(std::unique_ptr<PPCallbacks>(Record)); 00857 }