clang API Documentation
00001 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 /// 00010 /// \file 00011 /// \brief This file contains the implementation of the UnwrappedLineParser, 00012 /// which turns a stream of tokens into UnwrappedLines. 00013 /// 00014 //===----------------------------------------------------------------------===// 00015 00016 #include "UnwrappedLineParser.h" 00017 #include "llvm/Support/Debug.h" 00018 00019 #define DEBUG_TYPE "format-parser" 00020 00021 namespace clang { 00022 namespace format { 00023 00024 class FormatTokenSource { 00025 public: 00026 virtual ~FormatTokenSource() {} 00027 virtual FormatToken *getNextToken() = 0; 00028 00029 virtual unsigned getPosition() = 0; 00030 virtual FormatToken *setPosition(unsigned Position) = 0; 00031 }; 00032 00033 namespace { 00034 00035 class ScopedDeclarationState { 00036 public: 00037 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 00038 bool MustBeDeclaration) 00039 : Line(Line), Stack(Stack) { 00040 Line.MustBeDeclaration = MustBeDeclaration; 00041 Stack.push_back(MustBeDeclaration); 00042 } 00043 ~ScopedDeclarationState() { 00044 Stack.pop_back(); 00045 if (!Stack.empty()) 00046 Line.MustBeDeclaration = Stack.back(); 00047 else 00048 Line.MustBeDeclaration = true; 00049 } 00050 00051 private: 00052 UnwrappedLine &Line; 00053 std::vector<bool> &Stack; 00054 }; 00055 00056 class ScopedMacroState : public FormatTokenSource { 00057 public: 00058 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 00059 FormatToken *&ResetToken, bool &StructuralError) 00060 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 00061 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 00062 StructuralError(StructuralError), 00063 PreviousStructuralError(StructuralError), Token(nullptr) { 00064 TokenSource = this; 00065 Line.Level = 0; 00066 Line.InPPDirective = true; 00067 } 00068 00069 ~ScopedMacroState() { 00070 TokenSource = PreviousTokenSource; 00071 ResetToken = Token; 00072 Line.InPPDirective = false; 00073 Line.Level = PreviousLineLevel; 00074 StructuralError = PreviousStructuralError; 00075 } 00076 00077 FormatToken *getNextToken() override { 00078 // The \c UnwrappedLineParser guards against this by never calling 00079 // \c getNextToken() after it has encountered the first eof token. 00080 assert(!eof()); 00081 Token = PreviousTokenSource->getNextToken(); 00082 if (eof()) 00083 return getFakeEOF(); 00084 return Token; 00085 } 00086 00087 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 00088 00089 FormatToken *setPosition(unsigned Position) override { 00090 Token = PreviousTokenSource->setPosition(Position); 00091 return Token; 00092 } 00093 00094 private: 00095 bool eof() { return Token && Token->HasUnescapedNewline; } 00096 00097 FormatToken *getFakeEOF() { 00098 static bool EOFInitialized = false; 00099 static FormatToken FormatTok; 00100 if (!EOFInitialized) { 00101 FormatTok.Tok.startToken(); 00102 FormatTok.Tok.setKind(tok::eof); 00103 EOFInitialized = true; 00104 } 00105 return &FormatTok; 00106 } 00107 00108 UnwrappedLine &Line; 00109 FormatTokenSource *&TokenSource; 00110 FormatToken *&ResetToken; 00111 unsigned PreviousLineLevel; 00112 FormatTokenSource *PreviousTokenSource; 00113 bool &StructuralError; 00114 bool PreviousStructuralError; 00115 00116 FormatToken *Token; 00117 }; 00118 00119 } // end anonymous namespace 00120 00121 class ScopedLineState { 00122 public: 00123 ScopedLineState(UnwrappedLineParser &Parser, 00124 bool SwitchToPreprocessorLines = false) 00125 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 00126 if (SwitchToPreprocessorLines) 00127 Parser.CurrentLines = &Parser.PreprocessorDirectives; 00128 else if (!Parser.Line->Tokens.empty()) 00129 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 00130 PreBlockLine = std::move(Parser.Line); 00131 Parser.Line = llvm::make_unique<UnwrappedLine>(); 00132 Parser.Line->Level = PreBlockLine->Level; 00133 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 00134 } 00135 00136 ~ScopedLineState() { 00137 if (!Parser.Line->Tokens.empty()) { 00138 Parser.addUnwrappedLine(); 00139 } 00140 assert(Parser.Line->Tokens.empty()); 00141 Parser.Line = std::move(PreBlockLine); 00142 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 00143 Parser.MustBreakBeforeNextToken = true; 00144 Parser.CurrentLines = OriginalLines; 00145 } 00146 00147 private: 00148 UnwrappedLineParser &Parser; 00149 00150 std::unique_ptr<UnwrappedLine> PreBlockLine; 00151 SmallVectorImpl<UnwrappedLine> *OriginalLines; 00152 }; 00153 00154 class CompoundStatementIndenter { 00155 public: 00156 CompoundStatementIndenter(UnwrappedLineParser *Parser, 00157 const FormatStyle &Style, unsigned &LineLevel) 00158 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 00159 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) { 00160 Parser->addUnwrappedLine(); 00161 } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) { 00162 Parser->addUnwrappedLine(); 00163 ++LineLevel; 00164 } 00165 } 00166 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 00167 00168 private: 00169 unsigned &LineLevel; 00170 unsigned OldLineLevel; 00171 }; 00172 00173 namespace { 00174 00175 class IndexedTokenSource : public FormatTokenSource { 00176 public: 00177 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 00178 : Tokens(Tokens), Position(-1) {} 00179 00180 FormatToken *getNextToken() override { 00181 ++Position; 00182 return Tokens[Position]; 00183 } 00184 00185 unsigned getPosition() override { 00186 assert(Position >= 0); 00187 return Position; 00188 } 00189 00190 FormatToken *setPosition(unsigned P) override { 00191 Position = P; 00192 return Tokens[Position]; 00193 } 00194 00195 void reset() { Position = -1; } 00196 00197 private: 00198 ArrayRef<FormatToken *> Tokens; 00199 int Position; 00200 }; 00201 00202 } // end anonymous namespace 00203 00204 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 00205 const AdditionalKeywords &Keywords, 00206 ArrayRef<FormatToken *> Tokens, 00207 UnwrappedLineConsumer &Callback) 00208 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 00209 CurrentLines(&Lines), StructuralError(false), Style(Style), 00210 Keywords(Keywords), Tokens(nullptr), Callback(Callback), 00211 AllTokens(Tokens), PPBranchLevel(-1) {} 00212 00213 void UnwrappedLineParser::reset() { 00214 PPBranchLevel = -1; 00215 Line.reset(new UnwrappedLine); 00216 CommentsBeforeNextToken.clear(); 00217 FormatTok = nullptr; 00218 MustBreakBeforeNextToken = false; 00219 PreprocessorDirectives.clear(); 00220 CurrentLines = &Lines; 00221 DeclarationScopeStack.clear(); 00222 StructuralError = false; 00223 PPStack.clear(); 00224 } 00225 00226 bool UnwrappedLineParser::parse() { 00227 IndexedTokenSource TokenSource(AllTokens); 00228 do { 00229 DEBUG(llvm::dbgs() << "----\n"); 00230 reset(); 00231 Tokens = &TokenSource; 00232 TokenSource.reset(); 00233 00234 readToken(); 00235 parseFile(); 00236 // Create line with eof token. 00237 pushToken(FormatTok); 00238 addUnwrappedLine(); 00239 00240 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 00241 E = Lines.end(); 00242 I != E; ++I) { 00243 Callback.consumeUnwrappedLine(*I); 00244 } 00245 Callback.finishRun(); 00246 Lines.clear(); 00247 while (!PPLevelBranchIndex.empty() && 00248 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 00249 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 00250 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 00251 } 00252 if (!PPLevelBranchIndex.empty()) { 00253 ++PPLevelBranchIndex.back(); 00254 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 00255 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 00256 } 00257 } while (!PPLevelBranchIndex.empty()); 00258 00259 return StructuralError; 00260 } 00261 00262 void UnwrappedLineParser::parseFile() { 00263 ScopedDeclarationState DeclarationState( 00264 *Line, DeclarationScopeStack, 00265 /*MustBeDeclaration=*/ !Line->InPPDirective); 00266 parseLevel(/*HasOpeningBrace=*/false); 00267 // Make sure to format the remaining tokens. 00268 flushComments(true); 00269 addUnwrappedLine(); 00270 } 00271 00272 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 00273 bool SwitchLabelEncountered = false; 00274 do { 00275 switch (FormatTok->Tok.getKind()) { 00276 case tok::comment: 00277 nextToken(); 00278 addUnwrappedLine(); 00279 break; 00280 case tok::l_brace: 00281 // FIXME: Add parameter whether this can happen - if this happens, we must 00282 // be in a non-declaration context. 00283 parseBlock(/*MustBeDeclaration=*/false); 00284 addUnwrappedLine(); 00285 break; 00286 case tok::r_brace: 00287 if (HasOpeningBrace) 00288 return; 00289 StructuralError = true; 00290 nextToken(); 00291 addUnwrappedLine(); 00292 break; 00293 case tok::kw_default: 00294 case tok::kw_case: 00295 if (!SwitchLabelEncountered && 00296 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 00297 ++Line->Level; 00298 SwitchLabelEncountered = true; 00299 parseStructuralElement(); 00300 break; 00301 default: 00302 parseStructuralElement(); 00303 break; 00304 } 00305 } while (!eof()); 00306 } 00307 00308 void UnwrappedLineParser::calculateBraceTypes() { 00309 // We'll parse forward through the tokens until we hit 00310 // a closing brace or eof - note that getNextToken() will 00311 // parse macros, so this will magically work inside macro 00312 // definitions, too. 00313 unsigned StoredPosition = Tokens->getPosition(); 00314 FormatToken *Tok = FormatTok; 00315 // Keep a stack of positions of lbrace tokens. We will 00316 // update information about whether an lbrace starts a 00317 // braced init list or a different block during the loop. 00318 SmallVector<FormatToken *, 8> LBraceStack; 00319 assert(Tok->Tok.is(tok::l_brace)); 00320 do { 00321 // Get next none-comment token. 00322 FormatToken *NextTok; 00323 unsigned ReadTokens = 0; 00324 do { 00325 NextTok = Tokens->getNextToken(); 00326 ++ReadTokens; 00327 } while (NextTok->is(tok::comment)); 00328 00329 switch (Tok->Tok.getKind()) { 00330 case tok::l_brace: 00331 LBraceStack.push_back(Tok); 00332 break; 00333 case tok::r_brace: 00334 if (!LBraceStack.empty()) { 00335 if (LBraceStack.back()->BlockKind == BK_Unknown) { 00336 bool ProbablyBracedList = false; 00337 if (Style.Language == FormatStyle::LK_Proto) { 00338 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 00339 } else { 00340 // Using OriginalColumn to distinguish between ObjC methods and 00341 // binary operators is a bit hacky. 00342 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 00343 NextTok->OriginalColumn == 0; 00344 00345 // If there is a comma, semicolon or right paren after the closing 00346 // brace, we assume this is a braced initializer list. Note that 00347 // regardless how we mark inner braces here, we will overwrite the 00348 // BlockKind later if we parse a braced list (where all blocks 00349 // inside are by default braced lists), or when we explicitly detect 00350 // blocks (for example while parsing lambdas). 00351 // 00352 // We exclude + and - as they can be ObjC visibility modifiers. 00353 ProbablyBracedList = 00354 NextTok->isOneOf(tok::comma, tok::semi, tok::period, tok::colon, 00355 tok::r_paren, tok::r_square, tok::l_brace, 00356 tok::l_paren, tok::ellipsis) || 00357 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 00358 } 00359 if (ProbablyBracedList) { 00360 Tok->BlockKind = BK_BracedInit; 00361 LBraceStack.back()->BlockKind = BK_BracedInit; 00362 } else { 00363 Tok->BlockKind = BK_Block; 00364 LBraceStack.back()->BlockKind = BK_Block; 00365 } 00366 } 00367 LBraceStack.pop_back(); 00368 } 00369 break; 00370 case tok::at: 00371 case tok::semi: 00372 case tok::kw_if: 00373 case tok::kw_while: 00374 case tok::kw_for: 00375 case tok::kw_switch: 00376 case tok::kw_try: 00377 if (!LBraceStack.empty()) 00378 LBraceStack.back()->BlockKind = BK_Block; 00379 break; 00380 default: 00381 break; 00382 } 00383 Tok = NextTok; 00384 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 00385 // Assume other blocks for all unclosed opening braces. 00386 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 00387 if (LBraceStack[i]->BlockKind == BK_Unknown) 00388 LBraceStack[i]->BlockKind = BK_Block; 00389 } 00390 00391 FormatTok = Tokens->setPosition(StoredPosition); 00392 } 00393 00394 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 00395 bool MunchSemi) { 00396 assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected"); 00397 unsigned InitialLevel = Line->Level; 00398 nextToken(); 00399 00400 addUnwrappedLine(); 00401 00402 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 00403 MustBeDeclaration); 00404 if (AddLevel) 00405 ++Line->Level; 00406 parseLevel(/*HasOpeningBrace=*/true); 00407 00408 if (!FormatTok->Tok.is(tok::r_brace)) { 00409 Line->Level = InitialLevel; 00410 StructuralError = true; 00411 return; 00412 } 00413 00414 nextToken(); // Munch the closing brace. 00415 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 00416 nextToken(); 00417 Line->Level = InitialLevel; 00418 } 00419 00420 static bool IsGoogScope(const UnwrappedLine &Line) { 00421 if (Line.Tokens.size() < 4) 00422 return false; 00423 auto I = Line.Tokens.begin(); 00424 if (I->Tok->TokenText != "goog") 00425 return false; 00426 ++I; 00427 if (I->Tok->isNot(tok::period)) 00428 return false; 00429 ++I; 00430 if (I->Tok->TokenText != "scope") 00431 return false; 00432 ++I; 00433 return I->Tok->is(tok::l_paren); 00434 } 00435 00436 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 00437 const FormatToken &InitialToken) { 00438 switch (Style.BreakBeforeBraces) { 00439 case FormatStyle::BS_Linux: 00440 return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class); 00441 case FormatStyle::BS_Allman: 00442 case FormatStyle::BS_GNU: 00443 return true; 00444 default: 00445 return false; 00446 } 00447 } 00448 00449 void UnwrappedLineParser::parseChildBlock() { 00450 FormatTok->BlockKind = BK_Block; 00451 nextToken(); 00452 { 00453 bool GoogScope = 00454 Style.Language == FormatStyle::LK_JavaScript && IsGoogScope(*Line); 00455 ScopedLineState LineState(*this); 00456 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 00457 /*MustBeDeclaration=*/false); 00458 Line->Level += GoogScope ? 0 : 1; 00459 parseLevel(/*HasOpeningBrace=*/true); 00460 Line->Level -= GoogScope ? 0 : 1; 00461 } 00462 nextToken(); 00463 } 00464 00465 void UnwrappedLineParser::parsePPDirective() { 00466 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 00467 ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError); 00468 nextToken(); 00469 00470 if (!FormatTok->Tok.getIdentifierInfo()) { 00471 parsePPUnknown(); 00472 return; 00473 } 00474 00475 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 00476 case tok::pp_define: 00477 parsePPDefine(); 00478 return; 00479 case tok::pp_if: 00480 parsePPIf(/*IfDef=*/false); 00481 break; 00482 case tok::pp_ifdef: 00483 case tok::pp_ifndef: 00484 parsePPIf(/*IfDef=*/true); 00485 break; 00486 case tok::pp_else: 00487 parsePPElse(); 00488 break; 00489 case tok::pp_elif: 00490 parsePPElIf(); 00491 break; 00492 case tok::pp_endif: 00493 parsePPEndIf(); 00494 break; 00495 default: 00496 parsePPUnknown(); 00497 break; 00498 } 00499 } 00500 00501 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 00502 if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable)) 00503 PPStack.push_back(PP_Unreachable); 00504 else 00505 PPStack.push_back(PP_Conditional); 00506 } 00507 00508 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 00509 ++PPBranchLevel; 00510 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 00511 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 00512 PPLevelBranchIndex.push_back(0); 00513 PPLevelBranchCount.push_back(0); 00514 } 00515 PPChainBranchIndex.push(0); 00516 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 00517 conditionalCompilationCondition(Unreachable || Skip); 00518 } 00519 00520 void UnwrappedLineParser::conditionalCompilationAlternative() { 00521 if (!PPStack.empty()) 00522 PPStack.pop_back(); 00523 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 00524 if (!PPChainBranchIndex.empty()) 00525 ++PPChainBranchIndex.top(); 00526 conditionalCompilationCondition( 00527 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 00528 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 00529 } 00530 00531 void UnwrappedLineParser::conditionalCompilationEnd() { 00532 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 00533 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 00534 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 00535 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 00536 } 00537 } 00538 // Guard against #endif's without #if. 00539 if (PPBranchLevel > 0) 00540 --PPBranchLevel; 00541 if (!PPChainBranchIndex.empty()) 00542 PPChainBranchIndex.pop(); 00543 if (!PPStack.empty()) 00544 PPStack.pop_back(); 00545 } 00546 00547 void UnwrappedLineParser::parsePPIf(bool IfDef) { 00548 nextToken(); 00549 bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && 00550 StringRef(FormatTok->Tok.getLiteralData(), 00551 FormatTok->Tok.getLength()) == "0") || 00552 FormatTok->Tok.is(tok::kw_false); 00553 conditionalCompilationStart(!IfDef && IsLiteralFalse); 00554 parsePPUnknown(); 00555 } 00556 00557 void UnwrappedLineParser::parsePPElse() { 00558 conditionalCompilationAlternative(); 00559 parsePPUnknown(); 00560 } 00561 00562 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 00563 00564 void UnwrappedLineParser::parsePPEndIf() { 00565 conditionalCompilationEnd(); 00566 parsePPUnknown(); 00567 } 00568 00569 void UnwrappedLineParser::parsePPDefine() { 00570 nextToken(); 00571 00572 if (FormatTok->Tok.getKind() != tok::identifier) { 00573 parsePPUnknown(); 00574 return; 00575 } 00576 nextToken(); 00577 if (FormatTok->Tok.getKind() == tok::l_paren && 00578 FormatTok->WhitespaceRange.getBegin() == 00579 FormatTok->WhitespaceRange.getEnd()) { 00580 parseParens(); 00581 } 00582 addUnwrappedLine(); 00583 Line->Level = 1; 00584 00585 // Errors during a preprocessor directive can only affect the layout of the 00586 // preprocessor directive, and thus we ignore them. An alternative approach 00587 // would be to use the same approach we use on the file level (no 00588 // re-indentation if there was a structural error) within the macro 00589 // definition. 00590 parseFile(); 00591 } 00592 00593 void UnwrappedLineParser::parsePPUnknown() { 00594 do { 00595 nextToken(); 00596 } while (!eof()); 00597 addUnwrappedLine(); 00598 } 00599 00600 // Here we blacklist certain tokens that are not usually the first token in an 00601 // unwrapped line. This is used in attempt to distinguish macro calls without 00602 // trailing semicolons from other constructs split to several lines. 00603 bool tokenCanStartNewLine(clang::Token Tok) { 00604 // Semicolon can be a null-statement, l_square can be a start of a macro or 00605 // a C++11 attribute, but this doesn't seem to be common. 00606 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 00607 Tok.isNot(tok::l_square) && 00608 // Tokens that can only be used as binary operators and a part of 00609 // overloaded operator names. 00610 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 00611 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 00612 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 00613 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 00614 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 00615 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 00616 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 00617 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 00618 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 00619 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 00620 Tok.isNot(tok::lesslessequal) && 00621 // Colon is used in labels, base class lists, initializer lists, 00622 // range-based for loops, ternary operator, but should never be the 00623 // first token in an unwrapped line. 00624 Tok.isNot(tok::colon) && 00625 // 'noexcept' is a trailing annotation. 00626 Tok.isNot(tok::kw_noexcept); 00627 } 00628 00629 void UnwrappedLineParser::parseStructuralElement() { 00630 assert(!FormatTok->Tok.is(tok::l_brace)); 00631 switch (FormatTok->Tok.getKind()) { 00632 case tok::at: 00633 nextToken(); 00634 if (FormatTok->Tok.is(tok::l_brace)) { 00635 parseBracedList(); 00636 break; 00637 } 00638 switch (FormatTok->Tok.getObjCKeywordID()) { 00639 case tok::objc_public: 00640 case tok::objc_protected: 00641 case tok::objc_package: 00642 case tok::objc_private: 00643 return parseAccessSpecifier(); 00644 case tok::objc_interface: 00645 case tok::objc_implementation: 00646 return parseObjCInterfaceOrImplementation(); 00647 case tok::objc_protocol: 00648 return parseObjCProtocol(); 00649 case tok::objc_end: 00650 return; // Handled by the caller. 00651 case tok::objc_optional: 00652 case tok::objc_required: 00653 nextToken(); 00654 addUnwrappedLine(); 00655 return; 00656 default: 00657 break; 00658 } 00659 break; 00660 case tok::kw_asm: 00661 FormatTok->Finalized = true; 00662 nextToken(); 00663 if (FormatTok->is(tok::l_brace)) { 00664 while (FormatTok && FormatTok->isNot(tok::eof)) { 00665 FormatTok->Finalized = true; 00666 if (FormatTok->is(tok::r_brace)) { 00667 nextToken(); 00668 break; 00669 } 00670 nextToken(); 00671 } 00672 } 00673 break; 00674 case tok::kw_namespace: 00675 parseNamespace(); 00676 return; 00677 case tok::kw_inline: 00678 nextToken(); 00679 if (FormatTok->Tok.is(tok::kw_namespace)) { 00680 parseNamespace(); 00681 return; 00682 } 00683 break; 00684 case tok::kw_public: 00685 case tok::kw_protected: 00686 case tok::kw_private: 00687 if (Style.Language == FormatStyle::LK_Java) 00688 nextToken(); 00689 else 00690 parseAccessSpecifier(); 00691 return; 00692 case tok::kw_if: 00693 parseIfThenElse(); 00694 return; 00695 case tok::kw_for: 00696 case tok::kw_while: 00697 parseForOrWhileLoop(); 00698 return; 00699 case tok::kw_do: 00700 parseDoWhile(); 00701 return; 00702 case tok::kw_switch: 00703 parseSwitch(); 00704 return; 00705 case tok::kw_default: 00706 nextToken(); 00707 parseLabel(); 00708 return; 00709 case tok::kw_case: 00710 parseCaseLabel(); 00711 return; 00712 case tok::kw_try: 00713 parseTryCatch(); 00714 return; 00715 case tok::kw_extern: 00716 nextToken(); 00717 if (FormatTok->Tok.is(tok::string_literal)) { 00718 nextToken(); 00719 if (FormatTok->Tok.is(tok::l_brace)) { 00720 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 00721 addUnwrappedLine(); 00722 return; 00723 } 00724 } 00725 break; 00726 case tok::identifier: 00727 if (FormatTok->IsForEachMacro) { 00728 parseForOrWhileLoop(); 00729 return; 00730 } 00731 // In all other cases, parse the declaration. 00732 break; 00733 default: 00734 break; 00735 } 00736 do { 00737 switch (FormatTok->Tok.getKind()) { 00738 case tok::at: 00739 nextToken(); 00740 if (FormatTok->Tok.is(tok::l_brace)) 00741 parseBracedList(); 00742 break; 00743 case tok::kw_enum: 00744 parseEnum(); 00745 break; 00746 case tok::kw_typedef: 00747 nextToken(); 00748 if (FormatTok->is(Keywords.kw_NS_ENUM)) 00749 parseEnum(); 00750 break; 00751 case tok::kw_struct: 00752 case tok::kw_union: 00753 case tok::kw_class: 00754 parseRecord(); 00755 // A record declaration or definition is always the start of a structural 00756 // element. 00757 break; 00758 case tok::semi: 00759 nextToken(); 00760 addUnwrappedLine(); 00761 return; 00762 case tok::r_brace: 00763 addUnwrappedLine(); 00764 return; 00765 case tok::l_paren: 00766 parseParens(); 00767 break; 00768 case tok::caret: 00769 nextToken(); 00770 if (FormatTok->Tok.isAnyIdentifier() || 00771 FormatTok->isSimpleTypeSpecifier()) 00772 nextToken(); 00773 if (FormatTok->is(tok::l_paren)) 00774 parseParens(); 00775 if (FormatTok->is(tok::l_brace)) 00776 parseChildBlock(); 00777 break; 00778 case tok::l_brace: 00779 if (!tryToParseBracedList()) { 00780 // A block outside of parentheses must be the last part of a 00781 // structural element. 00782 // FIXME: Figure out cases where this is not true, and add projections 00783 // for them (the one we know is missing are lambdas). 00784 if (Style.BreakBeforeBraces != FormatStyle::BS_Attach) 00785 addUnwrappedLine(); 00786 FormatTok->Type = TT_FunctionLBrace; 00787 parseBlock(/*MustBeDeclaration=*/false); 00788 addUnwrappedLine(); 00789 return; 00790 } 00791 // Otherwise this was a braced init list, and the structural 00792 // element continues. 00793 break; 00794 case tok::kw_try: 00795 // We arrive here when parsing function-try blocks. 00796 parseTryCatch(); 00797 return; 00798 case tok::identifier: { 00799 StringRef Text = FormatTok->TokenText; 00800 // Parse function literal unless 'function' is the first token in a line 00801 // in which case this should be treated as a free-standing function. 00802 if (Style.Language == FormatStyle::LK_JavaScript && Text == "function" && 00803 Line->Tokens.size() > 0) { 00804 tryToParseJSFunction(); 00805 break; 00806 } 00807 nextToken(); 00808 if (Line->Tokens.size() == 1) { 00809 if (FormatTok->Tok.is(tok::colon)) { 00810 parseLabel(); 00811 return; 00812 } 00813 // Recognize function-like macro usages without trailing semicolon as 00814 // well as free-standing macrose like Q_OBJECT. 00815 bool FunctionLike = FormatTok->is(tok::l_paren); 00816 if (FunctionLike) 00817 parseParens(); 00818 if (FormatTok->NewlinesBefore > 0 && 00819 (Text.size() >= 5 || FunctionLike) && 00820 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 00821 addUnwrappedLine(); 00822 return; 00823 } 00824 } 00825 break; 00826 } 00827 case tok::equal: 00828 nextToken(); 00829 if (FormatTok->Tok.is(tok::l_brace)) { 00830 parseBracedList(); 00831 } 00832 break; 00833 case tok::l_square: 00834 parseSquare(); 00835 break; 00836 default: 00837 nextToken(); 00838 break; 00839 } 00840 } while (!eof()); 00841 } 00842 00843 bool UnwrappedLineParser::tryToParseLambda() { 00844 // FIXME: This is a dirty way to access the previous token. Find a better 00845 // solution. 00846 if (!Line->Tokens.empty() && 00847 (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator, 00848 tok::kw_new, tok::kw_delete) || 00849 Line->Tokens.back().Tok->closesScope() || 00850 Line->Tokens.back().Tok->isSimpleTypeSpecifier())) { 00851 nextToken(); 00852 return false; 00853 } 00854 assert(FormatTok->is(tok::l_square)); 00855 FormatToken &LSquare = *FormatTok; 00856 if (!tryToParseLambdaIntroducer()) 00857 return false; 00858 00859 while (FormatTok->isNot(tok::l_brace)) { 00860 if (FormatTok->isSimpleTypeSpecifier()) { 00861 nextToken(); 00862 continue; 00863 } 00864 switch (FormatTok->Tok.getKind()) { 00865 case tok::l_brace: 00866 break; 00867 case tok::l_paren: 00868 parseParens(); 00869 break; 00870 case tok::less: 00871 case tok::greater: 00872 case tok::identifier: 00873 case tok::coloncolon: 00874 case tok::kw_mutable: 00875 nextToken(); 00876 break; 00877 case tok::arrow: 00878 FormatTok->Type = TT_TrailingReturnArrow; 00879 nextToken(); 00880 break; 00881 default: 00882 return true; 00883 } 00884 } 00885 LSquare.Type = TT_LambdaLSquare; 00886 parseChildBlock(); 00887 return true; 00888 } 00889 00890 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 00891 nextToken(); 00892 if (FormatTok->is(tok::equal)) { 00893 nextToken(); 00894 if (FormatTok->is(tok::r_square)) { 00895 nextToken(); 00896 return true; 00897 } 00898 if (FormatTok->isNot(tok::comma)) 00899 return false; 00900 nextToken(); 00901 } else if (FormatTok->is(tok::amp)) { 00902 nextToken(); 00903 if (FormatTok->is(tok::r_square)) { 00904 nextToken(); 00905 return true; 00906 } 00907 if (!FormatTok->isOneOf(tok::comma, tok::identifier)) { 00908 return false; 00909 } 00910 if (FormatTok->is(tok::comma)) 00911 nextToken(); 00912 } else if (FormatTok->is(tok::r_square)) { 00913 nextToken(); 00914 return true; 00915 } 00916 do { 00917 if (FormatTok->is(tok::amp)) 00918 nextToken(); 00919 if (!FormatTok->isOneOf(tok::identifier, tok::kw_this)) 00920 return false; 00921 nextToken(); 00922 if (FormatTok->is(tok::ellipsis)) 00923 nextToken(); 00924 if (FormatTok->is(tok::comma)) { 00925 nextToken(); 00926 } else if (FormatTok->is(tok::r_square)) { 00927 nextToken(); 00928 return true; 00929 } else { 00930 return false; 00931 } 00932 } while (!eof()); 00933 return false; 00934 } 00935 00936 void UnwrappedLineParser::tryToParseJSFunction() { 00937 nextToken(); 00938 00939 // Consume function name. 00940 if (FormatTok->is(tok::identifier)) 00941 nextToken(); 00942 00943 if (FormatTok->isNot(tok::l_paren)) 00944 return; 00945 nextToken(); 00946 while (FormatTok->isNot(tok::l_brace)) { 00947 // Err on the side of caution in order to avoid consuming the full file in 00948 // case of incomplete code. 00949 if (!FormatTok->isOneOf(tok::identifier, tok::comma, tok::r_paren, 00950 tok::comment)) 00951 return; 00952 nextToken(); 00953 } 00954 parseChildBlock(); 00955 } 00956 00957 bool UnwrappedLineParser::tryToParseBracedList() { 00958 if (FormatTok->BlockKind == BK_Unknown) 00959 calculateBraceTypes(); 00960 assert(FormatTok->BlockKind != BK_Unknown); 00961 if (FormatTok->BlockKind == BK_Block) 00962 return false; 00963 parseBracedList(); 00964 return true; 00965 } 00966 00967 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { 00968 bool HasError = false; 00969 nextToken(); 00970 00971 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 00972 // replace this by using parseAssigmentExpression() inside. 00973 do { 00974 if (Style.Language == FormatStyle::LK_JavaScript && 00975 FormatTok->is(Keywords.kw_function)) { 00976 tryToParseJSFunction(); 00977 continue; 00978 } 00979 switch (FormatTok->Tok.getKind()) { 00980 case tok::caret: 00981 nextToken(); 00982 if (FormatTok->is(tok::l_brace)) { 00983 parseChildBlock(); 00984 } 00985 break; 00986 case tok::l_square: 00987 tryToParseLambda(); 00988 break; 00989 case tok::l_brace: 00990 // Assume there are no blocks inside a braced init list apart 00991 // from the ones we explicitly parse out (like lambdas). 00992 FormatTok->BlockKind = BK_BracedInit; 00993 parseBracedList(); 00994 break; 00995 case tok::r_brace: 00996 nextToken(); 00997 return !HasError; 00998 case tok::semi: 00999 HasError = true; 01000 if (!ContinueOnSemicolons) 01001 return !HasError; 01002 nextToken(); 01003 break; 01004 case tok::comma: 01005 nextToken(); 01006 break; 01007 default: 01008 nextToken(); 01009 break; 01010 } 01011 } while (!eof()); 01012 return false; 01013 } 01014 01015 void UnwrappedLineParser::parseParens() { 01016 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 01017 nextToken(); 01018 do { 01019 switch (FormatTok->Tok.getKind()) { 01020 case tok::l_paren: 01021 parseParens(); 01022 break; 01023 case tok::r_paren: 01024 nextToken(); 01025 return; 01026 case tok::r_brace: 01027 // A "}" inside parenthesis is an error if there wasn't a matching "{". 01028 return; 01029 case tok::l_square: 01030 tryToParseLambda(); 01031 break; 01032 case tok::l_brace: { 01033 if (!tryToParseBracedList()) { 01034 parseChildBlock(); 01035 } 01036 break; 01037 } 01038 case tok::at: 01039 nextToken(); 01040 if (FormatTok->Tok.is(tok::l_brace)) 01041 parseBracedList(); 01042 break; 01043 case tok::identifier: 01044 if (Style.Language == FormatStyle::LK_JavaScript && 01045 FormatTok->is(Keywords.kw_function)) 01046 tryToParseJSFunction(); 01047 else 01048 nextToken(); 01049 break; 01050 default: 01051 nextToken(); 01052 break; 01053 } 01054 } while (!eof()); 01055 } 01056 01057 void UnwrappedLineParser::parseSquare() { 01058 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 01059 if (tryToParseLambda()) 01060 return; 01061 do { 01062 switch (FormatTok->Tok.getKind()) { 01063 case tok::l_paren: 01064 parseParens(); 01065 break; 01066 case tok::r_square: 01067 nextToken(); 01068 return; 01069 case tok::r_brace: 01070 // A "}" inside parenthesis is an error if there wasn't a matching "{". 01071 return; 01072 case tok::l_square: 01073 parseSquare(); 01074 break; 01075 case tok::l_brace: { 01076 if (!tryToParseBracedList()) { 01077 parseChildBlock(); 01078 } 01079 break; 01080 } 01081 case tok::at: 01082 nextToken(); 01083 if (FormatTok->Tok.is(tok::l_brace)) 01084 parseBracedList(); 01085 break; 01086 default: 01087 nextToken(); 01088 break; 01089 } 01090 } while (!eof()); 01091 } 01092 01093 void UnwrappedLineParser::parseIfThenElse() { 01094 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 01095 nextToken(); 01096 if (FormatTok->Tok.is(tok::l_paren)) 01097 parseParens(); 01098 bool NeedsUnwrappedLine = false; 01099 if (FormatTok->Tok.is(tok::l_brace)) { 01100 CompoundStatementIndenter Indenter(this, Style, Line->Level); 01101 parseBlock(/*MustBeDeclaration=*/false); 01102 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 01103 Style.BreakBeforeBraces == FormatStyle::BS_GNU) { 01104 addUnwrappedLine(); 01105 } else { 01106 NeedsUnwrappedLine = true; 01107 } 01108 } else { 01109 addUnwrappedLine(); 01110 ++Line->Level; 01111 parseStructuralElement(); 01112 --Line->Level; 01113 } 01114 if (FormatTok->Tok.is(tok::kw_else)) { 01115 if (Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) 01116 addUnwrappedLine(); 01117 nextToken(); 01118 if (FormatTok->Tok.is(tok::l_brace)) { 01119 CompoundStatementIndenter Indenter(this, Style, Line->Level); 01120 parseBlock(/*MustBeDeclaration=*/false); 01121 addUnwrappedLine(); 01122 } else if (FormatTok->Tok.is(tok::kw_if)) { 01123 parseIfThenElse(); 01124 } else { 01125 addUnwrappedLine(); 01126 ++Line->Level; 01127 parseStructuralElement(); 01128 --Line->Level; 01129 } 01130 } else if (NeedsUnwrappedLine) { 01131 addUnwrappedLine(); 01132 } 01133 } 01134 01135 void UnwrappedLineParser::parseTryCatch() { 01136 assert(FormatTok->is(tok::kw_try) && "'try' expected"); 01137 nextToken(); 01138 bool NeedsUnwrappedLine = false; 01139 if (FormatTok->is(tok::colon)) { 01140 // We are in a function try block, what comes is an initializer list. 01141 nextToken(); 01142 while (FormatTok->is(tok::identifier)) { 01143 nextToken(); 01144 if (FormatTok->is(tok::l_paren)) 01145 parseParens(); 01146 else 01147 StructuralError = true; 01148 if (FormatTok->is(tok::comma)) 01149 nextToken(); 01150 } 01151 } 01152 if (FormatTok->is(tok::l_brace)) { 01153 CompoundStatementIndenter Indenter(this, Style, Line->Level); 01154 parseBlock(/*MustBeDeclaration=*/false); 01155 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 01156 Style.BreakBeforeBraces == FormatStyle::BS_GNU || 01157 Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) { 01158 addUnwrappedLine(); 01159 } else { 01160 NeedsUnwrappedLine = true; 01161 } 01162 } else if (!FormatTok->is(tok::kw_catch)) { 01163 // The C++ standard requires a compound-statement after a try. 01164 // If there's none, we try to assume there's a structuralElement 01165 // and try to continue. 01166 StructuralError = true; 01167 addUnwrappedLine(); 01168 ++Line->Level; 01169 parseStructuralElement(); 01170 --Line->Level; 01171 } 01172 while (FormatTok->is(tok::kw_catch) || 01173 ((Style.Language == FormatStyle::LK_Java || 01174 Style.Language == FormatStyle::LK_JavaScript) && 01175 FormatTok->is(Keywords.kw_finally))) { 01176 nextToken(); 01177 while (FormatTok->isNot(tok::l_brace)) { 01178 if (FormatTok->is(tok::l_paren)) { 01179 parseParens(); 01180 continue; 01181 } 01182 if (FormatTok->isOneOf(tok::semi, tok::r_brace)) 01183 return; 01184 nextToken(); 01185 } 01186 NeedsUnwrappedLine = false; 01187 CompoundStatementIndenter Indenter(this, Style, Line->Level); 01188 parseBlock(/*MustBeDeclaration=*/false); 01189 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 01190 Style.BreakBeforeBraces == FormatStyle::BS_GNU || 01191 Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) { 01192 addUnwrappedLine(); 01193 } else { 01194 NeedsUnwrappedLine = true; 01195 } 01196 } 01197 if (NeedsUnwrappedLine) { 01198 addUnwrappedLine(); 01199 } 01200 } 01201 01202 void UnwrappedLineParser::parseNamespace() { 01203 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 01204 01205 const FormatToken &InitialToken = *FormatTok; 01206 nextToken(); 01207 if (FormatTok->Tok.is(tok::identifier)) 01208 nextToken(); 01209 if (FormatTok->Tok.is(tok::l_brace)) { 01210 if (ShouldBreakBeforeBrace(Style, InitialToken)) 01211 addUnwrappedLine(); 01212 01213 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 01214 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 01215 DeclarationScopeStack.size() > 1); 01216 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 01217 // Munch the semicolon after a namespace. This is more common than one would 01218 // think. Puttin the semicolon into its own line is very ugly. 01219 if (FormatTok->Tok.is(tok::semi)) 01220 nextToken(); 01221 addUnwrappedLine(); 01222 } 01223 // FIXME: Add error handling. 01224 } 01225 01226 void UnwrappedLineParser::parseForOrWhileLoop() { 01227 assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while) || 01228 FormatTok->IsForEachMacro) && 01229 "'for', 'while' or foreach macro expected"); 01230 nextToken(); 01231 if (FormatTok->Tok.is(tok::l_paren)) 01232 parseParens(); 01233 if (FormatTok->Tok.is(tok::l_brace)) { 01234 CompoundStatementIndenter Indenter(this, Style, Line->Level); 01235 parseBlock(/*MustBeDeclaration=*/false); 01236 addUnwrappedLine(); 01237 } else { 01238 addUnwrappedLine(); 01239 ++Line->Level; 01240 parseStructuralElement(); 01241 --Line->Level; 01242 } 01243 } 01244 01245 void UnwrappedLineParser::parseDoWhile() { 01246 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 01247 nextToken(); 01248 if (FormatTok->Tok.is(tok::l_brace)) { 01249 CompoundStatementIndenter Indenter(this, Style, Line->Level); 01250 parseBlock(/*MustBeDeclaration=*/false); 01251 if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) 01252 addUnwrappedLine(); 01253 } else { 01254 addUnwrappedLine(); 01255 ++Line->Level; 01256 parseStructuralElement(); 01257 --Line->Level; 01258 } 01259 01260 // FIXME: Add error handling. 01261 if (!FormatTok->Tok.is(tok::kw_while)) { 01262 addUnwrappedLine(); 01263 return; 01264 } 01265 01266 nextToken(); 01267 parseStructuralElement(); 01268 } 01269 01270 void UnwrappedLineParser::parseLabel() { 01271 nextToken(); 01272 unsigned OldLineLevel = Line->Level; 01273 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 01274 --Line->Level; 01275 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 01276 CompoundStatementIndenter Indenter(this, Style, Line->Level); 01277 parseBlock(/*MustBeDeclaration=*/false); 01278 if (FormatTok->Tok.is(tok::kw_break)) { 01279 // "break;" after "}" on its own line only for BS_Allman and BS_GNU 01280 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 01281 Style.BreakBeforeBraces == FormatStyle::BS_GNU) { 01282 addUnwrappedLine(); 01283 } 01284 parseStructuralElement(); 01285 } 01286 addUnwrappedLine(); 01287 } else { 01288 addUnwrappedLine(); 01289 } 01290 Line->Level = OldLineLevel; 01291 } 01292 01293 void UnwrappedLineParser::parseCaseLabel() { 01294 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 01295 // FIXME: fix handling of complex expressions here. 01296 do { 01297 nextToken(); 01298 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 01299 parseLabel(); 01300 } 01301 01302 void UnwrappedLineParser::parseSwitch() { 01303 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 01304 nextToken(); 01305 if (FormatTok->Tok.is(tok::l_paren)) 01306 parseParens(); 01307 if (FormatTok->Tok.is(tok::l_brace)) { 01308 CompoundStatementIndenter Indenter(this, Style, Line->Level); 01309 parseBlock(/*MustBeDeclaration=*/false); 01310 addUnwrappedLine(); 01311 } else { 01312 addUnwrappedLine(); 01313 ++Line->Level; 01314 parseStructuralElement(); 01315 --Line->Level; 01316 } 01317 } 01318 01319 void UnwrappedLineParser::parseAccessSpecifier() { 01320 nextToken(); 01321 // Understand Qt's slots. 01322 if (FormatTok->is(tok::identifier) && 01323 (FormatTok->TokenText == "slots" || FormatTok->TokenText == "Q_SLOTS")) 01324 nextToken(); 01325 // Otherwise, we don't know what it is, and we'd better keep the next token. 01326 if (FormatTok->Tok.is(tok::colon)) 01327 nextToken(); 01328 addUnwrappedLine(); 01329 } 01330 01331 void UnwrappedLineParser::parseEnum() { 01332 // Won't be 'enum' for NS_ENUMs. 01333 if (FormatTok->Tok.is(tok::kw_enum)) 01334 nextToken(); 01335 01336 // Eat up enum class ... 01337 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 01338 nextToken(); 01339 while (FormatTok->Tok.getIdentifierInfo() || 01340 FormatTok->isOneOf(tok::colon, tok::coloncolon)) { 01341 nextToken(); 01342 // We can have macros or attributes in between 'enum' and the enum name. 01343 if (FormatTok->Tok.is(tok::l_paren)) 01344 parseParens(); 01345 if (FormatTok->Tok.is(tok::identifier)) 01346 nextToken(); 01347 } 01348 01349 // Just a declaration or something is wrong. 01350 if (!FormatTok->is(tok::l_brace)) 01351 return; 01352 FormatTok->BlockKind = BK_Block; 01353 01354 if (Style.Language == FormatStyle::LK_Java) { 01355 // Java enums are different. 01356 parseJavaEnumBody(); 01357 return; 01358 } 01359 01360 // Parse enum body. 01361 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 01362 if (HasError) { 01363 if (FormatTok->is(tok::semi)) 01364 nextToken(); 01365 addUnwrappedLine(); 01366 } 01367 01368 // We fall through to parsing a structural element afterwards, so that in 01369 // enum A {} n, m; 01370 // "} n, m;" will end up in one unwrapped line. 01371 } 01372 01373 void UnwrappedLineParser::parseJavaEnumBody() { 01374 // Determine whether the enum is simple, i.e. does not have a semicolon or 01375 // constants with class bodies. Simple enums can be formatted like braced 01376 // lists, contracted to a single line, etc. 01377 unsigned StoredPosition = Tokens->getPosition(); 01378 bool IsSimple = true; 01379 FormatToken *Tok = Tokens->getNextToken(); 01380 while (Tok) { 01381 if (Tok->is(tok::r_brace)) 01382 break; 01383 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 01384 IsSimple = false; 01385 break; 01386 } 01387 // FIXME: This will also mark enums with braces in the arguments to enum 01388 // constants as "not simple". This is probably fine in practice, though. 01389 Tok = Tokens->getNextToken(); 01390 } 01391 FormatTok = Tokens->setPosition(StoredPosition); 01392 01393 if (IsSimple) { 01394 parseBracedList(); 01395 addUnwrappedLine(); 01396 return; 01397 } 01398 01399 // Parse the body of a more complex enum. 01400 // First add a line for everything up to the "{". 01401 nextToken(); 01402 addUnwrappedLine(); 01403 ++Line->Level; 01404 01405 // Parse the enum constants. 01406 while (FormatTok) { 01407 if (FormatTok->is(tok::l_brace)) { 01408 // Parse the constant's class body. 01409 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 01410 /*MunchSemi=*/false); 01411 } else if (FormatTok->is(tok::l_paren)) { 01412 parseParens(); 01413 } else if (FormatTok->is(tok::comma)) { 01414 nextToken(); 01415 addUnwrappedLine(); 01416 } else if (FormatTok->is(tok::semi)) { 01417 nextToken(); 01418 addUnwrappedLine(); 01419 break; 01420 } else if (FormatTok->is(tok::r_brace)) { 01421 addUnwrappedLine(); 01422 break; 01423 } else { 01424 nextToken(); 01425 } 01426 } 01427 01428 // Parse the class body after the enum's ";" if any. 01429 parseLevel(/*HasOpeningBrace=*/true); 01430 nextToken(); 01431 --Line->Level; 01432 addUnwrappedLine(); 01433 } 01434 01435 void UnwrappedLineParser::parseRecord() { 01436 const FormatToken &InitialToken = *FormatTok; 01437 nextToken(); 01438 if (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw___attribute, 01439 tok::kw___declspec, tok::kw_alignas)) { 01440 nextToken(); 01441 // We can have macros or attributes in between 'class' and the class name. 01442 if (FormatTok->Tok.is(tok::l_paren)) { 01443 parseParens(); 01444 } 01445 // The actual identifier can be a nested name specifier, and in macros 01446 // it is often token-pasted. 01447 while (FormatTok->is(tok::identifier) || FormatTok->is(tok::coloncolon) || 01448 FormatTok->is(tok::hashhash) || 01449 (Style.Language == FormatStyle::LK_Java && 01450 FormatTok->isOneOf(tok::period, tok::comma))) 01451 nextToken(); 01452 01453 // Note that parsing away template declarations here leads to incorrectly 01454 // accepting function declarations as record declarations. 01455 // In general, we cannot solve this problem. Consider: 01456 // class A<int> B() {} 01457 // which can be a function definition or a class definition when B() is a 01458 // macro. If we find enough real-world cases where this is a problem, we 01459 // can parse for the 'template' keyword in the beginning of the statement, 01460 // and thus rule out the record production in case there is no template 01461 // (this would still leave us with an ambiguity between template function 01462 // and class declarations). 01463 if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) { 01464 while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) { 01465 if (FormatTok->Tok.is(tok::semi)) 01466 return; 01467 nextToken(); 01468 } 01469 } 01470 } 01471 if (FormatTok->Tok.is(tok::l_brace)) { 01472 if (ShouldBreakBeforeBrace(Style, InitialToken)) 01473 addUnwrappedLine(); 01474 01475 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 01476 /*MunchSemi=*/false); 01477 } 01478 // We fall through to parsing a structural element afterwards, so 01479 // class A {} n, m; 01480 // will end up in one unwrapped line. 01481 // This does not apply for Java. 01482 if (Style.Language == FormatStyle::LK_Java) 01483 addUnwrappedLine(); 01484 } 01485 01486 void UnwrappedLineParser::parseObjCProtocolList() { 01487 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 01488 do 01489 nextToken(); 01490 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 01491 nextToken(); // Skip '>'. 01492 } 01493 01494 void UnwrappedLineParser::parseObjCUntilAtEnd() { 01495 do { 01496 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 01497 nextToken(); 01498 addUnwrappedLine(); 01499 break; 01500 } 01501 if (FormatTok->is(tok::l_brace)) { 01502 parseBlock(/*MustBeDeclaration=*/false); 01503 // In ObjC interfaces, nothing should be following the "}". 01504 addUnwrappedLine(); 01505 } else if (FormatTok->is(tok::r_brace)) { 01506 // Ignore stray "}". parseStructuralElement doesn't consume them. 01507 nextToken(); 01508 addUnwrappedLine(); 01509 } else { 01510 parseStructuralElement(); 01511 } 01512 } while (!eof()); 01513 } 01514 01515 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 01516 nextToken(); 01517 nextToken(); // interface name 01518 01519 // @interface can be followed by either a base class, or a category. 01520 if (FormatTok->Tok.is(tok::colon)) { 01521 nextToken(); 01522 nextToken(); // base class name 01523 } else if (FormatTok->Tok.is(tok::l_paren)) 01524 // Skip category, if present. 01525 parseParens(); 01526 01527 if (FormatTok->Tok.is(tok::less)) 01528 parseObjCProtocolList(); 01529 01530 if (FormatTok->Tok.is(tok::l_brace)) { 01531 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 01532 Style.BreakBeforeBraces == FormatStyle::BS_GNU) 01533 addUnwrappedLine(); 01534 parseBlock(/*MustBeDeclaration=*/true); 01535 } 01536 01537 // With instance variables, this puts '}' on its own line. Without instance 01538 // variables, this ends the @interface line. 01539 addUnwrappedLine(); 01540 01541 parseObjCUntilAtEnd(); 01542 } 01543 01544 void UnwrappedLineParser::parseObjCProtocol() { 01545 nextToken(); 01546 nextToken(); // protocol name 01547 01548 if (FormatTok->Tok.is(tok::less)) 01549 parseObjCProtocolList(); 01550 01551 // Check for protocol declaration. 01552 if (FormatTok->Tok.is(tok::semi)) { 01553 nextToken(); 01554 return addUnwrappedLine(); 01555 } 01556 01557 addUnwrappedLine(); 01558 parseObjCUntilAtEnd(); 01559 } 01560 01561 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 01562 StringRef Prefix = "") { 01563 llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" 01564 << (Line.InPPDirective ? " MACRO" : "") << ": "; 01565 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 01566 E = Line.Tokens.end(); 01567 I != E; ++I) { 01568 llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] "; 01569 } 01570 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 01571 E = Line.Tokens.end(); 01572 I != E; ++I) { 01573 const UnwrappedLineNode &Node = *I; 01574 for (SmallVectorImpl<UnwrappedLine>::const_iterator 01575 I = Node.Children.begin(), 01576 E = Node.Children.end(); 01577 I != E; ++I) { 01578 printDebugInfo(*I, "\nChild: "); 01579 } 01580 } 01581 llvm::dbgs() << "\n"; 01582 } 01583 01584 void UnwrappedLineParser::addUnwrappedLine() { 01585 if (Line->Tokens.empty()) 01586 return; 01587 DEBUG({ 01588 if (CurrentLines == &Lines) 01589 printDebugInfo(*Line); 01590 }); 01591 CurrentLines->push_back(*Line); 01592 Line->Tokens.clear(); 01593 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 01594 for (SmallVectorImpl<UnwrappedLine>::iterator 01595 I = PreprocessorDirectives.begin(), 01596 E = PreprocessorDirectives.end(); 01597 I != E; ++I) { 01598 CurrentLines->push_back(*I); 01599 } 01600 PreprocessorDirectives.clear(); 01601 } 01602 } 01603 01604 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 01605 01606 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 01607 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 01608 FormatTok.NewlinesBefore > 0; 01609 } 01610 01611 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 01612 bool JustComments = Line->Tokens.empty(); 01613 for (SmallVectorImpl<FormatToken *>::const_iterator 01614 I = CommentsBeforeNextToken.begin(), 01615 E = CommentsBeforeNextToken.end(); 01616 I != E; ++I) { 01617 if (isOnNewLine(**I) && JustComments) { 01618 addUnwrappedLine(); 01619 } 01620 pushToken(*I); 01621 } 01622 if (NewlineBeforeNext && JustComments) { 01623 addUnwrappedLine(); 01624 } 01625 CommentsBeforeNextToken.clear(); 01626 } 01627 01628 void UnwrappedLineParser::nextToken() { 01629 if (eof()) 01630 return; 01631 flushComments(isOnNewLine(*FormatTok)); 01632 pushToken(FormatTok); 01633 readToken(); 01634 } 01635 01636 void UnwrappedLineParser::readToken() { 01637 bool CommentsInCurrentLine = true; 01638 do { 01639 FormatTok = Tokens->getNextToken(); 01640 assert(FormatTok); 01641 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 01642 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 01643 // If there is an unfinished unwrapped line, we flush the preprocessor 01644 // directives only after that unwrapped line was finished later. 01645 bool SwitchToPreprocessorLines = 01646 !Line->Tokens.empty() && CurrentLines == &Lines; 01647 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 01648 // Comments stored before the preprocessor directive need to be output 01649 // before the preprocessor directive, at the same level as the 01650 // preprocessor directive, as we consider them to apply to the directive. 01651 flushComments(isOnNewLine(*FormatTok)); 01652 parsePPDirective(); 01653 } 01654 while (FormatTok->Type == TT_ConflictStart || 01655 FormatTok->Type == TT_ConflictEnd || 01656 FormatTok->Type == TT_ConflictAlternative) { 01657 if (FormatTok->Type == TT_ConflictStart) { 01658 conditionalCompilationStart(/*Unreachable=*/false); 01659 } else if (FormatTok->Type == TT_ConflictAlternative) { 01660 conditionalCompilationAlternative(); 01661 } else if (FormatTok->Type == TT_ConflictEnd) { 01662 conditionalCompilationEnd(); 01663 } 01664 FormatTok = Tokens->getNextToken(); 01665 FormatTok->MustBreakBefore = true; 01666 } 01667 01668 if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && 01669 !Line->InPPDirective) { 01670 continue; 01671 } 01672 01673 if (!FormatTok->Tok.is(tok::comment)) 01674 return; 01675 if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) { 01676 CommentsInCurrentLine = false; 01677 } 01678 if (CommentsInCurrentLine) { 01679 pushToken(FormatTok); 01680 } else { 01681 CommentsBeforeNextToken.push_back(FormatTok); 01682 } 01683 } while (!eof()); 01684 } 01685 01686 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 01687 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 01688 if (MustBreakBeforeNextToken) { 01689 Line->Tokens.back().Tok->MustBreakBefore = true; 01690 MustBreakBeforeNextToken = false; 01691 } 01692 } 01693 01694 } // end namespace format 01695 } // end namespace clang