clang: UnwrappedLineParser.cpp Source File

Go to the documentation of this file.
00001 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 ///
00010 /// \file
00011 /// \brief This file contains the implementation of the UnwrappedLineParser,
00012 /// which turns a stream of tokens into UnwrappedLines.
00013 ///
00014 //===----------------------------------------------------------------------===//
00015 
00016 #include "UnwrappedLineParser.h"
00017 #include "llvm/Support/Debug.h"
00018 
00019 #define DEBUG_TYPE "format-parser"
00020 
00021 namespace clang {
00022 namespace format {
00023 
00024 class FormatTokenSource {
00025 public:
00026   virtual ~FormatTokenSource() {}
00027   virtual FormatToken *getNextToken() = 0;
00028 
00029   virtual unsigned getPosition() = 0;
00030   virtual FormatToken *setPosition(unsigned Position) = 0;
00031 };
00032 
00033 namespace {
00034 
00035 class ScopedDeclarationState {
00036 public:
00037   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
00038                          bool MustBeDeclaration)
00039       : Line(Line), Stack(Stack) {
00040     Line.MustBeDeclaration = MustBeDeclaration;
00041     Stack.push_back(MustBeDeclaration);
00042   }
00043   ~ScopedDeclarationState() {
00044     Stack.pop_back();
00045     if (!Stack.empty())
00046       Line.MustBeDeclaration = Stack.back();
00047     else
00048       Line.MustBeDeclaration = true;
00049   }
00050 
00051 private:
00052   UnwrappedLine &Line;
00053   std::vector<bool> &Stack;
00054 };
00055 
00056 class ScopedMacroState : public FormatTokenSource {
00057 public:
00058   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
00059                    FormatToken *&ResetToken, bool &StructuralError)
00060       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
00061         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
00062         StructuralError(StructuralError),
00063         PreviousStructuralError(StructuralError), Token(nullptr) {
00064     TokenSource = this;
00065     Line.Level = 0;
00066     Line.InPPDirective = true;
00067   }
00068 
00069   ~ScopedMacroState() {
00070     TokenSource = PreviousTokenSource;
00071     ResetToken = Token;
00072     Line.InPPDirective = false;
00073     Line.Level = PreviousLineLevel;
00074     StructuralError = PreviousStructuralError;
00075   }
00076 
00077   FormatToken *getNextToken() override {
00078     // The \c UnwrappedLineParser guards against this by never calling
00079     // \c getNextToken() after it has encountered the first eof token.
00080     assert(!eof());
00081     Token = PreviousTokenSource->getNextToken();
00082     if (eof())
00083       return getFakeEOF();
00084     return Token;
00085   }
00086 
00087   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
00088 
00089   FormatToken *setPosition(unsigned Position) override {
00090     Token = PreviousTokenSource->setPosition(Position);
00091     return Token;
00092   }
00093 
00094 private:
00095   bool eof() { return Token && Token->HasUnescapedNewline; }
00096 
00097   FormatToken *getFakeEOF() {
00098     static bool EOFInitialized = false;
00099     static FormatToken FormatTok;
00100     if (!EOFInitialized) {
00101       FormatTok.Tok.startToken();
00102       FormatTok.Tok.setKind(tok::eof);
00103       EOFInitialized = true;
00104     }
00105     return &FormatTok;
00106   }
00107 
00108   UnwrappedLine &Line;
00109   FormatTokenSource *&TokenSource;
00110   FormatToken *&ResetToken;
00111   unsigned PreviousLineLevel;
00112   FormatTokenSource *PreviousTokenSource;
00113   bool &StructuralError;
00114   bool PreviousStructuralError;
00115 
00116   FormatToken *Token;
00117 };
00118 
00119 } // end anonymous namespace
00120 
00121 class ScopedLineState {
00122 public:
00123   ScopedLineState(UnwrappedLineParser &Parser,
00124                   bool SwitchToPreprocessorLines = false)
00125       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
00126     if (SwitchToPreprocessorLines)
00127       Parser.CurrentLines = &Parser.PreprocessorDirectives;
00128     else if (!Parser.Line->Tokens.empty())
00129       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
00130     PreBlockLine = std::move(Parser.Line);
00131     Parser.Line = llvm::make_unique<UnwrappedLine>();
00132     Parser.Line->Level = PreBlockLine->Level;
00133     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
00134   }
00135 
00136   ~ScopedLineState() {
00137     if (!Parser.Line->Tokens.empty()) {
00138       Parser.addUnwrappedLine();
00139     }
00140     assert(Parser.Line->Tokens.empty());
00141     Parser.Line = std::move(PreBlockLine);
00142     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
00143       Parser.MustBreakBeforeNextToken = true;
00144     Parser.CurrentLines = OriginalLines;
00145   }
00146 
00147 private:
00148   UnwrappedLineParser &Parser;
00149 
00150   std::unique_ptr<UnwrappedLine> PreBlockLine;
00151   SmallVectorImpl<UnwrappedLine> *OriginalLines;
00152 };
00153 
00154 class CompoundStatementIndenter {
00155 public:
00156   CompoundStatementIndenter(UnwrappedLineParser *Parser,
00157                             const FormatStyle &Style, unsigned &LineLevel)
00158       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
00159     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) {
00160       Parser->addUnwrappedLine();
00161     } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
00162       Parser->addUnwrappedLine();
00163       ++LineLevel;
00164     }
00165   }
00166   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
00167 
00168 private:
00169   unsigned &LineLevel;
00170   unsigned OldLineLevel;
00171 };
00172 
00173 namespace {
00174 
00175 class IndexedTokenSource : public FormatTokenSource {
00176 public:
00177   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
00178       : Tokens(Tokens), Position(-1) {}
00179 
00180   FormatToken *getNextToken() override {
00181     ++Position;
00182     return Tokens[Position];
00183   }
00184 
00185   unsigned getPosition() override {
00186     assert(Position >= 0);
00187     return Position;
00188   }
00189 
00190   FormatToken *setPosition(unsigned P) override {
00191     Position = P;
00192     return Tokens[Position];
00193   }
00194 
00195   void reset() { Position = -1; }
00196 
00197 private:
00198   ArrayRef<FormatToken *> Tokens;
00199   int Position;
00200 };
00201 
00202 } // end anonymous namespace
00203 
00204 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
00205                                          const AdditionalKeywords &Keywords,
00206                                          ArrayRef<FormatToken *> Tokens,
00207                                          UnwrappedLineConsumer &Callback)
00208     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
00209       CurrentLines(&Lines), StructuralError(false), Style(Style),
00210       Keywords(Keywords), Tokens(nullptr), Callback(Callback),
00211       AllTokens(Tokens), PPBranchLevel(-1) {}
00212 
00213 void UnwrappedLineParser::reset() {
00214   PPBranchLevel = -1;
00215   Line.reset(new UnwrappedLine);
00216   CommentsBeforeNextToken.clear();
00217   FormatTok = nullptr;
00218   MustBreakBeforeNextToken = false;
00219   PreprocessorDirectives.clear();
00220   CurrentLines = &Lines;
00221   DeclarationScopeStack.clear();
00222   StructuralError = false;
00223   PPStack.clear();
00224 }
00225 
00226 bool UnwrappedLineParser::parse() {
00227   IndexedTokenSource TokenSource(AllTokens);
00228   do {
00229     DEBUG(llvm::dbgs() << "----\n");
00230     reset();
00231     Tokens = &TokenSource;
00232     TokenSource.reset();
00233 
00234     readToken();
00235     parseFile();
00236     // Create line with eof token.
00237     pushToken(FormatTok);
00238     addUnwrappedLine();
00239 
00240     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
00241                                                   E = Lines.end();
00242          I != E; ++I) {
00243       Callback.consumeUnwrappedLine(*I);
00244     }
00245     Callback.finishRun();
00246     Lines.clear();
00247     while (!PPLevelBranchIndex.empty() &&
00248            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
00249       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
00250       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
00251     }
00252     if (!PPLevelBranchIndex.empty()) {
00253       ++PPLevelBranchIndex.back();
00254       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
00255       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
00256     }
00257   } while (!PPLevelBranchIndex.empty());
00258 
00259   return StructuralError;
00260 }
00261 
00262 void UnwrappedLineParser::parseFile() {
00263   ScopedDeclarationState DeclarationState(
00264       *Line, DeclarationScopeStack,
00265       /*MustBeDeclaration=*/ !Line->InPPDirective);
00266   parseLevel(/*HasOpeningBrace=*/false);
00267   // Make sure to format the remaining tokens.
00268   flushComments(true);
00269   addUnwrappedLine();
00270 }
00271 
00272 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
00273   bool SwitchLabelEncountered = false;
00274   do {
00275     switch (FormatTok->Tok.getKind()) {
00276     case tok::comment:
00277       nextToken();
00278       addUnwrappedLine();
00279       break;
00280     case tok::l_brace:
00281       // FIXME: Add parameter whether this can happen - if this happens, we must
00282       // be in a non-declaration context.
00283       parseBlock(/*MustBeDeclaration=*/false);
00284       addUnwrappedLine();
00285       break;
00286     case tok::r_brace:
00287       if (HasOpeningBrace)
00288         return;
00289       StructuralError = true;
00290       nextToken();
00291       addUnwrappedLine();
00292       break;
00293     case tok::kw_default:
00294     case tok::kw_case:
00295       if (!SwitchLabelEncountered &&
00296           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
00297         ++Line->Level;
00298       SwitchLabelEncountered = true;
00299       parseStructuralElement();
00300       break;
00301     default:
00302       parseStructuralElement();
00303       break;
00304     }
00305   } while (!eof());
00306 }
00307 
00308 void UnwrappedLineParser::calculateBraceTypes() {
00309   // We'll parse forward through the tokens until we hit
00310   // a closing brace or eof - note that getNextToken() will
00311   // parse macros, so this will magically work inside macro
00312   // definitions, too.
00313   unsigned StoredPosition = Tokens->getPosition();
00314   FormatToken *Tok = FormatTok;
00315   // Keep a stack of positions of lbrace tokens. We will
00316   // update information about whether an lbrace starts a
00317   // braced init list or a different block during the loop.
00318   SmallVector<FormatToken *, 8> LBraceStack;
00319   assert(Tok->Tok.is(tok::l_brace));
00320   do {
00321     // Get next none-comment token.
00322     FormatToken *NextTok;
00323     unsigned ReadTokens = 0;
00324     do {
00325       NextTok = Tokens->getNextToken();
00326       ++ReadTokens;
00327     } while (NextTok->is(tok::comment));
00328 
00329     switch (Tok->Tok.getKind()) {
00330     case tok::l_brace:
00331       LBraceStack.push_back(Tok);
00332       break;
00333     case tok::r_brace:
00334       if (!LBraceStack.empty()) {
00335         if (LBraceStack.back()->BlockKind == BK_Unknown) {
00336           bool ProbablyBracedList = false;
00337           if (Style.Language == FormatStyle::LK_Proto) {
00338             ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
00339           } else {
00340             // Using OriginalColumn to distinguish between ObjC methods and
00341             // binary operators is a bit hacky.
00342             bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
00343                                     NextTok->OriginalColumn == 0;
00344 
00345             // If there is a comma, semicolon or right paren after the closing
00346             // brace, we assume this is a braced initializer list.  Note that
00347             // regardless how we mark inner braces here, we will overwrite the
00348             // BlockKind later if we parse a braced list (where all blocks
00349             // inside are by default braced lists), or when we explicitly detect
00350             // blocks (for example while parsing lambdas).
00351             //
00352             // We exclude + and - as they can be ObjC visibility modifiers.
00353             ProbablyBracedList =
00354                 NextTok->isOneOf(tok::comma, tok::semi, tok::period, tok::colon,
00355                                  tok::r_paren, tok::r_square, tok::l_brace,
00356                                  tok::l_paren, tok::ellipsis) ||
00357                 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
00358           }
00359           if (ProbablyBracedList) {
00360             Tok->BlockKind = BK_BracedInit;
00361             LBraceStack.back()->BlockKind = BK_BracedInit;
00362           } else {
00363             Tok->BlockKind = BK_Block;
00364             LBraceStack.back()->BlockKind = BK_Block;
00365           }
00366         }
00367         LBraceStack.pop_back();
00368       }
00369       break;
00370     case tok::at:
00371     case tok::semi:
00372     case tok::kw_if:
00373     case tok::kw_while:
00374     case tok::kw_for:
00375     case tok::kw_switch:
00376     case tok::kw_try:
00377       if (!LBraceStack.empty())
00378         LBraceStack.back()->BlockKind = BK_Block;
00379       break;
00380     default:
00381       break;
00382     }
00383     Tok = NextTok;
00384   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
00385   // Assume other blocks for all unclosed opening braces.
00386   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
00387     if (LBraceStack[i]->BlockKind == BK_Unknown)
00388       LBraceStack[i]->BlockKind = BK_Block;
00389   }
00390 
00391   FormatTok = Tokens->setPosition(StoredPosition);
00392 }
00393 
00394 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
00395                                      bool MunchSemi) {
00396   assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected");
00397   unsigned InitialLevel = Line->Level;
00398   nextToken();
00399 
00400   addUnwrappedLine();
00401 
00402   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
00403                                           MustBeDeclaration);
00404   if (AddLevel)
00405     ++Line->Level;
00406   parseLevel(/*HasOpeningBrace=*/true);
00407 
00408   if (!FormatTok->Tok.is(tok::r_brace)) {
00409     Line->Level = InitialLevel;
00410     StructuralError = true;
00411     return;
00412   }
00413 
00414   nextToken(); // Munch the closing brace.
00415   if (MunchSemi && FormatTok->Tok.is(tok::semi))
00416     nextToken();
00417   Line->Level = InitialLevel;
00418 }
00419 
00420 static bool IsGoogScope(const UnwrappedLine &Line) {
00421   if (Line.Tokens.size() < 4)
00422     return false;
00423   auto I = Line.Tokens.begin();
00424   if (I->Tok->TokenText != "goog")
00425     return false;
00426   ++I;
00427   if (I->Tok->isNot(tok::period))
00428     return false;
00429   ++I;
00430   if (I->Tok->TokenText != "scope")
00431     return false;
00432   ++I;
00433   return I->Tok->is(tok::l_paren);
00434 }
00435 
00436 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
00437                                    const FormatToken &InitialToken) {
00438   switch (Style.BreakBeforeBraces) {
00439   case FormatStyle::BS_Linux:
00440     return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class);
00441   case FormatStyle::BS_Allman:
00442   case FormatStyle::BS_GNU:
00443     return true;
00444   default:
00445     return false;
00446   }
00447 }
00448 
00449 void UnwrappedLineParser::parseChildBlock() {
00450   FormatTok->BlockKind = BK_Block;
00451   nextToken();
00452   {
00453     bool GoogScope =
00454         Style.Language == FormatStyle::LK_JavaScript && IsGoogScope(*Line);
00455     ScopedLineState LineState(*this);
00456     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
00457                                             /*MustBeDeclaration=*/false);
00458     Line->Level += GoogScope ? 0 : 1;
00459     parseLevel(/*HasOpeningBrace=*/true);
00460     Line->Level -= GoogScope ? 0 : 1;
00461   }
00462   nextToken();
00463 }
00464 
00465 void UnwrappedLineParser::parsePPDirective() {
00466   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
00467   ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
00468   nextToken();
00469 
00470   if (!FormatTok->Tok.getIdentifierInfo()) {
00471     parsePPUnknown();
00472     return;
00473   }
00474 
00475   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
00476   case tok::pp_define:
00477     parsePPDefine();
00478     return;
00479   case tok::pp_if:
00480     parsePPIf(/*IfDef=*/false);
00481     break;
00482   case tok::pp_ifdef:
00483   case tok::pp_ifndef:
00484     parsePPIf(/*IfDef=*/true);
00485     break;
00486   case tok::pp_else:
00487     parsePPElse();
00488     break;
00489   case tok::pp_elif:
00490     parsePPElIf();
00491     break;
00492   case tok::pp_endif:
00493     parsePPEndIf();
00494     break;
00495   default:
00496     parsePPUnknown();
00497     break;
00498   }
00499 }
00500 
00501 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
00502   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
00503     PPStack.push_back(PP_Unreachable);
00504   else
00505     PPStack.push_back(PP_Conditional);
00506 }
00507 
00508 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
00509   ++PPBranchLevel;
00510   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
00511   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
00512     PPLevelBranchIndex.push_back(0);
00513     PPLevelBranchCount.push_back(0);
00514   }
00515   PPChainBranchIndex.push(0);
00516   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
00517   conditionalCompilationCondition(Unreachable || Skip);
00518 }
00519 
00520 void UnwrappedLineParser::conditionalCompilationAlternative() {
00521   if (!PPStack.empty())
00522     PPStack.pop_back();
00523   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
00524   if (!PPChainBranchIndex.empty())
00525     ++PPChainBranchIndex.top();
00526   conditionalCompilationCondition(
00527       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
00528       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
00529 }
00530 
00531 void UnwrappedLineParser::conditionalCompilationEnd() {
00532   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
00533   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
00534     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
00535       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
00536     }
00537   }
00538   // Guard against #endif's without #if.
00539   if (PPBranchLevel > 0)
00540     --PPBranchLevel;
00541   if (!PPChainBranchIndex.empty())
00542     PPChainBranchIndex.pop();
00543   if (!PPStack.empty())
00544     PPStack.pop_back();
00545 }
00546 
00547 void UnwrappedLineParser::parsePPIf(bool IfDef) {
00548   nextToken();
00549   bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
00550                          StringRef(FormatTok->Tok.getLiteralData(),
00551                                    FormatTok->Tok.getLength()) == "0") ||
00552                         FormatTok->Tok.is(tok::kw_false);
00553   conditionalCompilationStart(!IfDef && IsLiteralFalse);
00554   parsePPUnknown();
00555 }
00556 
00557 void UnwrappedLineParser::parsePPElse() {
00558   conditionalCompilationAlternative();
00559   parsePPUnknown();
00560 }
00561 
00562 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
00563 
00564 void UnwrappedLineParser::parsePPEndIf() {
00565   conditionalCompilationEnd();
00566   parsePPUnknown();
00567 }
00568 
00569 void UnwrappedLineParser::parsePPDefine() {
00570   nextToken();
00571 
00572   if (FormatTok->Tok.getKind() != tok::identifier) {
00573     parsePPUnknown();
00574     return;
00575   }
00576   nextToken();
00577   if (FormatTok->Tok.getKind() == tok::l_paren &&
00578       FormatTok->WhitespaceRange.getBegin() ==
00579           FormatTok->WhitespaceRange.getEnd()) {
00580     parseParens();
00581   }
00582   addUnwrappedLine();
00583   Line->Level = 1;
00584 
00585   // Errors during a preprocessor directive can only affect the layout of the
00586   // preprocessor directive, and thus we ignore them. An alternative approach
00587   // would be to use the same approach we use on the file level (no
00588   // re-indentation if there was a structural error) within the macro
00589   // definition.
00590   parseFile();
00591 }
00592 
00593 void UnwrappedLineParser::parsePPUnknown() {
00594   do {
00595     nextToken();
00596   } while (!eof());
00597   addUnwrappedLine();
00598 }
00599 
00600 // Here we blacklist certain tokens that are not usually the first token in an
00601 // unwrapped line. This is used in attempt to distinguish macro calls without
00602 // trailing semicolons from other constructs split to several lines.
00603 bool tokenCanStartNewLine(clang::Token Tok) {
00604   // Semicolon can be a null-statement, l_square can be a start of a macro or
00605   // a C++11 attribute, but this doesn't seem to be common.
00606   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
00607          Tok.isNot(tok::l_square) &&
00608          // Tokens that can only be used as binary operators and a part of
00609          // overloaded operator names.
00610          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
00611          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
00612          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
00613          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
00614          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
00615          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
00616          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
00617          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
00618          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
00619          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
00620          Tok.isNot(tok::lesslessequal) &&
00621          // Colon is used in labels, base class lists, initializer lists,
00622          // range-based for loops, ternary operator, but should never be the
00623          // first token in an unwrapped line.
00624          Tok.isNot(tok::colon) &&
00625          // 'noexcept' is a trailing annotation.
00626          Tok.isNot(tok::kw_noexcept);
00627 }
00628 
00629 void UnwrappedLineParser::parseStructuralElement() {
00630   assert(!FormatTok->Tok.is(tok::l_brace));
00631   switch (FormatTok->Tok.getKind()) {
00632   case tok::at:
00633     nextToken();
00634     if (FormatTok->Tok.is(tok::l_brace)) {
00635       parseBracedList();
00636       break;
00637     }
00638     switch (FormatTok->Tok.getObjCKeywordID()) {
00639     case tok::objc_public:
00640     case tok::objc_protected:
00641     case tok::objc_package:
00642     case tok::objc_private:
00643       return parseAccessSpecifier();
00644     case tok::objc_interface:
00645     case tok::objc_implementation:
00646       return parseObjCInterfaceOrImplementation();
00647     case tok::objc_protocol:
00648       return parseObjCProtocol();
00649     case tok::objc_end:
00650       return; // Handled by the caller.
00651     case tok::objc_optional:
00652     case tok::objc_required:
00653       nextToken();
00654       addUnwrappedLine();
00655       return;
00656     default:
00657       break;
00658     }
00659     break;
00660   case tok::kw_asm:
00661     FormatTok->Finalized = true;
00662     nextToken();
00663     if (FormatTok->is(tok::l_brace)) {
00664       while (FormatTok && FormatTok->isNot(tok::eof)) {
00665         FormatTok->Finalized = true;
00666         if (FormatTok->is(tok::r_brace)) {
00667           nextToken();
00668           break;
00669         }
00670         nextToken();
00671       }
00672     }
00673     break;
00674   case tok::kw_namespace:
00675     parseNamespace();
00676     return;
00677   case tok::kw_inline:
00678     nextToken();
00679     if (FormatTok->Tok.is(tok::kw_namespace)) {
00680       parseNamespace();
00681       return;
00682     }
00683     break;
00684   case tok::kw_public:
00685   case tok::kw_protected:
00686   case tok::kw_private:
00687     if (Style.Language == FormatStyle::LK_Java)
00688       nextToken();
00689     else
00690       parseAccessSpecifier();
00691     return;
00692   case tok::kw_if:
00693     parseIfThenElse();
00694     return;
00695   case tok::kw_for:
00696   case tok::kw_while:
00697     parseForOrWhileLoop();
00698     return;
00699   case tok::kw_do:
00700     parseDoWhile();
00701     return;
00702   case tok::kw_switch:
00703     parseSwitch();
00704     return;
00705   case tok::kw_default:
00706     nextToken();
00707     parseLabel();
00708     return;
00709   case tok::kw_case:
00710     parseCaseLabel();
00711     return;
00712   case tok::kw_try:
00713     parseTryCatch();
00714     return;
00715   case tok::kw_extern:
00716     nextToken();
00717     if (FormatTok->Tok.is(tok::string_literal)) {
00718       nextToken();
00719       if (FormatTok->Tok.is(tok::l_brace)) {
00720         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
00721         addUnwrappedLine();
00722         return;
00723       }
00724     }
00725     break;
00726   case tok::identifier:
00727     if (FormatTok->IsForEachMacro) {
00728       parseForOrWhileLoop();
00729       return;
00730     }
00731     // In all other cases, parse the declaration.
00732     break;
00733   default:
00734     break;
00735   }
00736   do {
00737     switch (FormatTok->Tok.getKind()) {
00738     case tok::at:
00739       nextToken();
00740       if (FormatTok->Tok.is(tok::l_brace))
00741         parseBracedList();
00742       break;
00743     case tok::kw_enum:
00744       parseEnum();
00745       break;
00746     case tok::kw_typedef:
00747       nextToken();
00748       if (FormatTok->is(Keywords.kw_NS_ENUM))
00749         parseEnum();
00750       break;
00751     case tok::kw_struct:
00752     case tok::kw_union:
00753     case tok::kw_class:
00754       parseRecord();
00755       // A record declaration or definition is always the start of a structural
00756       // element.
00757       break;
00758     case tok::semi:
00759       nextToken();
00760       addUnwrappedLine();
00761       return;
00762     case tok::r_brace:
00763       addUnwrappedLine();
00764       return;
00765     case tok::l_paren:
00766       parseParens();
00767       break;
00768     case tok::caret:
00769       nextToken();
00770       if (FormatTok->Tok.isAnyIdentifier() ||
00771           FormatTok->isSimpleTypeSpecifier())
00772         nextToken();
00773       if (FormatTok->is(tok::l_paren))
00774         parseParens();
00775       if (FormatTok->is(tok::l_brace))
00776         parseChildBlock();
00777       break;
00778     case tok::l_brace:
00779       if (!tryToParseBracedList()) {
00780         // A block outside of parentheses must be the last part of a
00781         // structural element.
00782         // FIXME: Figure out cases where this is not true, and add projections
00783         // for them (the one we know is missing are lambdas).
00784         if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
00785           addUnwrappedLine();
00786         FormatTok->Type = TT_FunctionLBrace;
00787         parseBlock(/*MustBeDeclaration=*/false);
00788         addUnwrappedLine();
00789         return;
00790       }
00791       // Otherwise this was a braced init list, and the structural
00792       // element continues.
00793       break;
00794     case tok::kw_try:
00795       // We arrive here when parsing function-try blocks.
00796       parseTryCatch();
00797       return;
00798     case tok::identifier: {
00799       StringRef Text = FormatTok->TokenText;
00800       // Parse function literal unless 'function' is the first token in a line
00801       // in which case this should be treated as a free-standing function.
00802       if (Style.Language == FormatStyle::LK_JavaScript && Text == "function" &&
00803           Line->Tokens.size() > 0) {
00804         tryToParseJSFunction();
00805         break;
00806       }
00807       nextToken();
00808       if (Line->Tokens.size() == 1) {
00809         if (FormatTok->Tok.is(tok::colon)) {
00810           parseLabel();
00811           return;
00812         }
00813         // Recognize function-like macro usages without trailing semicolon as
00814         // well as free-standing macrose like Q_OBJECT.
00815         bool FunctionLike = FormatTok->is(tok::l_paren);
00816         if (FunctionLike)
00817           parseParens();
00818         if (FormatTok->NewlinesBefore > 0 &&
00819             (Text.size() >= 5 || FunctionLike) &&
00820             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
00821           addUnwrappedLine();
00822           return;
00823         }
00824       }
00825       break;
00826     }
00827     case tok::equal:
00828       nextToken();
00829       if (FormatTok->Tok.is(tok::l_brace)) {
00830         parseBracedList();
00831       }
00832       break;
00833     case tok::l_square:
00834       parseSquare();
00835       break;
00836     default:
00837       nextToken();
00838       break;
00839     }
00840   } while (!eof());
00841 }
00842 
00843 bool UnwrappedLineParser::tryToParseLambda() {
00844   // FIXME: This is a dirty way to access the previous token. Find a better
00845   // solution.
00846   if (!Line->Tokens.empty() &&
00847       (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator,
00848                                         tok::kw_new, tok::kw_delete) ||
00849        Line->Tokens.back().Tok->closesScope() ||
00850        Line->Tokens.back().Tok->isSimpleTypeSpecifier())) {
00851     nextToken();
00852     return false;
00853   }
00854   assert(FormatTok->is(tok::l_square));
00855   FormatToken &LSquare = *FormatTok;
00856   if (!tryToParseLambdaIntroducer())
00857     return false;
00858 
00859   while (FormatTok->isNot(tok::l_brace)) {
00860     if (FormatTok->isSimpleTypeSpecifier()) {
00861       nextToken();
00862       continue;
00863     }
00864     switch (FormatTok->Tok.getKind()) {
00865     case tok::l_brace:
00866       break;
00867     case tok::l_paren:
00868       parseParens();
00869       break;
00870     case tok::less:
00871     case tok::greater:
00872     case tok::identifier:
00873     case tok::coloncolon:
00874     case tok::kw_mutable:
00875       nextToken();
00876       break;
00877     case tok::arrow:
00878       FormatTok->Type = TT_TrailingReturnArrow;
00879       nextToken();
00880       break;
00881     default:
00882       return true;
00883     }
00884   }
00885   LSquare.Type = TT_LambdaLSquare;
00886   parseChildBlock();
00887   return true;
00888 }
00889 
00890 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
00891   nextToken();
00892   if (FormatTok->is(tok::equal)) {
00893     nextToken();
00894     if (FormatTok->is(tok::r_square)) {
00895       nextToken();
00896       return true;
00897     }
00898     if (FormatTok->isNot(tok::comma))
00899       return false;
00900     nextToken();
00901   } else if (FormatTok->is(tok::amp)) {
00902     nextToken();
00903     if (FormatTok->is(tok::r_square)) {
00904       nextToken();
00905       return true;
00906     }
00907     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
00908       return false;
00909     }
00910     if (FormatTok->is(tok::comma))
00911       nextToken();
00912   } else if (FormatTok->is(tok::r_square)) {
00913     nextToken();
00914     return true;
00915   }
00916   do {
00917     if (FormatTok->is(tok::amp))
00918       nextToken();
00919     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
00920       return false;
00921     nextToken();
00922     if (FormatTok->is(tok::ellipsis))
00923       nextToken();
00924     if (FormatTok->is(tok::comma)) {
00925       nextToken();
00926     } else if (FormatTok->is(tok::r_square)) {
00927       nextToken();
00928       return true;
00929     } else {
00930       return false;
00931     }
00932   } while (!eof());
00933   return false;
00934 }
00935 
00936 void UnwrappedLineParser::tryToParseJSFunction() {
00937   nextToken();
00938 
00939   // Consume function name.
00940   if (FormatTok->is(tok::identifier))
00941       nextToken();
00942 
00943   if (FormatTok->isNot(tok::l_paren))
00944     return;
00945   nextToken();
00946   while (FormatTok->isNot(tok::l_brace)) {
00947     // Err on the side of caution in order to avoid consuming the full file in
00948     // case of incomplete code.
00949     if (!FormatTok->isOneOf(tok::identifier, tok::comma, tok::r_paren,
00950                             tok::comment))
00951       return;
00952     nextToken();
00953   }
00954   parseChildBlock();
00955 }
00956 
00957 bool UnwrappedLineParser::tryToParseBracedList() {
00958   if (FormatTok->BlockKind == BK_Unknown)
00959     calculateBraceTypes();
00960   assert(FormatTok->BlockKind != BK_Unknown);
00961   if (FormatTok->BlockKind == BK_Block)
00962     return false;
00963   parseBracedList();
00964   return true;
00965 }
00966 
00967 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
00968   bool HasError = false;
00969   nextToken();
00970 
00971   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
00972   // replace this by using parseAssigmentExpression() inside.
00973   do {
00974     if (Style.Language == FormatStyle::LK_JavaScript &&
00975         FormatTok->is(Keywords.kw_function)) {
00976       tryToParseJSFunction();
00977       continue;
00978     }
00979     switch (FormatTok->Tok.getKind()) {
00980     case tok::caret:
00981       nextToken();
00982       if (FormatTok->is(tok::l_brace)) {
00983         parseChildBlock();
00984       }
00985       break;
00986     case tok::l_square:
00987       tryToParseLambda();
00988       break;
00989     case tok::l_brace:
00990       // Assume there are no blocks inside a braced init list apart
00991       // from the ones we explicitly parse out (like lambdas).
00992       FormatTok->BlockKind = BK_BracedInit;
00993       parseBracedList();
00994       break;
00995     case tok::r_brace:
00996       nextToken();
00997       return !HasError;
00998     case tok::semi:
00999       HasError = true;
01000       if (!ContinueOnSemicolons)
01001         return !HasError;
01002       nextToken();
01003       break;
01004     case tok::comma:
01005       nextToken();
01006       break;
01007     default:
01008       nextToken();
01009       break;
01010     }
01011   } while (!eof());
01012   return false;
01013 }
01014 
01015 void UnwrappedLineParser::parseParens() {
01016   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
01017   nextToken();
01018   do {
01019     switch (FormatTok->Tok.getKind()) {
01020     case tok::l_paren:
01021       parseParens();
01022       break;
01023     case tok::r_paren:
01024       nextToken();
01025       return;
01026     case tok::r_brace:
01027       // A "}" inside parenthesis is an error if there wasn't a matching "{".
01028       return;
01029     case tok::l_square:
01030       tryToParseLambda();
01031       break;
01032     case tok::l_brace: {
01033       if (!tryToParseBracedList()) {
01034         parseChildBlock();
01035       }
01036       break;
01037     }
01038     case tok::at:
01039       nextToken();
01040       if (FormatTok->Tok.is(tok::l_brace))
01041         parseBracedList();
01042       break;
01043     case tok::identifier:
01044       if (Style.Language == FormatStyle::LK_JavaScript &&
01045           FormatTok->is(Keywords.kw_function))
01046         tryToParseJSFunction();
01047       else
01048         nextToken();
01049       break;
01050     default:
01051       nextToken();
01052       break;
01053     }
01054   } while (!eof());
01055 }
01056 
01057 void UnwrappedLineParser::parseSquare() {
01058   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
01059   if (tryToParseLambda())
01060     return;
01061   do {
01062     switch (FormatTok->Tok.getKind()) {
01063     case tok::l_paren:
01064       parseParens();
01065       break;
01066     case tok::r_square:
01067       nextToken();
01068       return;
01069     case tok::r_brace:
01070       // A "}" inside parenthesis is an error if there wasn't a matching "{".
01071       return;
01072     case tok::l_square:
01073       parseSquare();
01074       break;
01075     case tok::l_brace: {
01076       if (!tryToParseBracedList()) {
01077         parseChildBlock();
01078       }
01079       break;
01080     }
01081     case tok::at:
01082       nextToken();
01083       if (FormatTok->Tok.is(tok::l_brace))
01084         parseBracedList();
01085       break;
01086     default:
01087       nextToken();
01088       break;
01089     }
01090   } while (!eof());
01091 }
01092 
01093 void UnwrappedLineParser::parseIfThenElse() {
01094   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
01095   nextToken();
01096   if (FormatTok->Tok.is(tok::l_paren))
01097     parseParens();
01098   bool NeedsUnwrappedLine = false;
01099   if (FormatTok->Tok.is(tok::l_brace)) {
01100     CompoundStatementIndenter Indenter(this, Style, Line->Level);
01101     parseBlock(/*MustBeDeclaration=*/false);
01102     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
01103         Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
01104       addUnwrappedLine();
01105     } else {
01106       NeedsUnwrappedLine = true;
01107     }
01108   } else {
01109     addUnwrappedLine();
01110     ++Line->Level;
01111     parseStructuralElement();
01112     --Line->Level;
01113   }
01114   if (FormatTok->Tok.is(tok::kw_else)) {
01115     if (Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
01116       addUnwrappedLine();
01117     nextToken();
01118     if (FormatTok->Tok.is(tok::l_brace)) {
01119       CompoundStatementIndenter Indenter(this, Style, Line->Level);
01120       parseBlock(/*MustBeDeclaration=*/false);
01121       addUnwrappedLine();
01122     } else if (FormatTok->Tok.is(tok::kw_if)) {
01123       parseIfThenElse();
01124     } else {
01125       addUnwrappedLine();
01126       ++Line->Level;
01127       parseStructuralElement();
01128       --Line->Level;
01129     }
01130   } else if (NeedsUnwrappedLine) {
01131     addUnwrappedLine();
01132   }
01133 }
01134 
01135 void UnwrappedLineParser::parseTryCatch() {
01136   assert(FormatTok->is(tok::kw_try) && "'try' expected");
01137   nextToken();
01138   bool NeedsUnwrappedLine = false;
01139   if (FormatTok->is(tok::colon)) {
01140     // We are in a function try block, what comes is an initializer list.
01141     nextToken();
01142     while (FormatTok->is(tok::identifier)) {
01143       nextToken();
01144       if (FormatTok->is(tok::l_paren))
01145         parseParens();
01146       else
01147         StructuralError = true;
01148       if (FormatTok->is(tok::comma))
01149         nextToken();
01150     }
01151   }
01152   if (FormatTok->is(tok::l_brace)) {
01153     CompoundStatementIndenter Indenter(this, Style, Line->Level);
01154     parseBlock(/*MustBeDeclaration=*/false);
01155     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
01156         Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
01157         Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
01158       addUnwrappedLine();
01159     } else {
01160       NeedsUnwrappedLine = true;
01161     }
01162   } else if (!FormatTok->is(tok::kw_catch)) {
01163     // The C++ standard requires a compound-statement after a try.
01164     // If there's none, we try to assume there's a structuralElement
01165     // and try to continue.
01166     StructuralError = true;
01167     addUnwrappedLine();
01168     ++Line->Level;
01169     parseStructuralElement();
01170     --Line->Level;
01171   }
01172   while (FormatTok->is(tok::kw_catch) ||
01173          ((Style.Language == FormatStyle::LK_Java ||
01174            Style.Language == FormatStyle::LK_JavaScript) &&
01175           FormatTok->is(Keywords.kw_finally))) {
01176     nextToken();
01177     while (FormatTok->isNot(tok::l_brace)) {
01178       if (FormatTok->is(tok::l_paren)) {
01179         parseParens();
01180         continue;
01181       }
01182       if (FormatTok->isOneOf(tok::semi, tok::r_brace))
01183         return;
01184       nextToken();
01185     }
01186     NeedsUnwrappedLine = false;
01187     CompoundStatementIndenter Indenter(this, Style, Line->Level);
01188     parseBlock(/*MustBeDeclaration=*/false);
01189     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
01190         Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
01191         Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
01192       addUnwrappedLine();
01193     } else {
01194       NeedsUnwrappedLine = true;
01195     }
01196   }
01197   if (NeedsUnwrappedLine) {
01198     addUnwrappedLine();
01199   }
01200 }
01201 
01202 void UnwrappedLineParser::parseNamespace() {
01203   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
01204 
01205   const FormatToken &InitialToken = *FormatTok;
01206   nextToken();
01207   if (FormatTok->Tok.is(tok::identifier))
01208     nextToken();
01209   if (FormatTok->Tok.is(tok::l_brace)) {
01210     if (ShouldBreakBeforeBrace(Style, InitialToken))
01211       addUnwrappedLine();
01212 
01213     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
01214                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
01215                      DeclarationScopeStack.size() > 1);
01216     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
01217     // Munch the semicolon after a namespace. This is more common than one would
01218     // think. Puttin the semicolon into its own line is very ugly.
01219     if (FormatTok->Tok.is(tok::semi))
01220       nextToken();
01221     addUnwrappedLine();
01222   }
01223   // FIXME: Add error handling.
01224 }
01225 
01226 void UnwrappedLineParser::parseForOrWhileLoop() {
01227   assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while) ||
01228           FormatTok->IsForEachMacro) &&
01229          "'for', 'while' or foreach macro expected");
01230   nextToken();
01231   if (FormatTok->Tok.is(tok::l_paren))
01232     parseParens();
01233   if (FormatTok->Tok.is(tok::l_brace)) {
01234     CompoundStatementIndenter Indenter(this, Style, Line->Level);
01235     parseBlock(/*MustBeDeclaration=*/false);
01236     addUnwrappedLine();
01237   } else {
01238     addUnwrappedLine();
01239     ++Line->Level;
01240     parseStructuralElement();
01241     --Line->Level;
01242   }
01243 }
01244 
01245 void UnwrappedLineParser::parseDoWhile() {
01246   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
01247   nextToken();
01248   if (FormatTok->Tok.is(tok::l_brace)) {
01249     CompoundStatementIndenter Indenter(this, Style, Line->Level);
01250     parseBlock(/*MustBeDeclaration=*/false);
01251     if (Style.BreakBeforeBraces == FormatStyle::BS_GNU)
01252       addUnwrappedLine();
01253   } else {
01254     addUnwrappedLine();
01255     ++Line->Level;
01256     parseStructuralElement();
01257     --Line->Level;
01258   }
01259 
01260   // FIXME: Add error handling.
01261   if (!FormatTok->Tok.is(tok::kw_while)) {
01262     addUnwrappedLine();
01263     return;
01264   }
01265 
01266   nextToken();
01267   parseStructuralElement();
01268 }
01269 
01270 void UnwrappedLineParser::parseLabel() {
01271   nextToken();
01272   unsigned OldLineLevel = Line->Level;
01273   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
01274     --Line->Level;
01275   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
01276     CompoundStatementIndenter Indenter(this, Style, Line->Level);
01277     parseBlock(/*MustBeDeclaration=*/false);
01278     if (FormatTok->Tok.is(tok::kw_break)) {
01279       // "break;" after "}" on its own line only for BS_Allman and BS_GNU
01280       if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
01281           Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
01282         addUnwrappedLine();
01283       }
01284       parseStructuralElement();
01285     }
01286     addUnwrappedLine();
01287   } else {
01288     addUnwrappedLine();
01289   }
01290   Line->Level = OldLineLevel;
01291 }
01292 
01293 void UnwrappedLineParser::parseCaseLabel() {
01294   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
01295   // FIXME: fix handling of complex expressions here.
01296   do {
01297     nextToken();
01298   } while (!eof() && !FormatTok->Tok.is(tok::colon));
01299   parseLabel();
01300 }
01301 
01302 void UnwrappedLineParser::parseSwitch() {
01303   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
01304   nextToken();
01305   if (FormatTok->Tok.is(tok::l_paren))
01306     parseParens();
01307   if (FormatTok->Tok.is(tok::l_brace)) {
01308     CompoundStatementIndenter Indenter(this, Style, Line->Level);
01309     parseBlock(/*MustBeDeclaration=*/false);
01310     addUnwrappedLine();
01311   } else {
01312     addUnwrappedLine();
01313     ++Line->Level;
01314     parseStructuralElement();
01315     --Line->Level;
01316   }
01317 }
01318 
01319 void UnwrappedLineParser::parseAccessSpecifier() {
01320   nextToken();
01321   // Understand Qt's slots.
01322   if (FormatTok->is(tok::identifier) &&
01323       (FormatTok->TokenText == "slots" || FormatTok->TokenText == "Q_SLOTS"))
01324     nextToken();
01325   // Otherwise, we don't know what it is, and we'd better keep the next token.
01326   if (FormatTok->Tok.is(tok::colon))
01327     nextToken();
01328   addUnwrappedLine();
01329 }
01330 
01331 void UnwrappedLineParser::parseEnum() {
01332   // Won't be 'enum' for NS_ENUMs.
01333   if (FormatTok->Tok.is(tok::kw_enum))
01334     nextToken(); 
01335 
01336   // Eat up enum class ...
01337   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
01338     nextToken();
01339   while (FormatTok->Tok.getIdentifierInfo() ||
01340          FormatTok->isOneOf(tok::colon, tok::coloncolon)) {
01341     nextToken();
01342     // We can have macros or attributes in between 'enum' and the enum name.
01343     if (FormatTok->Tok.is(tok::l_paren))
01344       parseParens();
01345     if (FormatTok->Tok.is(tok::identifier))
01346       nextToken();
01347   }
01348 
01349   // Just a declaration or something is wrong.
01350   if (!FormatTok->is(tok::l_brace))
01351     return;
01352   FormatTok->BlockKind = BK_Block;
01353 
01354   if (Style.Language == FormatStyle::LK_Java) {
01355     // Java enums are different.
01356     parseJavaEnumBody();
01357     return;
01358   }
01359 
01360   // Parse enum body.
01361   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
01362   if (HasError) {
01363     if (FormatTok->is(tok::semi))
01364       nextToken();
01365     addUnwrappedLine();
01366   }
01367 
01368   // We fall through to parsing a structural element afterwards, so that in
01369   // enum A {} n, m;
01370   // "} n, m;" will end up in one unwrapped line.
01371 }
01372 
01373 void UnwrappedLineParser::parseJavaEnumBody() {
01374   // Determine whether the enum is simple, i.e. does not have a semicolon or
01375   // constants with class bodies. Simple enums can be formatted like braced
01376   // lists, contracted to a single line, etc.
01377   unsigned StoredPosition = Tokens->getPosition();
01378   bool IsSimple = true;
01379   FormatToken *Tok = Tokens->getNextToken();
01380   while (Tok) {
01381     if (Tok->is(tok::r_brace))
01382       break;
01383     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
01384       IsSimple = false;
01385       break;
01386     }
01387     // FIXME: This will also mark enums with braces in the arguments to enum
01388     // constants as "not simple". This is probably fine in practice, though.
01389     Tok = Tokens->getNextToken();
01390   }
01391   FormatTok = Tokens->setPosition(StoredPosition);
01392 
01393   if (IsSimple) {
01394     parseBracedList();
01395     addUnwrappedLine();
01396     return;
01397   }
01398 
01399   // Parse the body of a more complex enum.
01400   // First add a line for everything up to the "{".
01401   nextToken();
01402   addUnwrappedLine();
01403   ++Line->Level;
01404 
01405   // Parse the enum constants.
01406   while (FormatTok) {
01407     if (FormatTok->is(tok::l_brace)) {
01408       // Parse the constant's class body.
01409       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
01410                  /*MunchSemi=*/false);
01411     } else if (FormatTok->is(tok::l_paren)) {
01412       parseParens();
01413     } else if (FormatTok->is(tok::comma)) {
01414       nextToken();
01415       addUnwrappedLine();
01416     } else if (FormatTok->is(tok::semi)) {
01417       nextToken();
01418       addUnwrappedLine();
01419       break;
01420     } else if (FormatTok->is(tok::r_brace)) {
01421       addUnwrappedLine();
01422       break;
01423     } else {
01424       nextToken();
01425     }
01426   }
01427 
01428   // Parse the class body after the enum's ";" if any.
01429   parseLevel(/*HasOpeningBrace=*/true);
01430   nextToken();
01431   --Line->Level;
01432   addUnwrappedLine();
01433 }
01434 
01435 void UnwrappedLineParser::parseRecord() {
01436   const FormatToken &InitialToken = *FormatTok;
01437   nextToken();
01438   if (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw___attribute,
01439                          tok::kw___declspec, tok::kw_alignas)) {
01440     nextToken();
01441     // We can have macros or attributes in between 'class' and the class name.
01442     if (FormatTok->Tok.is(tok::l_paren)) {
01443       parseParens();
01444     }
01445     // The actual identifier can be a nested name specifier, and in macros
01446     // it is often token-pasted.
01447     while (FormatTok->is(tok::identifier) || FormatTok->is(tok::coloncolon) ||
01448            FormatTok->is(tok::hashhash) ||
01449            (Style.Language == FormatStyle::LK_Java &&
01450             FormatTok->isOneOf(tok::period, tok::comma)))
01451       nextToken();
01452 
01453     // Note that parsing away template declarations here leads to incorrectly
01454     // accepting function declarations as record declarations.
01455     // In general, we cannot solve this problem. Consider:
01456     // class A<int> B() {}
01457     // which can be a function definition or a class definition when B() is a
01458     // macro. If we find enough real-world cases where this is a problem, we
01459     // can parse for the 'template' keyword in the beginning of the statement,
01460     // and thus rule out the record production in case there is no template
01461     // (this would still leave us with an ambiguity between template function
01462     // and class declarations).
01463     if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) {
01464       while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) {
01465         if (FormatTok->Tok.is(tok::semi))
01466           return;
01467         nextToken();
01468       }
01469     }
01470   }
01471   if (FormatTok->Tok.is(tok::l_brace)) {
01472     if (ShouldBreakBeforeBrace(Style, InitialToken))
01473       addUnwrappedLine();
01474 
01475     parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
01476                /*MunchSemi=*/false);
01477   }
01478   // We fall through to parsing a structural element afterwards, so
01479   // class A {} n, m;
01480   // will end up in one unwrapped line.
01481   // This does not apply for Java.
01482   if (Style.Language == FormatStyle::LK_Java)
01483     addUnwrappedLine();
01484 }
01485 
01486 void UnwrappedLineParser::parseObjCProtocolList() {
01487   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
01488   do
01489     nextToken();
01490   while (!eof() && FormatTok->Tok.isNot(tok::greater));
01491   nextToken(); // Skip '>'.
01492 }
01493 
01494 void UnwrappedLineParser::parseObjCUntilAtEnd() {
01495   do {
01496     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
01497       nextToken();
01498       addUnwrappedLine();
01499       break;
01500     }
01501     if (FormatTok->is(tok::l_brace)) {
01502       parseBlock(/*MustBeDeclaration=*/false);
01503       // In ObjC interfaces, nothing should be following the "}".
01504       addUnwrappedLine();
01505     } else if (FormatTok->is(tok::r_brace)) {
01506       // Ignore stray "}". parseStructuralElement doesn't consume them.
01507       nextToken();
01508       addUnwrappedLine();
01509     } else {
01510       parseStructuralElement();
01511     }
01512   } while (!eof());
01513 }
01514 
01515 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
01516   nextToken();
01517   nextToken(); // interface name
01518 
01519   // @interface can be followed by either a base class, or a category.
01520   if (FormatTok->Tok.is(tok::colon)) {
01521     nextToken();
01522     nextToken(); // base class name
01523   } else if (FormatTok->Tok.is(tok::l_paren))
01524     // Skip category, if present.
01525     parseParens();
01526 
01527   if (FormatTok->Tok.is(tok::less))
01528     parseObjCProtocolList();
01529 
01530   if (FormatTok->Tok.is(tok::l_brace)) {
01531     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
01532         Style.BreakBeforeBraces == FormatStyle::BS_GNU)
01533       addUnwrappedLine();
01534     parseBlock(/*MustBeDeclaration=*/true);
01535   }
01536 
01537   // With instance variables, this puts '}' on its own line.  Without instance
01538   // variables, this ends the @interface line.
01539   addUnwrappedLine();
01540 
01541   parseObjCUntilAtEnd();
01542 }
01543 
01544 void UnwrappedLineParser::parseObjCProtocol() {
01545   nextToken();
01546   nextToken(); // protocol name
01547 
01548   if (FormatTok->Tok.is(tok::less))
01549     parseObjCProtocolList();
01550 
01551   // Check for protocol declaration.
01552   if (FormatTok->Tok.is(tok::semi)) {
01553     nextToken();
01554     return addUnwrappedLine();
01555   }
01556 
01557   addUnwrappedLine();
01558   parseObjCUntilAtEnd();
01559 }
01560 
01561 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
01562                                                  StringRef Prefix = "") {
01563   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
01564                << (Line.InPPDirective ? " MACRO" : "") << ": ";
01565   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
01566                                                     E = Line.Tokens.end();
01567        I != E; ++I) {
01568     llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
01569   }
01570   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
01571                                                     E = Line.Tokens.end();
01572        I != E; ++I) {
01573     const UnwrappedLineNode &Node = *I;
01574     for (SmallVectorImpl<UnwrappedLine>::const_iterator
01575              I = Node.Children.begin(),
01576              E = Node.Children.end();
01577          I != E; ++I) {
01578       printDebugInfo(*I, "\nChild: ");
01579     }
01580   }
01581   llvm::dbgs() << "\n";
01582 }
01583 
01584 void UnwrappedLineParser::addUnwrappedLine() {
01585   if (Line->Tokens.empty())
01586     return;
01587   DEBUG({
01588     if (CurrentLines == &Lines)
01589       printDebugInfo(*Line);
01590   });
01591   CurrentLines->push_back(*Line);
01592   Line->Tokens.clear();
01593   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
01594     for (SmallVectorImpl<UnwrappedLine>::iterator
01595              I = PreprocessorDirectives.begin(),
01596              E = PreprocessorDirectives.end();
01597          I != E; ++I) {
01598       CurrentLines->push_back(*I);
01599     }
01600     PreprocessorDirectives.clear();
01601   }
01602 }
01603 
01604 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
01605 
01606 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
01607   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
01608          FormatTok.NewlinesBefore > 0;
01609 }
01610 
01611 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
01612   bool JustComments = Line->Tokens.empty();
01613   for (SmallVectorImpl<FormatToken *>::const_iterator
01614            I = CommentsBeforeNextToken.begin(),
01615            E = CommentsBeforeNextToken.end();
01616        I != E; ++I) {
01617     if (isOnNewLine(**I) && JustComments) {
01618       addUnwrappedLine();
01619     }
01620     pushToken(*I);
01621   }
01622   if (NewlineBeforeNext && JustComments) {
01623     addUnwrappedLine();
01624   }
01625   CommentsBeforeNextToken.clear();
01626 }
01627 
01628 void UnwrappedLineParser::nextToken() {
01629   if (eof())
01630     return;
01631   flushComments(isOnNewLine(*FormatTok));
01632   pushToken(FormatTok);
01633   readToken();
01634 }
01635 
01636 void UnwrappedLineParser::readToken() {
01637   bool CommentsInCurrentLine = true;
01638   do {
01639     FormatTok = Tokens->getNextToken();
01640     assert(FormatTok);
01641     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
01642            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
01643       // If there is an unfinished unwrapped line, we flush the preprocessor
01644       // directives only after that unwrapped line was finished later.
01645       bool SwitchToPreprocessorLines =
01646           !Line->Tokens.empty() && CurrentLines == &Lines;
01647       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
01648       // Comments stored before the preprocessor directive need to be output
01649       // before the preprocessor directive, at the same level as the
01650       // preprocessor directive, as we consider them to apply to the directive.
01651       flushComments(isOnNewLine(*FormatTok));
01652       parsePPDirective();
01653     }
01654     while (FormatTok->Type == TT_ConflictStart ||
01655            FormatTok->Type == TT_ConflictEnd ||
01656            FormatTok->Type == TT_ConflictAlternative) {
01657       if (FormatTok->Type == TT_ConflictStart) {
01658         conditionalCompilationStart(/*Unreachable=*/false);
01659       } else if (FormatTok->Type == TT_ConflictAlternative) {
01660         conditionalCompilationAlternative();
01661       } else if (FormatTok->Type == TT_ConflictEnd) {
01662         conditionalCompilationEnd();
01663       }
01664       FormatTok = Tokens->getNextToken();
01665       FormatTok->MustBreakBefore = true;
01666     }
01667 
01668     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
01669         !Line->InPPDirective) {
01670       continue;
01671     }
01672 
01673     if (!FormatTok->Tok.is(tok::comment))
01674       return;
01675     if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
01676       CommentsInCurrentLine = false;
01677     }
01678     if (CommentsInCurrentLine) {
01679       pushToken(FormatTok);
01680     } else {
01681       CommentsBeforeNextToken.push_back(FormatTok);
01682     }
01683   } while (!eof());
01684 }
01685 
01686 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
01687   Line->Tokens.push_back(UnwrappedLineNode(Tok));
01688   if (MustBreakBeforeNextToken) {
01689     Line->Tokens.back().Tok->MustBreakBefore = true;
01690     MustBreakBeforeNextToken = false;
01691   }
01692 }
01693 
01694 } // end namespace format
01695 } // end namespace clang