clang API Documentation

TokenAnnotator.cpp
Go to the documentation of this file.
00001 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 ///
00010 /// \file
00011 /// \brief This file implements a token annotator, i.e. creates
00012 /// \c AnnotatedTokens out of \c FormatTokens with required extra information.
00013 ///
00014 //===----------------------------------------------------------------------===//
00015 
00016 #include "TokenAnnotator.h"
00017 #include "clang/Basic/SourceManager.h"
00018 #include "llvm/Support/Debug.h"
00019 
00020 #define DEBUG_TYPE "format-token-annotator"
00021 
00022 namespace clang {
00023 namespace format {
00024 
00025 namespace {
00026 
00027 /// \brief A parser that gathers additional information about tokens.
00028 ///
00029 /// The \c TokenAnnotator tries to match parentheses and square brackets and
00030 /// store the parenthesis levels. It also tries to resolve matching "<" and ">"
00031 /// into template parameter lists.
00032 class AnnotatingParser {
00033 public:
00034   AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line,
00035                    const AdditionalKeywords &Keywords)
00036       : Style(Style), Line(Line), CurrentToken(Line.First),
00037         KeywordVirtualFound(false), AutoFound(false), Keywords(Keywords) {
          // Parsing starts at the first token of \p Line inside a single outermost
          // context (kind tok::unknown, binding strength 1, not an expression).
00038     Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false));
          // Clear any annotation already stored on the first token.
00039     resetTokenMetadata(CurrentToken);
00040   }
00041 
00042 private:
00043   bool parseAngle() {
          // Scans the tokens that follow a '<' (CurrentToken is the token right
          // after it) for the matching '>'. On success both tokens are linked via
          // MatchingParen, the '>' becomes TT_TemplateCloser and true is returned.
          // Returns false if no token is left, if a token that cannot appear in a
          // template list is met first, or if consumeToken() fails.
00044     if (!CurrentToken)
00045       return false;
          // Pushes a context for the angle brackets; popped when this function
          // returns.
00046     ScopedContextCreator ContextCreator(*this, tok::less, 10);
          // The callers have already advanced past the '<', so this is the '<'.
00047     FormatToken *Left = CurrentToken->Previous;
00048     Contexts.back().IsExpression = false;
00049     // If there's a template keyword before the opening angle bracket, this is a
00050     // template parameter, not an argument.
00051     Contexts.back().InTemplateArgument =
00052         Left->Previous && Left->Previous->Tok.isNot(tok::kw_template);
00053
          // In Java a '?' directly after '<' is skipped instead of being treated as
          // a conditional operator (presumably a generic wildcard).
00054     if (Style.Language == FormatStyle::LK_Java &&
00055         CurrentToken->is(tok::question))
00056       next();
00057
00058     while (CurrentToken) {
00059       if (CurrentToken->is(tok::greater)) {
00060         Left->MatchingParen = CurrentToken;
00061         CurrentToken->MatchingParen = Left;
00062         CurrentToken->Type = TT_TemplateCloser;
00063         next();
00064         return true;
00065       }
00066       if (CurrentToken->is(tok::question) &&
00067           Style.Language == FormatStyle::LK_Java) {
00068         next();
00069         continue;
00070       }
            // Any of these tokens means the '<' was not a template opener.
00071       if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace,
00072                                 tok::colon, tok::question))
00073         return false;
00074       // If a && or || is found and interpreted as a binary operator, this set
00075       // of angles is likely part of something like "a < b && c > d". If the
00076       // angles are inside an expression, the ||/&& might also be a binary
00077       // operator that was misinterpreted because we are parsing template
00078       // parameters.
00079       // FIXME: This is getting out of hand, write a decent parser.
            // Contexts[Contexts.size() - 2] is the context enclosing the one pushed
            // above for the angle brackets.
00080       if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) &&
00081           CurrentToken->Previous->Type == TT_BinaryOperator &&
00082           Contexts[Contexts.size() - 2].IsExpression &&
00083           Line.First->isNot(tok::kw_template))
00084         return false;
00085       updateParameterCount(Left, CurrentToken);
00086       if (!consumeToken())
00087         return false;
00088     }
          // Ran out of tokens without finding the closing '>'.
00089     return false;
00090   }
00091 
00092   bool parseParens(bool LookForDecls = false) {
          // Annotates the contents of a parenthesized group; CurrentToken is the
          // token right after the '('. When the matching ')' is reached, both
          // parens are linked via MatchingParen, the '(' gets its PackingKind and
          // true is returned. Returns false if no token is left, if a ']' or '}'
          // is met first, or if consumeToken() fails.
          // \p LookForDecls enables the "identifier * identifier" check in the loop
          // below; consumeToken() passes true for 'if' and 'while'.
00093     if (!CurrentToken)
00094       return false;
          // Pushes a context for the parentheses; popped when this function returns.
00095     ScopedContextCreator ContextCreator(*this, tok::l_paren, 1);
00096
00097     // FIXME: This is a bit of a hack. Do better.
          // Only parentheses directly inside the outermost context inherit the flag
          // that consumeToken() sets after seeing 'for'.
00098     Contexts.back().ColonIsForRangeExpr =
00099         Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
00100
00101     bool StartsObjCMethodExpr = false;
          // The opening '(' itself.
00102     FormatToken *Left = CurrentToken->Previous;
00103     if (CurrentToken->is(tok::caret)) {
00104       // (^ can start a block type.
00105       Left->Type = TT_ObjCBlockLParen;
00106     } else if (FormatToken *MaybeSel = Left->Previous) {
00107       // @selector( starts a selector.
00108       if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous &&
00109           MaybeSel->Previous->is(tok::at)) {
00110         StartsObjCMethodExpr = true;
00111       }
00112     }
00113
          // Decide from the token before the '(' whether the contents should be
          // treated as an expression. The first matching branch wins.
00114     if (Left->Previous &&
00115         (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_if,
00116                                  tok::kw_while, tok::l_paren, tok::comma) ||
00117          Left->Previous->Type == TT_BinaryOperator)) {
00118       // static_assert, if and while usually contain expressions.
00119       Contexts.back().IsExpression = true;
00120     } else if (Line.InPPDirective &&
00121                (!Left->Previous ||
00122                 (Left->Previous->isNot(tok::identifier) &&
00123                  Left->Previous->Type != TT_OverloadedOperator))) {
00124       Contexts.back().IsExpression = true;
00125     } else if (Left->Previous && Left->Previous->is(tok::r_square) &&
00126                Left->Previous->MatchingParen &&
00127                Left->Previous->MatchingParen->Type == TT_LambdaLSquare) {
00128       // This is a parameter list of a lambda expression.
00129       Contexts.back().IsExpression = false;
00130     } else if (Contexts[Contexts.size() - 2].CaretFound) {
00131       // This is the parameter list of an ObjC block.
00132       Contexts.back().IsExpression = false;
00133     } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) {
00134       Left->Type = TT_AttributeParen;
00135     } else if (Left->Previous && Left->Previous->IsForEachMacro) {
00136       // The first argument to a foreach macro is a declaration.
00137       Contexts.back().IsForEachMacro = true;
00138       Contexts.back().IsExpression = false;
00139     } else if (Left->Previous && Left->Previous->MatchingParen &&
00140                Left->Previous->MatchingParen->Type == TT_ObjCBlockLParen) {
00141       Contexts.back().IsExpression = false;
00142     }
00143
00144     if (StartsObjCMethodExpr) {
00145       Contexts.back().ColonIsObjCMethodExpr = true;
00146       Left->Type = TT_ObjCMethodExpr;
00147     }
00148
          // "(*" may start a function type; confirmed at the closing ')' below.
00149     bool MightBeFunctionType = CurrentToken->is(tok::star);
          // Line-break observations used to pick the PackingKind of the '('.
00150     bool HasMultipleLines = false;
00151     bool HasMultipleParametersOnALine = false;
00152     while (CurrentToken) {
00153       // LookForDecls is set when "if (" has been seen. Check for
00154       // 'identifier' '*' 'identifier' followed by not '=' -- this
00155       // '*' has to be a binary operator but determineStarAmpUsage() will
00156       // categorize it as an unary operator, so set the right type here.
00157       if (LookForDecls && CurrentToken->Next) {
00158         FormatToken *Prev = CurrentToken->getPreviousNonComment();
00159         if (Prev) {
00160           FormatToken *PrevPrev = Prev->getPreviousNonComment();
00161           FormatToken *Next = CurrentToken->Next;
00162           if (PrevPrev && PrevPrev->is(tok::identifier) &&
00163               Prev->isOneOf(tok::star, tok::amp, tok::ampamp) &&
00164               CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) {
00165             Prev->Type = TT_BinaryOperator;
                  // Only the first such pattern is rewritten.
00166             LookForDecls = false;
00167           }
00168         }
00169       }
00170
00171       if (CurrentToken->Previous->Type == TT_PointerOrReference &&
00172           CurrentToken->Previous->Previous->isOneOf(tok::l_paren,
00173                                                     tok::coloncolon))
00174         MightBeFunctionType = true;
00175       if (CurrentToken->Previous->Type == TT_BinaryOperator)
00176         Contexts.back().IsExpression = true;
00177       if (CurrentToken->is(tok::r_paren)) {
              // Matching ')' found: finalize the annotations of both parens.
00178         if (MightBeFunctionType && CurrentToken->Next &&
00179             (CurrentToken->Next->is(tok::l_paren) ||
00180              (CurrentToken->Next->is(tok::l_square) &&
00181               !Contexts.back().IsExpression)))
00182           Left->Type = TT_FunctionTypeLParen;
00183         Left->MatchingParen = CurrentToken;
00184         CurrentToken->MatchingParen = Left;
00185
00186         if (StartsObjCMethodExpr) {
00187           CurrentToken->Type = TT_ObjCMethodExpr;
00188           if (Contexts.back().FirstObjCSelectorName) {
00189             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
00190                 Contexts.back().LongestObjCSelectorName;
00191           }
00192         }
00193
              // The ')' takes over the attribute/annotation type of its '('.
00194         if (Left->Type == TT_AttributeParen)
00195           CurrentToken->Type = TT_AttributeParen;
00196         if (Left->Previous && Left->Previous->Type == TT_JavaAnnotation)
00197           CurrentToken->Type = TT_JavaAnnotation;
00198         if (Left->Previous && Left->Previous->Type == TT_LeadingJavaAnnotation)
00199           CurrentToken->Type = TT_LeadingJavaAnnotation;
00200
              // No line break inside the parens: inconclusive. Otherwise bin-packed
              // if some ',' was followed by a token on the same line, else one per
              // line.
00201         if (!HasMultipleLines)
00202           Left->PackingKind = PPK_Inconclusive;
00203         else if (HasMultipleParametersOnALine)
00204           Left->PackingKind = PPK_BinPacked;
00205         else
00206           Left->PackingKind = PPK_OnePerLine;
00207
00208         next();
00209         return true;
00210       }
            // A closing bracket of a different kind means the brackets are
            // unbalanced.
00211       if (CurrentToken->isOneOf(tok::r_square, tok::r_brace))
00212         return false;
00213       else if (CurrentToken->is(tok::l_brace))
00214         Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen
00215       if (CurrentToken->is(tok::comma) && CurrentToken->Next &&
00216           !CurrentToken->Next->HasUnescapedNewline &&
00217           !CurrentToken->Next->isTrailingComment())
00218         HasMultipleParametersOnALine = true;
00219       if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) ||
00220           CurrentToken->isSimpleTypeSpecifier())
00221         Contexts.back().IsExpression = false;
            // Remember the token, since consumeToken() advances CurrentToken.
00222       FormatToken *Tok = CurrentToken;
00223       if (!consumeToken())
00224         return false;
00225       updateParameterCount(Left, Tok);
00226       if (CurrentToken && CurrentToken->HasUnescapedNewline)
00227         HasMultipleLines = true;
00228     }
          // Ran out of tokens without finding the closing ')'.
00229     return false;
00230   }
00231 
00232   bool parseSquare() {
          // Annotates the contents of a '[' ... ']' group; CurrentToken is the token
          // right after the '['. When the matching ']' is reached, both brackets
          // are linked via MatchingParen and true is returned. Returns false if no
          // token is left, if a ')' or '}' is met first, or if consumeToken()
          // fails.
00233     if (!CurrentToken)
00234       return false;
00235
00236     // A '[' could be an index subscript (after an identifier or after
00237     // ')' or ']'), it could be the start of an Objective-C method
00238     // expression, or it could be the start of an Objective-C array literal.
00239     FormatToken *Left = CurrentToken->Previous;
00240     FormatToken *Parent = Left->getPreviousNonComment();
          // Note: this reads CanBeExpression from the enclosing context, because
          // the context for the brackets is only pushed further down.
00241     bool StartsObjCMethodExpr =
00242         Contexts.back().CanBeExpression && Left->Type != TT_LambdaLSquare &&
00243         CurrentToken->isNot(tok::l_brace) &&
00244         (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren,
00245                                     tok::kw_return, tok::kw_throw) ||
00246          Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn ||
00247          Parent->Type == TT_CastRParen ||
00248          getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown);
          // Pushes a context for the brackets; popped when this function returns.
00249     ScopedContextCreator ContextCreator(*this, tok::l_square, 10);
00250     Contexts.back().IsExpression = true;
          // Set once a ':' has been seen inside the brackets.
00251     bool ColonFound = false;
00252
          // Initial classification of the '['; it may be revised inside the loop.
00253     if (StartsObjCMethodExpr) {
00254       Contexts.back().ColonIsObjCMethodExpr = true;
00255       Left->Type = TT_ObjCMethodExpr;
00256     } else if (Parent && Parent->is(tok::at)) {
00257       Left->Type = TT_ArrayInitializerLSquare;
00258     } else if (Left->Type == TT_Unknown) {
00259       Left->Type = TT_ArraySubscriptLSquare;
00260     }
00261
00262     while (CurrentToken) {
00263       if (CurrentToken->is(tok::r_square)) {
00264         if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) &&
00265             Left->Type == TT_ObjCMethodExpr) {
00266           // An ObjC method call is rarely followed by an open parenthesis.
00267           // FIXME: Do we incorrectly label ":" with this?
00268           StartsObjCMethodExpr = false;
00269           Left->Type = TT_Unknown;
00270         }
              // "CurrentToken->Previous != Left" excludes an empty "[]".
00271         if (StartsObjCMethodExpr && CurrentToken->Previous != Left) {
00272           CurrentToken->Type = TT_ObjCMethodExpr;
00273           // determineStarAmpUsage() thinks that '*' '[' is allocating an
00274           // array of pointers, but if '[' starts a selector then '*' is a
00275           // binary operator.
00276           if (Parent && Parent->Type == TT_PointerOrReference)
00277             Parent->Type = TT_BinaryOperator;
00278         }
00279         Left->MatchingParen = CurrentToken;
00280         CurrentToken->MatchingParen = Left;
00281         if (Contexts.back().FirstObjCSelectorName) {
                // Publish the longest selector name on the first selector token;
                // it is reset to 0 when more than one block parameter was counted.
00282           Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
00283               Contexts.back().LongestObjCSelectorName;
00284           if (Left->BlockParameterCount > 1)
00285             Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0;
00286         }
00287         next();
00288         return true;
00289       }
            // A closing bracket of a different kind means the brackets are
            // unbalanced.
00290       if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace))
00291         return false;
00292       if (CurrentToken->is(tok::colon)) {
              // A ':' inside what looked like a subscript turns the '[' into an
              // ObjC method expression; a preceding ')' is then marked as the end
              // of a cast.
00293         if (Left->Type == TT_ArraySubscriptLSquare) {
00294           Left->Type = TT_ObjCMethodExpr;
00295           StartsObjCMethodExpr = true;
00296           Contexts.back().ColonIsObjCMethodExpr = true;
00297           if (Parent && Parent->is(tok::r_paren))
00298             Parent->Type = TT_CastRParen;
00299         }
00300         ColonFound = true;
00301       }
            // Outside of Proto, a ',' in a subscript, or in a method expression
            // that has no ':' so far, reclassifies the '[' as an array initializer.
00302       if (CurrentToken->is(tok::comma) &&
00303           Style.Language != FormatStyle::LK_Proto &&
00304           (Left->Type == TT_ArraySubscriptLSquare ||
00305            (Left->Type == TT_ObjCMethodExpr && !ColonFound)))
00306         Left->Type = TT_ArrayInitializerLSquare;
            // Remember the token, since consumeToken() advances CurrentToken.
00307       FormatToken* Tok = CurrentToken;
00308       if (!consumeToken())
00309         return false;
00310       updateParameterCount(Left, Tok);
00311     }
          // Ran out of tokens without finding the closing ']'.
00312     return false;
00313   }
00314 
00315   bool parseBrace() {
          // Annotates the contents of a '{' ... '}' group; CurrentToken is the token
          // right after the '{'. Returns true when the matching '}' is found, and
          // also when there is no token at all after the '{'. Returns false if a
          // ')' or ']' is met first or if consumeToken() fails.
00316     if (CurrentToken) {
00317       FormatToken *Left = CurrentToken->Previous;
00318
            // Uses, then clears, the CaretFound flag of the enclosing context: the
            // context for the braces is only pushed below.
00319       if (Contexts.back().CaretFound)
00320         Left->Type = TT_ObjCBlockLBrace;
00321       Contexts.back().CaretFound = false;
00322
            // Pushes a context for the braces; popped when this scope is left.
00323       ScopedContextCreator ContextCreator(*this, tok::l_brace, 1);
00324       Contexts.back().ColonIsDictLiteral = true;
00325       if (Left->BlockKind == BK_BracedInit)
00326         Contexts.back().IsExpression = true;
00327
00328       while (CurrentToken) {
00329         if (CurrentToken->is(tok::r_brace)) {
00330           Left->MatchingParen = CurrentToken;
00331           CurrentToken->MatchingParen = Left;
00332           next();
00333           return true;
00334         }
              // A closing bracket of a different kind means the brackets are
              // unbalanced.
00335         if (CurrentToken->isOneOf(tok::r_paren, tok::r_square))
00336           return false;
00337         updateParameterCount(Left, CurrentToken);
00338         if (CurrentToken->isOneOf(tok::colon, tok::l_brace)) {
                // An identifier before ':' (or, in Proto, also before '{') is a
                // selector name; a ':' makes the enclosing '{' a dict literal.
00339           FormatToken *Previous = CurrentToken->getPreviousNonComment();
00340           if ((CurrentToken->is(tok::colon) ||
00341                Style.Language == FormatStyle::LK_Proto) &&
00342               Previous->is(tok::identifier))
00343             Previous->Type = TT_SelectorName;
00344           if (CurrentToken->is(tok::colon))
00345             Left->Type = TT_DictLiteral;
00346         }
00347         if (!consumeToken())
00348           return false;
00349       }
00350     }
          // Reached when the tokens ran out; this is not reported as a failure.
00351     return true;
00352   }
00353 
00354   void updateParameterCount(FormatToken *Left, FormatToken *Current) {
          // Updates the counters stored on the opening bracket \p Left for the
          // token \p Current found inside that bracket pair.
          // Lambda introducers, '^' typed as a unary operator and the JavaScript
          // 'function' keyword each count as one block parameter.
00355     if (Current->Type == TT_LambdaLSquare ||
00356         (Current->is(tok::caret) && Current->Type == TT_UnaryOperator) ||
00357         (Style.Language == FormatStyle::LK_JavaScript &&
00358          Current->is(Keywords.kw_function))) {
00359       ++Left->BlockParameterCount;
00360     }
00361     if (Current->is(tok::comma)) {
            // Every ',' adds a parameter and is reported to the CommaSeparatedList
            // role, which is created on the first ','.
00362       ++Left->ParameterCount;
00363       if (!Left->Role)
00364         Left->Role.reset(new CommaSeparatedList(Style));
00365       Left->Role->CommaFound(Current);
00366     } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) {
            // The first non-comment token makes the count 1.
00367       Left->ParameterCount = 1;
00368     }
00369   }
00370 
00371   bool parseConditional() {
          // Called by consumeToken() after a '?'. Consumes tokens up to the next ':'
          // at this level, marks that ':' as TT_ConditionalExpr and returns true.
          // Returns false if consumeToken() fails or the tokens run out first.
00372     while (CurrentToken) {
00373       if (CurrentToken->is(tok::colon)) {
00374         CurrentToken->Type = TT_ConditionalExpr;
00375         next();
00376         return true;
00377       }
00378       if (!consumeToken())
00379         return false;
00380     }
00381     return false;
00382   }
00383 
00384   bool parseTemplateDeclaration() {
          // Called by consumeToken() after the 'template' keyword. If a '<' follows,
          // it is marked TT_TemplateOpener and parseAngle() parses the parameter
          // list. Returns false if there is no '<' or parseAngle() fails.
00385     if (CurrentToken && CurrentToken->is(tok::less)) {
00386       CurrentToken->Type = TT_TemplateOpener;
00387       next();
00388       if (!parseAngle())
00389         return false;
            // parseAngle() advanced past the '>', so Previous is the closing '>'.
            // The flag is only set when another token follows it.
00390       if (CurrentToken)
00391         CurrentToken->Previous->ClosesTemplateDeclaration = true;
00392       return true;
00393     }
00394     return false;
00395   }
00396 
00397   bool consumeToken() {
          // Advances past CurrentToken and annotates that token according to its
          // kind, calling the bracket parsers for opening brackets. Returns false
          // for a ':' that starts the line, an unmatched ')' or ']', a '}' that is
          // not the first token of the line, or when a bracket parser fails.
          // Otherwise returns true.
          // From here on Tok is the token being handled and CurrentToken is already
          // the token after it.
00398     FormatToken *Tok = CurrentToken;
00399     next();
00400     switch (Tok->Tok.getKind()) {
00401     case tok::plus:
00402     case tok::minus:
            // A leading '+' or '-' on a declaration line is an ObjC method
            // specifier.
00403       if (!Tok->Previous && Line.MustBeDeclaration)
00404         Tok->Type = TT_ObjCMethodSpecifier;
00405       break;
00406     case tok::colon:
00407       if (!Tok->Previous)
00408         return false;
00409       // Colons from ?: are handled in parseConditional().
            // The branches below are tried in order; the first match sets the type.
00410       if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1 &&
00411           Line.First->isNot(tok::kw_case)) {
00412         Tok->Type = TT_CtorInitializerColon;
00413       } else if (Contexts.back().ColonIsDictLiteral) {
00414         Tok->Type = TT_DictLiteral;
00415       } else if (Contexts.back().ColonIsObjCMethodExpr ||
00416                  Line.First->Type == TT_ObjCMethodSpecifier) {
00417         Tok->Type = TT_ObjCMethodExpr;
00418         Tok->Previous->Type = TT_SelectorName;
              // Track the widest selector name and the first one in this context.
00419         if (Tok->Previous->ColumnWidth >
00420             Contexts.back().LongestObjCSelectorName) {
00421           Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth;
00422         }
00423         if (!Contexts.back().FirstObjCSelectorName)
00424           Contexts.back().FirstObjCSelectorName = Tok->Previous;
00425       } else if (Contexts.back().ColonIsForRangeExpr) {
00426         Tok->Type = TT_RangeBasedForLoopColon;
00427       } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) {
00428         Tok->Type = TT_BitFieldColon;
00429       } else if (Contexts.size() == 1 &&
00430                  !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) {
00431         Tok->Type = TT_InheritanceColon;
00432       } else if (Tok->Previous->is(tok::identifier) && Tok->Next &&
00433                  Tok->Next->isOneOf(tok::r_paren, tok::comma)) {
00434         // This handles a special macro in ObjC code where selectors including
00435         // the colon are passed as macro arguments.
00436         Tok->Type = TT_ObjCMethodExpr;
00437       } else if (Contexts.back().ContextKind == tok::l_paren) {
00438         Tok->Type = TT_InlineASMColon;
00439       }
00440       break;
00441     case tok::kw_if:
00442     case tok::kw_while:
            // Skip the '(' and parse the condition, watching for declarations.
00443       if (CurrentToken && CurrentToken->is(tok::l_paren)) {
00444         next();
00445         if (!parseParens(/*LookForDecls=*/true))
00446           return false;
00447       }
00448       break;
00449     case tok::kw_for:
            // Note: the token after 'for' is skipped without checking that it is a
            // '('.
00450       Contexts.back().ColonIsForRangeExpr = true;
00451       next();
00452       if (!parseParens())
00453         return false;
00454       break;
00455     case tok::l_paren:
00456       if (!parseParens())
00457         return false;
            // A top-level parenthesized group on a declaration line that is not in
            // an expression context, not on an ObjC property line and not after
            // 'decltype' marks the line as a possible function declaration.
00458       if (Line.MustBeDeclaration && Contexts.size() == 1 &&
00459           !Contexts.back().IsExpression &&
00460           Line.First->Type != TT_ObjCProperty &&
00461           (!Tok->Previous || Tok->Previous->isNot(tok::kw_decltype)))
00462         Line.MightBeFunctionDecl = true;
00463       break;
00464     case tok::l_square:
00465       if (!parseSquare())
00466         return false;
00467       break;
00468     case tok::l_brace:
00469       if (!parseBrace())
00470         return false;
00471       break;
00472     case tok::less:
            // A '<' after a literal is never tried as a template opener.
00473       if ((!Tok->Previous || !Tok->Previous->Tok.isLiteral()) && parseAngle())
00474         Tok->Type = TT_TemplateOpener;
00475       else {
              // Not a template: treat '<' as a binary operator and rewind, so that
              // parsing resumes with the token right after the '<'.
00476         Tok->Type = TT_BinaryOperator;
00477         CurrentToken = Tok;
00478         next();
00479       }
00480       break;
00481     case tok::r_paren:
00482     case tok::r_square:
            // The bracket parsers handle their own closing bracket before calling
            // consumeToken(), so one that arrives here is unmatched.
00483       return false;
00484     case tok::r_brace:
00485       // Lines can start with '}'.
00486       if (Tok->Previous)
00487         return false;
00488       break;
00489     case tok::greater:
            // A '>' that parseAngle() did not match is a binary operator.
00490       Tok->Type = TT_BinaryOperator;
00491       break;
00492     case tok::kw_operator:
            // Consume the tokens of the operator name up to the next '(', ';' or
            // ')'. The return value of consumeToken() is ignored here.
00493       while (CurrentToken &&
00494              !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) {
00495         if (CurrentToken->isOneOf(tok::star, tok::amp))
00496           CurrentToken->Type = TT_PointerOrReference;
00497         consumeToken();
00498         if (CurrentToken && CurrentToken->Previous->Type == TT_BinaryOperator)
00499           CurrentToken->Previous->Type = TT_OverloadedOperator;
00500       }
00501       if (CurrentToken) {
00502         CurrentToken->Type = TT_OverloadedOperatorLParen;
00503         if (CurrentToken->Previous->Type == TT_BinaryOperator)
00504           CurrentToken->Previous->Type = TT_OverloadedOperator;
00505       }
00506       break;
00507     case tok::question:
            // The result is ignored: a missing ':' is not treated as an error.
00508       parseConditional();
00509       break;
00510     case tok::kw_template:
            // The result is ignored here as well.
00511       parseTemplateDeclaration();
00512       break;
00513     case tok::identifier:
00514       if (Line.First->is(tok::kw_for) && Tok->is(Keywords.kw_in))
00515         Tok->Type = TT_ObjCForIn;
00516       break;
00517     case tok::comma:
00518       if (Contexts.back().FirstStartOfName)
00519         Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true;
00520       if (Contexts.back().InCtorInitializer)
00521         Tok->Type = TT_CtorInitializerComma;
            // parseParens() treats the first foreach-macro argument as a
            // declaration; after the first ',' the context is an expression.
00522       if (Contexts.back().IsForEachMacro)
00523         Contexts.back().IsExpression = true;
00524       break;
00525     default:
00526       break;
00527     }
00528     return true;
00529   }
00530 
00531   void parseIncludeDirective() {
          // Marks the remaining tokens of the line as TT_ImplicitStringLiteral and
          // consumes all of them.
00532     if (CurrentToken && CurrentToken->is(tok::less)) {
            // "<...>" form: the '<' itself is skipped without being marked. Every
            // later token is marked, except a comment that is the last token of
            // the line.
00533       next();
00534       while (CurrentToken) {
00535         if (CurrentToken->isNot(tok::comment) || CurrentToken->Next)
00536           CurrentToken->Type = TT_ImplicitStringLiteral;
00537         next();
00538       }
00539     } else {
            // Any other form: every token that is not a comment is marked.
00540       while (CurrentToken) {
00541         if (CurrentToken->isNot(tok::comment))
00542           // Mark these tokens as "implicit" string literals, so that
00543           // they are not split or line-wrapped.
00544           CurrentToken->Type = TT_ImplicitStringLiteral;
00545         next();
00546       }
00547     }
00548   }
00549 
00550   void parseWarningOrError() {
          // Called by parsePreprocessorDirective() with CurrentToken on the
          // 'error'/'warning' keyword. Skips the keyword and the first token of
          // the message, then marks every remaining token as
          // TT_ImplicitStringLiteral.
00551     next();
00552     // We still want to format the whitespace left of the first token of the
00553     // warning or error.
00554     next();
00555     while (CurrentToken) {
00556       CurrentToken->Type = TT_ImplicitStringLiteral;
00557       next();
00558     }
00559   }
00560 
00561   void parsePragma() {
          // Handles "#pragma". Only "#pragma mark" gets special treatment: after
          // "mark" and the first token following it, every remaining token is
          // marked TT_ImplicitStringLiteral. For any other pragma this function
          // only consumes the "pragma" keyword.
00562     next(); // Consume "pragma".
00563     if (CurrentToken && CurrentToken->TokenText == "mark") {
00564       next(); // Consume "mark".
00565       next(); // Consume first token (so we fix leading whitespace).
00566       while (CurrentToken) {
00567         CurrentToken->Type = TT_ImplicitStringLiteral;
00568         next();
00569       }
00570     }
00571   }
00572 
00573   void parsePreprocessorDirective() {
          // Called by parseLine() with CurrentToken on the '#'. Dispatches on the
          // directive keyword and then consumes whatever tokens are left.
          // The first next() skips the '#'.
00574     next();
00575     if (!CurrentToken)
00576       return;
          // A number directly after the '#' gets exactly one space before it; the
          // rest of the line is not consumed here.
00577     if (CurrentToken->Tok.is(tok::numeric_constant)) {
00578       CurrentToken->SpacesRequiredBefore = 1;
00579       return;
00580     }
00581     // Hashes in the middle of a line can lead to any strange token
00582     // sequence.
00583     if (!CurrentToken->Tok.getIdentifierInfo())
00584       return;
00585     switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) {
00586     case tok::pp_include:
00587     case tok::pp_import:
            // Skip the directive keyword before handing over.
00588       next();
00589       parseIncludeDirective();
00590       break;
00591     case tok::pp_error:
00592     case tok::pp_warning:
00593       parseWarningOrError();
00594       break;
00595     case tok::pp_pragma:
00596       parsePragma();
00597       break;
00598     case tok::pp_if:
00599     case tok::pp_elif:
            // Annotate the rest of the line through parseLine() with the current
            // context marked as an expression. The LineType it returns is
            // ignored.
00600       Contexts.back().IsExpression = true;
00601       parseLine();
00602       break;
00603     default:
00604       break;
00605     }
          // Consume whatever the handlers above left over.
00606     while (CurrentToken)
00607       next();
00608   }
00609 
00610 public:
00611   LineType parseLine() {
          // Annotates the tokens from CurrentToken to the end of the line and
          // returns the resulting line type. LT_Invalid is returned when
          // consumeToken() fails. CurrentToken is dereferenced without a null
          // check, so it must be non-null on entry.
00612     if (CurrentToken->is(tok::hash)) {
00613       parsePreprocessorDirective();
00614       return LT_PreprocessorDirective;
00615     }
00616
00617     // Directly allow to 'import <string-literal>' to support protocol buffer
00618     // definitions (code.google.com/p/protobuf) or missing "#" (either way we
00619     // should not break the line).
00620     IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo();
00621     if (Info && Info->getPPKeywordID() == tok::pp_import &&
00622         CurrentToken->Next) {
00623       next();
00624       parseIncludeDirective();
00625       return LT_Other;
00626     }
00627
00628     // If this line starts and ends in '<' and '>', respectively, it is likely
00629     // part of "#define <a/b.h>".
00630     if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) {
00631       parseIncludeDirective();
00632       return LT_Other;
00633     }
00634
          // Regular line: consume every token, noting whether 'virtual' occurs
          // among the tokens consumed at this level.
00635     while (CurrentToken) {
00636       if (CurrentToken->is(tok::kw_virtual))
00637         KeywordVirtualFound = true;
00638       if (!consumeToken())
00639         return LT_Invalid;
00640     }
00641     if (KeywordVirtualFound)
00642       return LT_VirtualFunctionDecl;
00643
00644     if (Line.First->Type == TT_ObjCMethodSpecifier) {
            // Publish the longest selector name on the first selector token.
00645       if (Contexts.back().FirstObjCSelectorName)
00646         Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName =
00647             Contexts.back().LongestObjCSelectorName;
00648       return LT_ObjCMethodDecl;
00649     }
00650
00651     return LT_Other;
00652   }
00653 
00654 private:
00655   void resetTokenMetadata(FormatToken *Token) {
          // Clears the annotation state of \p Token: its Role, FakeLParens and
          // FakeRParens are always reset, and its Type is reset to TT_Unknown
          // unless it is one of the types listed below. Does nothing when
          // \p Token is null.
          // All accesses go through \p Token, the pointer that was null-checked,
          // rather than through the CurrentToken member.
00656     if (!Token)
00657       return;
00658
00659     // Reset token type in case we have already looked at it and then
00660     // recovered from an error (e.g. failure to find the matching >).
00661     if (Token->Type != TT_LambdaLSquare &&
00662         Token->Type != TT_FunctionLBrace &&
00663         Token->Type != TT_ImplicitStringLiteral &&
00664         Token->Type != TT_RegexLiteral &&
00665         Token->Type != TT_TrailingReturnArrow)
00666       Token->Type = TT_Unknown;
00667     Token->Role.reset();
00668     Token->FakeLParens.clear();
00669     Token->FakeRParens = 0;
00670   }
00671 
00672   void next() {
          // Finalizes the current token and moves on to the following one. Safe to
          // call when CurrentToken is already null.
00673     if (CurrentToken) {
            // Stamp the token with the state of the innermost context before
            // leaving it.
00674       CurrentToken->NestingLevel = Contexts.size() - 1;
00675       CurrentToken->BindingStrength = Contexts.back().BindingStrength;
00676       determineTokenType(*CurrentToken);
00677       CurrentToken = CurrentToken->Next;
00678     }
00679
          // Clear annotations already stored on the new current token, if any.
00680     resetTokenMetadata(CurrentToken);
00681   }
00682 
00683   /// \brief A struct to hold information valid in a specific context, e.g.
00684   /// a pair of parentheses.
00685   struct Context {
          // All flags not passed as arguments start out false, except
          // CanBeExpression, which starts out true.
00686     Context(tok::TokenKind ContextKind, unsigned BindingStrength,
00687             bool IsExpression)
00688         : ContextKind(ContextKind), BindingStrength(BindingStrength),
00689           LongestObjCSelectorName(0), ColonIsForRangeExpr(false),
00690           ColonIsDictLiteral(false), ColonIsObjCMethodExpr(false),
00691           FirstObjCSelectorName(nullptr), FirstStartOfName(nullptr),
00692           IsExpression(IsExpression), CanBeExpression(true),
00693           InTemplateArgument(false), InCtorInitializer(false),
00694           CaretFound(false), IsForEachMacro(false) {}
00695
          // Kind of the token that opened this context (tok::unknown for the
          // outermost one).
00696     tok::TokenKind ContextKind;
          // Copied onto every token consumed in this context by next().
00697     unsigned BindingStrength;
          // Largest ColumnWidth among the selector names seen in this context.
00698     unsigned LongestObjCSelectorName;
          // A ':' in this context may be typed TT_RangeBasedForLoopColon.
00699     bool ColonIsForRangeExpr;
          // A ':' in this context may be typed TT_DictLiteral.
00700     bool ColonIsDictLiteral;
          // A ':' in this context may be typed TT_ObjCMethodExpr.
00701     bool ColonIsObjCMethodExpr;
          // First selector-name token seen in this context, or null.
00702     FormatToken *FirstObjCSelectorName;
          // Token most recently typed TT_StartOfName in this context, or null. It
          // is flagged PartOfMultiVariableDeclStmt when a ',' is consumed.
00703     FormatToken *FirstStartOfName;
          // Whether the tokens in this context are currently treated as part of an
          // expression. A new context inherits the value from its parent.
00704     bool IsExpression;
          // Cleared after 'new'; read by parseSquare().
00705     bool CanBeExpression;
          // Set by parseAngle() unless 'template' precedes the '<'.
00706     bool InTemplateArgument;
          // Set once a token follows a TT_CtorInitializerColon.
00707     bool InCtorInitializer;
          // Read by parseParens() and parseBrace(). NOTE(review): where it is set
          // is not visible in this part of the file.
00708     bool CaretFound;
          // Set by parseParens() for the argument list of a foreach macro.
00709     bool IsForEachMacro;
00710   };
00711 
00712   /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime
00713   /// of each instance.
00714   struct ScopedContextCreator {
          // Parser whose context stack is modified.
00715     AnnotatingParser &P;
00716
          // The new context gets the parent's BindingStrength plus \p Increase and
          // inherits the parent's IsExpression. P.Contexts must not be empty.
00717     ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind,
00718                          unsigned Increase)
00719         : P(P) {
00720       P.Contexts.push_back(Context(ContextKind,
00721                                    P.Contexts.back().BindingStrength + Increase,
00722                                    P.Contexts.back().IsExpression));
00723     }
00724
          // Pops the context pushed by the constructor.
00725     ~ScopedContextCreator() { P.Contexts.pop_back(); }
00726   };
00727 
00728   void determineTokenType(FormatToken &Current) {
00729     if (Current.getPrecedence() == prec::Assignment &&
00730         !Line.First->isOneOf(tok::kw_template, tok::kw_using) &&
00731         (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) {
00732       Contexts.back().IsExpression = true;
00733       for (FormatToken *Previous = Current.Previous;
00734            Previous && !Previous->isOneOf(tok::comma, tok::semi);
00735            Previous = Previous->Previous) {
00736         if (Previous->isOneOf(tok::r_square, tok::r_paren)) {
00737           Previous = Previous->MatchingParen;
00738           if (!Previous)
00739             break;
00740         }
00741         if ((Previous->Type == TT_BinaryOperator ||
00742              Previous->Type == TT_UnaryOperator) &&
00743             Previous->isOneOf(tok::star, tok::amp) && Previous->Previous &&
00744             Previous->Previous->isNot(tok::equal)) {
00745           Previous->Type = TT_PointerOrReference;
00746         }
00747       }
00748     } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) {
00749       Contexts.back().IsExpression = true;
00750     } else if (Current.is(tok::l_paren) && !Line.MustBeDeclaration &&
00751                !Line.InPPDirective &&
00752                (!Current.Previous ||
00753                 Current.Previous->isNot(tok::kw_decltype))) {
00754       bool ParametersOfFunctionType =
00755           Current.Previous && Current.Previous->is(tok::r_paren) &&
00756           Current.Previous->MatchingParen &&
00757           Current.Previous->MatchingParen->Type == TT_FunctionTypeLParen;
00758       bool IsForOrCatch = Current.Previous &&
00759                           Current.Previous->isOneOf(tok::kw_for, tok::kw_catch);
00760       Contexts.back().IsExpression = !ParametersOfFunctionType && !IsForOrCatch;
00761     } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) {
00762       for (FormatToken *Previous = Current.Previous;
00763            Previous && Previous->isOneOf(tok::star, tok::amp);
00764            Previous = Previous->Previous)
00765         Previous->Type = TT_PointerOrReference;
00766       if (Line.MustBeDeclaration)
00767         Contexts.back().IsExpression = Contexts.front().InCtorInitializer;
00768     } else if (Current.Previous &&
00769                Current.Previous->Type == TT_CtorInitializerColon) {
00770       Contexts.back().IsExpression = true;
00771       Contexts.back().InCtorInitializer = true;
00772     } else if (Current.is(tok::kw_new)) {
00773       Contexts.back().CanBeExpression = false;
00774     } else if (Current.is(tok::semi) || Current.is(tok::exclaim)) {
00775       // This should be the condition or increment in a for-loop.
00776       Contexts.back().IsExpression = true;
00777     }
00778 
00779     if (Current.Type == TT_Unknown) {
00780       // Line.MightBeFunctionDecl can only be true after the parentheses of a
00781       // function declaration have been found. In this case, 'Current' is a
00782       // trailing token of this declaration and thus cannot be a name.
00783       if (isStartOfName(Current) &&
00784           (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) {
00785         Contexts.back().FirstStartOfName = &Current;
00786         Current.Type = TT_StartOfName;
00787       } else if (Current.is(tok::kw_auto)) {
00788         AutoFound = true;
00789       } else if (Current.is(tok::arrow) && AutoFound &&
00790                  Line.MustBeDeclaration && Current.NestingLevel == 0) {
00791         Current.Type = TT_TrailingReturnArrow;
00792       } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) {
00793         Current.Type =
00794             determineStarAmpUsage(Current, Contexts.back().CanBeExpression &&
00795                                                Contexts.back().IsExpression,
00796                                   Contexts.back().InTemplateArgument);
00797       } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) {
00798         Current.Type = determinePlusMinusCaretUsage(Current);
00799         if (Current.Type == TT_UnaryOperator && Current.is(tok::caret))
00800           Contexts.back().CaretFound = true;
00801       } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) {
00802         Current.Type = determineIncrementUsage(Current);
00803       } else if (Current.isOneOf(tok::exclaim, tok::tilde)) {
00804         Current.Type = TT_UnaryOperator;
00805       } else if (Current.is(tok::question)) {
00806         Current.Type = TT_ConditionalExpr;
00807       } else if (Current.isBinaryOperator() &&
00808                  (!Current.Previous ||
00809                   Current.Previous->isNot(tok::l_square))) {
00810         Current.Type = TT_BinaryOperator;
00811       } else if (Current.is(tok::comment)) {
00812         if (Current.TokenText.startswith("//"))
00813           Current.Type = TT_LineComment;
00814         else
00815           Current.Type = TT_BlockComment;
00816       } else if (Current.is(tok::r_paren)) {
00817         if (rParenEndsCast(Current))
00818           Current.Type = TT_CastRParen;
00819       } else if (Current.is(tok::at) && Current.Next) {
00820         switch (Current.Next->Tok.getObjCKeywordID()) {
00821         case tok::objc_interface:
00822         case tok::objc_implementation:
00823         case tok::objc_protocol:
00824           Current.Type = TT_ObjCDecl;
00825           break;
00826         case tok::objc_property:
00827           Current.Type = TT_ObjCProperty;
00828           break;
00829         default:
00830           break;
00831         }
00832       } else if (Current.is(tok::period)) {
00833         FormatToken *PreviousNoComment = Current.getPreviousNonComment();
00834         if (PreviousNoComment &&
00835             PreviousNoComment->isOneOf(tok::comma, tok::l_brace))
00836           Current.Type = TT_DesignatedInitializerPeriod;
00837       } else if (Current.isOneOf(tok::identifier, tok::kw_const) &&
00838                  Current.Previous &&
00839                  !Current.Previous->isOneOf(tok::equal, tok::at) &&
00840                  Line.MightBeFunctionDecl && Contexts.size() == 1) {
00841         // Line.MightBeFunctionDecl can only be true after the parentheses of a
00842         // function declaration have been found.
00843         Current.Type = TT_TrailingAnnotation;
00844       } else if (Style.Language == FormatStyle::LK_Java && Current.Previous &&
00845                  Current.Previous->is(tok::at) &&
00846                  Current.isNot(Keywords.kw_interface)) {
00847         const FormatToken& AtToken = *Current.Previous;
00848         if (!AtToken.Previous ||
00849             AtToken.Previous->Type == TT_LeadingJavaAnnotation)
00850           Current.Type = TT_LeadingJavaAnnotation;
00851         else
00852           Current.Type = TT_JavaAnnotation;
00853       }
00854     }
00855   }
00856 
00857   /// \brief Take a guess at whether \p Tok starts a name of a function or
00858   /// variable declaration.
00859   ///
00860   /// This is a heuristic based on whether \p Tok is an identifier following
00861   /// something that is likely a type.
00862   bool isStartOfName(const FormatToken &Tok) {
00863     if (Tok.isNot(tok::identifier) || !Tok.Previous)
00864       return false;
00865 
00866     // Skip "const" as it does not have an influence on whether this is a name.
00867     FormatToken *PreviousNotConst = Tok.Previous;
00868     while (PreviousNotConst && PreviousNotConst->is(tok::kw_const))
00869       PreviousNotConst = PreviousNotConst->Previous;
00870 
00871     if (!PreviousNotConst)
00872       return false;
00873 
00874     bool IsPPKeyword = PreviousNotConst->is(tok::identifier) &&
00875                        PreviousNotConst->Previous &&
00876                        PreviousNotConst->Previous->is(tok::hash);
00877 
00878     if (PreviousNotConst->Type == TT_TemplateCloser)
00879       return PreviousNotConst && PreviousNotConst->MatchingParen &&
00880              PreviousNotConst->MatchingParen->Previous &&
00881              PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template);
00882 
00883     if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen &&
00884         PreviousNotConst->MatchingParen->Previous &&
00885         PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype))
00886       return true;
00887 
00888     return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) ||
00889            PreviousNotConst->Type == TT_PointerOrReference ||
00890            PreviousNotConst->isSimpleTypeSpecifier();
00891   }
00892 
00893   /// \brief Determine whether ')' is ending a cast.
00894   bool rParenEndsCast(const FormatToken &Tok) {
00895     FormatToken *LeftOfParens = nullptr;
00896     if (Tok.MatchingParen)
00897       LeftOfParens = Tok.MatchingParen->getPreviousNonComment();
00898     if (LeftOfParens && LeftOfParens->is(tok::r_paren) &&
00899         LeftOfParens->MatchingParen)
00900       LeftOfParens = LeftOfParens->MatchingParen->Previous;
00901     if (LeftOfParens && LeftOfParens->is(tok::r_square) &&
00902         LeftOfParens->MatchingParen &&
00903         LeftOfParens->MatchingParen->Type == TT_LambdaLSquare)
00904       return false;
00905     bool IsCast = false;
00906     bool ParensAreEmpty = Tok.Previous == Tok.MatchingParen;
00907     bool ParensAreType = !Tok.Previous ||
00908                          Tok.Previous->Type == TT_PointerOrReference ||
00909                          Tok.Previous->Type == TT_TemplateCloser ||
00910                          Tok.Previous->isSimpleTypeSpecifier();
00911     if (Style.Language == FormatStyle::LK_JavaScript && Tok.Next &&
00912         Tok.Next->is(Keywords.kw_in))
00913       return false;
00914     bool ParensCouldEndDecl =
00915         Tok.Next && Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace);
00916     bool IsSizeOfOrAlignOf =
00917         LeftOfParens && LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof);
00918     if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf &&
00919         ((Contexts.size() > 1 && Contexts[Contexts.size() - 2].IsExpression) ||
00920          (Tok.Next && Tok.Next->isBinaryOperator())))
00921       IsCast = true;
00922     else if (Tok.Next && Tok.Next->isNot(tok::string_literal) &&
00923              (Tok.Next->Tok.isLiteral() ||
00924               Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof)))
00925       IsCast = true;
00926     // If there is an identifier after the (), it is likely a cast, unless
00927     // there is also an identifier before the ().
00928     else if (LeftOfParens &&
00929              (LeftOfParens->Tok.getIdentifierInfo() == nullptr ||
00930               LeftOfParens->is(tok::kw_return)) &&
00931              LeftOfParens->Type != TT_OverloadedOperator &&
00932              LeftOfParens->isNot(tok::at) &&
00933              LeftOfParens->Type != TT_TemplateCloser && Tok.Next) {
00934       if (Tok.Next->isOneOf(tok::identifier, tok::numeric_constant)) {
00935         IsCast = true;
00936       } else {
00937         // Use heuristics to recognize c style casting.
00938         FormatToken *Prev = Tok.Previous;
00939         if (Prev && Prev->isOneOf(tok::amp, tok::star))
00940           Prev = Prev->Previous;
00941 
00942         if (Prev && Tok.Next && Tok.Next->Next) {
00943           bool NextIsUnary = Tok.Next->isUnaryOperator() ||
00944                              Tok.Next->isOneOf(tok::amp, tok::star);
00945           IsCast =
00946               NextIsUnary && !Tok.Next->is(tok::plus) &&
00947               Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant);
00948         }
00949 
00950         for (; Prev != Tok.MatchingParen; Prev = Prev->Previous) {
00951           if (!Prev || !Prev->isOneOf(tok::kw_const, tok::identifier)) {
00952             IsCast = false;
00953             break;
00954           }
00955         }
00956       }
00957     }
00958     return IsCast && !ParensAreEmpty;
00959   }
00960 
00961   /// \brief Return the type of the given token assuming it is * or &.
00962   TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression,
00963                                   bool InTemplateArgument) {
00964     if (Style.Language == FormatStyle::LK_JavaScript)
00965       return TT_BinaryOperator;
00966 
00967     const FormatToken *PrevToken = Tok.getPreviousNonComment();
00968     if (!PrevToken)
00969       return TT_UnaryOperator;
00970 
00971     const FormatToken *NextToken = Tok.getNextNonComment();
00972     if (!NextToken || NextToken->is(tok::l_brace))
00973       return TT_Unknown;
00974 
00975     if (PrevToken->is(tok::coloncolon))
00976       return TT_PointerOrReference;
00977 
00978     if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace,
00979                            tok::comma, tok::semi, tok::kw_return, tok::colon,
00980                            tok::equal, tok::kw_delete, tok::kw_sizeof) ||
00981         PrevToken->Type == TT_BinaryOperator ||
00982         PrevToken->Type == TT_ConditionalExpr ||
00983         PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen)
00984       return TT_UnaryOperator;
00985 
00986     if (NextToken->is(tok::l_square) && NextToken->Type != TT_LambdaLSquare)
00987       return TT_PointerOrReference;
00988     if (NextToken->isOneOf(tok::kw_operator, tok::comma))
00989       return TT_PointerOrReference;
00990 
00991     if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen &&
00992         PrevToken->MatchingParen->Previous &&
00993         PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof,
00994                                                     tok::kw_decltype))
00995       return TT_PointerOrReference;
00996 
00997     if (PrevToken->Tok.isLiteral() ||
00998         PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true,
00999                            tok::kw_false, tok::r_brace) ||
01000         NextToken->Tok.isLiteral() ||
01001         NextToken->isOneOf(tok::kw_true, tok::kw_false) ||
01002         NextToken->isUnaryOperator() ||
01003         // If we know we're in a template argument, there are no named
01004         // declarations. Thus, having an identifier on the right-hand side
01005         // indicates a binary operator.
01006         (InTemplateArgument && NextToken->Tok.isAnyIdentifier()))
01007       return TT_BinaryOperator;
01008 
01009     // "&&(" is quite unlikely to be two successive unary "&".
01010     if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren))
01011       return TT_BinaryOperator;
01012 
01013     // This catches some cases where evaluation order is used as control flow:
01014     //   aaa && aaa->f();
01015     const FormatToken *NextNextToken = NextToken->getNextNonComment();
01016     if (NextNextToken && NextNextToken->is(tok::arrow))
01017       return TT_BinaryOperator;
01018 
01019     // It is very unlikely that we are going to find a pointer or reference type
01020     // definition on the RHS of an assignment.
01021     if (IsExpression)
01022       return TT_BinaryOperator;
01023 
01024     return TT_PointerOrReference;
01025   }
01026 
01027   TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) {
01028     const FormatToken *PrevToken = Tok.getPreviousNonComment();
01029     if (!PrevToken || PrevToken->Type == TT_CastRParen)
01030       return TT_UnaryOperator;
01031 
01032     // Use heuristics to recognize unary operators.
01033     if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square,
01034                            tok::question, tok::colon, tok::kw_return,
01035                            tok::kw_case, tok::at, tok::l_brace))
01036       return TT_UnaryOperator;
01037 
01038     // There can't be two consecutive binary operators.
01039     if (PrevToken->Type == TT_BinaryOperator)
01040       return TT_UnaryOperator;
01041 
01042     // Fall back to marking the token as binary operator.
01043     return TT_BinaryOperator;
01044   }
01045 
01046   /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
01047   TokenType determineIncrementUsage(const FormatToken &Tok) {
01048     const FormatToken *PrevToken = Tok.getPreviousNonComment();
01049     if (!PrevToken || PrevToken->Type == TT_CastRParen)
01050       return TT_UnaryOperator;
01051     if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier))
01052       return TT_TrailingUnaryOperator;
01053 
01054     return TT_UnaryOperator;
01055   }
01056 
01057   SmallVector<Context, 8> Contexts;
01058 
01059   const FormatStyle &Style;
01060   AnnotatedLine &Line;
01061   FormatToken *CurrentToken;
01062   bool KeywordVirtualFound;
01063   bool AutoFound;
01064   const AdditionalKeywords &Keywords;
01065 };
01066 
01067 static int PrecedenceUnaryOperator = prec::PointerToMember + 1;
01068 static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2;
01069 
01070 /// \brief Parses binary expressions by inserting fake parenthesis based on
01071 /// operator precedence.
01072 class ExpressionParser {
01073 public:
01074   ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords,
01075                    AnnotatedLine &Line)
01076       : Style(Style), Keywords(Keywords), Current(Line.First) {}
01077 
01078   /// \brief Parse expressions with the given operatore precedence.
01079   void parse(int Precedence = 0) {
01080     // Skip 'return' and ObjC selector colons as they are not part of a binary
01081     // expression.
01082     while (Current &&
01083            (Current->is(tok::kw_return) ||
01084             (Current->is(tok::colon) && (Current->Type == TT_ObjCMethodExpr ||
01085                                          Current->Type == TT_DictLiteral))))
01086       next();
01087 
01088     if (!Current || Precedence > PrecedenceArrowAndPeriod)
01089       return;
01090 
01091     // Conditional expressions need to be parsed separately for proper nesting.
01092     if (Precedence == prec::Conditional) {
01093       parseConditionalExpr();
01094       return;
01095     }
01096 
01097     // Parse unary operators, which all have a higher precedence than binary
01098     // operators.
01099     if (Precedence == PrecedenceUnaryOperator) {
01100       parseUnaryOperator();
01101       return;
01102     }
01103 
01104     FormatToken *Start = Current;
01105     FormatToken *LatestOperator = nullptr;
01106     unsigned OperatorIndex = 0;
01107 
01108     while (Current) {
01109       // Consume operators with higher precedence.
01110       parse(Precedence + 1);
01111 
01112       int CurrentPrecedence = getCurrentPrecedence();
01113 
01114       if (Current && Current->Type == TT_SelectorName &&
01115           Precedence == CurrentPrecedence) {
01116         if (LatestOperator)
01117           addFakeParenthesis(Start, prec::Level(Precedence));
01118         Start = Current;
01119       }
01120 
01121       // At the end of the line or when an operator with higher precedence is
01122       // found, insert fake parenthesis and return.
01123       if (!Current || (Current->closesScope() && Current->MatchingParen) ||
01124           (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) ||
01125           (CurrentPrecedence == prec::Conditional &&
01126            Precedence == prec::Assignment && Current->is(tok::colon))) {
01127         if (LatestOperator) {
01128           LatestOperator->LastOperator = true;
01129           if (Precedence == PrecedenceArrowAndPeriod) {
01130             // Call expressions don't have a binary operator precedence.
01131             addFakeParenthesis(Start, prec::Unknown);
01132           } else {
01133             addFakeParenthesis(Start, prec::Level(Precedence));
01134           }
01135         }
01136         return;
01137       }
01138 
01139       // Consume scopes: (), [], <> and {}
01140       if (Current->opensScope()) {
01141         while (Current && !Current->closesScope()) {
01142           next();
01143           parse();
01144         }
01145         next();
01146       } else {
01147         // Operator found.
01148         if (CurrentPrecedence == Precedence) {
01149           LatestOperator = Current;
01150           Current->OperatorIndex = OperatorIndex;
01151           ++OperatorIndex;
01152         }
01153         next(/*SkipPastLeadingComments=*/Precedence > 0);
01154       }
01155     }
01156   }
01157 
01158 private:
01159   /// \brief Gets the precedence (+1) of the given token for binary operators
01160   /// and other tokens that we treat like binary operators.
01161   int getCurrentPrecedence() {
01162     if (Current) {
01163       const FormatToken *NextNonComment = Current->getNextNonComment();
01164       if (Current->Type == TT_ConditionalExpr)
01165         return prec::Conditional;
01166       else if (NextNonComment && NextNonComment->is(tok::colon) &&
01167                NextNonComment->Type == TT_DictLiteral)
01168         return prec::Comma;
01169       else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon ||
01170                Current->Type == TT_SelectorName ||
01171                (Current->is(tok::comment) && NextNonComment &&
01172                 NextNonComment->Type == TT_SelectorName))
01173         return 0;
01174       else if (Current->Type == TT_RangeBasedForLoopColon)
01175         return prec::Comma;
01176       else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma))
01177         return Current->getPrecedence();
01178       else if (Current->isOneOf(tok::period, tok::arrow))
01179         return PrecedenceArrowAndPeriod;
01180       else if (Style.Language == FormatStyle::LK_Java &&
01181                Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements))
01182         return 0;
01183     }
01184     return -1;
01185   }
01186 
01187   void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) {
01188     Start->FakeLParens.push_back(Precedence);
01189     if (Precedence > prec::Unknown)
01190       Start->StartsBinaryExpression = true;
01191     if (Current) {
01192       FormatToken *Previous = Current->Previous;
01193       if (Previous->is(tok::comment) && Previous->Previous)
01194         Previous = Previous->Previous;
01195       ++Previous->FakeRParens;
01196       if (Precedence > prec::Unknown)
01197         Previous->EndsBinaryExpression = true;
01198     }
01199   }
01200 
01201   /// \brief Parse unary operator expressions and surround them with fake
01202   /// parentheses if appropriate.
01203   void parseUnaryOperator() {
01204     if (!Current || Current->Type != TT_UnaryOperator) {
01205       parse(PrecedenceArrowAndPeriod);
01206       return;
01207     }
01208 
01209     FormatToken *Start = Current;
01210     next();
01211     parseUnaryOperator();
01212 
01213     // The actual precedence doesn't matter.
01214     addFakeParenthesis(Start, prec::Unknown);
01215   }
01216 
01217   void parseConditionalExpr() {
01218     while (Current && Current->isTrailingComment()) {
01219       next();
01220     }
01221     FormatToken *Start = Current;
01222     parse(prec::LogicalOr);
01223     if (!Current || !Current->is(tok::question))
01224       return;
01225     next();
01226     parse(prec::Assignment);
01227     if (!Current || Current->Type != TT_ConditionalExpr)
01228       return;
01229     next();
01230     parse(prec::Assignment);
01231     addFakeParenthesis(Start, prec::Conditional);
01232   }
01233 
01234   void next(bool SkipPastLeadingComments = true) {
01235     if (Current)
01236       Current = Current->Next;
01237     while (Current &&
01238            (Current->NewlinesBefore == 0 || SkipPastLeadingComments) &&
01239            Current->isTrailingComment())
01240       Current = Current->Next;
01241   }
01242 
01243   const FormatStyle &Style;
01244   const AdditionalKeywords &Keywords;
01245   FormatToken *Current;
01246 };
01247 
01248 } // end anonymous namespace
01249 
01250 void
01251 TokenAnnotator::setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) {
01252   const AnnotatedLine *NextNonCommentLine = nullptr;
01253   for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(),
01254                                                           E = Lines.rend();
01255        I != E; ++I) {
01256     if (NextNonCommentLine && (*I)->First->is(tok::comment) &&
01257         (*I)->First->Next == nullptr)
01258       (*I)->Level = NextNonCommentLine->Level;
01259     else
01260       NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr;
01261 
01262     setCommentLineLevels((*I)->Children);
01263   }
01264 }
01265 
01266 void TokenAnnotator::annotate(AnnotatedLine &Line) {
01267   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
01268                                                   E = Line.Children.end();
01269        I != E; ++I) {
01270     annotate(**I);
01271   }
01272   AnnotatingParser Parser(Style, Line, Keywords);
01273   Line.Type = Parser.parseLine();
01274   if (Line.Type == LT_Invalid)
01275     return;
01276 
01277   ExpressionParser ExprParser(Style, Keywords, Line);
01278   ExprParser.parse();
01279 
01280   if (Line.First->Type == TT_ObjCMethodSpecifier)
01281     Line.Type = LT_ObjCMethodDecl;
01282   else if (Line.First->Type == TT_ObjCDecl)
01283     Line.Type = LT_ObjCDecl;
01284   else if (Line.First->Type == TT_ObjCProperty)
01285     Line.Type = LT_ObjCProperty;
01286 
01287   Line.First->SpacesRequiredBefore = 1;
01288   Line.First->CanBreakBefore = Line.First->MustBreakBefore;
01289 }
01290 
01291 // This function heuristically determines whether 'Current' starts the name of a
01292 // function declaration.
01293 static bool isFunctionDeclarationName(const FormatToken &Current) {
01294   if (Current.Type != TT_StartOfName ||
01295       Current.NestingLevel != 0)
01296     return false;
01297   const FormatToken *Next = Current.Next;
01298   for (; Next; Next = Next->Next) {
01299     if (Next->Type == TT_TemplateOpener) {
01300       Next = Next->MatchingParen;
01301     } else if (Next->is(tok::coloncolon)) {
01302       Next = Next->Next;
01303       if (!Next || !Next->is(tok::identifier))
01304         return false;
01305     } else if (Next->is(tok::l_paren)) {
01306       break;
01307     } else {
01308       return false;
01309     }
01310   }
01311   if (!Next)
01312     return false;
01313   assert(Next->is(tok::l_paren));
01314   if (Next->Next == Next->MatchingParen)
01315     return true;
01316   for (const FormatToken *Tok = Next->Next; Tok != Next->MatchingParen;
01317        Tok = Tok->Next) {
01318     if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() ||
01319         Tok->Type == TT_PointerOrReference || Tok->Type == TT_StartOfName)
01320       return true;
01321     if (Tok->isOneOf(tok::l_brace, tok::string_literal) || Tok->Tok.isLiteral())
01322       return false;
01323   }
01324   return false;
01325 }
01326 
01327 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) {
01328   for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(),
01329                                                   E = Line.Children.end();
01330        I != E; ++I) {
01331     calculateFormattingInformation(**I);
01332   }
01333 
01334   Line.First->TotalLength =
01335       Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth;
01336   if (!Line.First->Next)
01337     return;
01338   FormatToken *Current = Line.First->Next;
01339   bool InFunctionDecl = Line.MightBeFunctionDecl;
01340   while (Current) {
01341     if (isFunctionDeclarationName(*Current))
01342       Current->Type = TT_FunctionDeclarationName;
01343     if (Current->Type == TT_LineComment) {
01344       if (Current->Previous->BlockKind == BK_BracedInit &&
01345           Current->Previous->opensScope())
01346         Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1;
01347       else
01348         Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments;
01349 
01350       // If we find a trailing comment, iterate backwards to determine whether
01351       // it seems to relate to a specific parameter. If so, break before that
01352       // parameter to avoid changing the comment's meaning. E.g. don't move 'b'
01353       // to the previous line in:
01354       //   SomeFunction(a,
01355       //                b, // comment
01356       //                c);
01357       if (!Current->HasUnescapedNewline) {
01358         for (FormatToken *Parameter = Current->Previous; Parameter;
01359              Parameter = Parameter->Previous) {
01360           if (Parameter->isOneOf(tok::comment, tok::r_brace))
01361             break;
01362           if (Parameter->Previous && Parameter->Previous->is(tok::comma)) {
01363             if (Parameter->Previous->Type != TT_CtorInitializerComma &&
01364                 Parameter->HasUnescapedNewline)
01365               Parameter->MustBreakBefore = true;
01366             break;
01367           }
01368         }
01369       }
01370     } else if (Current->SpacesRequiredBefore == 0 &&
01371                spaceRequiredBefore(Line, *Current)) {
01372       Current->SpacesRequiredBefore = 1;
01373     }
01374 
01375     Current->MustBreakBefore =
01376         Current->MustBreakBefore || mustBreakBefore(Line, *Current);
01377 
01378     if (Style.AlwaysBreakAfterDefinitionReturnType &&
01379         InFunctionDecl && Current->Type == TT_FunctionDeclarationName &&
01380         !Line.Last->isOneOf(tok::semi, tok::comment))  // Only for definitions.
01381       // FIXME: Line.Last points to other characters than tok::semi
01382       // and tok::lbrace.
01383       Current->MustBreakBefore = true;
01384 
01385     Current->CanBreakBefore =
01386         Current->MustBreakBefore || canBreakBefore(Line, *Current);
01387     unsigned ChildSize = 0;
01388     if (Current->Previous->Children.size() == 1) {
01389       FormatToken &LastOfChild = *Current->Previous->Children[0]->Last;
01390       ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit
01391                                                   : LastOfChild.TotalLength + 1;
01392     }
01393     const FormatToken *Prev= Current->Previous;
01394     if (Current->MustBreakBefore || Prev->Children.size() > 1 ||
01395         (Prev->Children.size() == 1 &&
01396          Prev->Children[0]->First->MustBreakBefore) ||
01397         Current->IsMultiline)
01398       Current->TotalLength = Prev->TotalLength + Style.ColumnLimit;
01399     else
01400       Current->TotalLength = Prev->TotalLength + Current->ColumnWidth +
01401                              ChildSize + Current->SpacesRequiredBefore;
01402 
01403     if (Current->Type == TT_CtorInitializerColon)
01404       InFunctionDecl = false;
01405 
01406     // FIXME: Only calculate this if CanBreakBefore is true once static
01407     // initializers etc. are sorted out.
01408     // FIXME: Move magic numbers to a better place.
01409     Current->SplitPenalty = 20 * Current->BindingStrength +
01410                             splitPenalty(Line, *Current, InFunctionDecl);
01411 
01412     Current = Current->Next;
01413   }
01414 
01415   calculateUnbreakableTailLengths(Line);
01416   for (Current = Line.First; Current != nullptr; Current = Current->Next) {
01417     if (Current->Role)
01418       Current->Role->precomputeFormattingInfos(Current);
01419   }
01420 
01421   DEBUG({ printDebugInfo(Line); });
01422 }
01423 
01424 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) {
01425   unsigned UnbreakableTailLength = 0;
01426   FormatToken *Current = Line.Last;
01427   while (Current) {
01428     Current->UnbreakableTailLength = UnbreakableTailLength;
01429     if (Current->CanBreakBefore ||
01430         Current->isOneOf(tok::comment, tok::string_literal)) {
01431       UnbreakableTailLength = 0;
01432     } else {
01433       UnbreakableTailLength +=
01434           Current->ColumnWidth + Current->SpacesRequiredBefore;
01435     }
01436     Current = Current->Previous;
01437   }
01438 }
01439 
01440 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line,
01441                                       const FormatToken &Tok,
01442                                       bool InFunctionDecl) {
01443   const FormatToken &Left = *Tok.Previous;
01444   const FormatToken &Right = Tok;
01445 
01446   if (Left.is(tok::semi))
01447     return 0;
01448 
01449   if (Style.Language == FormatStyle::LK_Java) {
01450     if (Left.Type == TT_LeadingJavaAnnotation)
01451       return 1;
01452     if (Right.is(Keywords.kw_extends))
01453       return 1;
01454     if (Right.is(Keywords.kw_implements))
01455       return 2;
01456     if (Left.is(tok::comma) && Left.NestingLevel == 0)
01457       return 3;
01458   }
01459 
01460   if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next &&
01461                               Right.Next->Type == TT_DictLiteral))
01462     return 1;
01463   if (Right.is(tok::l_square)) {
01464     if (Style.Language == FormatStyle::LK_Proto)
01465       return 1;
01466     if (Right.Type != TT_ObjCMethodExpr && Right.Type != TT_LambdaLSquare)
01467       return 500;
01468   }
01469 
01470   if (Right.Type == TT_StartOfName ||
01471       Right.Type == TT_FunctionDeclarationName || Right.is(tok::kw_operator)) {
01472     if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt)
01473       return 3;
01474     if (Left.Type == TT_StartOfName)
01475       return 20;
01476     if (InFunctionDecl && Right.NestingLevel == 0)
01477       return Style.PenaltyReturnTypeOnItsOwnLine;
01478     return 200;
01479   }
01480   if (Left.is(tok::equal) && Right.is(tok::l_brace))
01481     return 150;
01482   if (Left.Type == TT_CastRParen)
01483     return 100;
01484   if (Left.is(tok::coloncolon) ||
01485       (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto))
01486     return 500;
01487   if (Left.isOneOf(tok::kw_class, tok::kw_struct))
01488     return 5000;
01489 
01490   if (Left.Type == TT_RangeBasedForLoopColon ||
01491       Left.Type == TT_InheritanceColon)
01492     return 2;
01493 
01494   if (Right.isMemberAccess()) {
01495     if (Left.is(tok::r_paren) && Left.MatchingParen &&
01496         Left.MatchingParen->ParameterCount > 0)
01497       return 20; // Should be smaller than breaking at a nested comma.
01498     return 150;
01499   }
01500 
01501   if (Right.Type == TT_TrailingAnnotation &&
01502       (!Right.Next || Right.Next->isNot(tok::l_paren))) {
01503     // Moving trailing annotations to the next line is fine for ObjC method
01504     // declarations.
01505     if (Line.First->Type == TT_ObjCMethodSpecifier)
01506 
01507       return 10;
01508     // Generally, breaking before a trailing annotation is bad unless it is
01509     // function-like. It seems to be especially preferable to keep standard
01510     // annotations (i.e. "const", "final" and "override") on the same line.
01511     // Use a slightly higher penalty after ")" so that annotations like
01512     // "const override" are kept together.
01513     bool is_short_annotation = Right.TokenText.size() < 10;
01514     return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0);
01515   }
01516 
01517   // In for-loops, prefer breaking at ',' and ';'.
01518   if (Line.First->is(tok::kw_for) && Left.is(tok::equal))
01519     return 4;
01520 
01521   // In Objective-C method expressions, prefer breaking before "param:" over
01522   // breaking after it.
01523   if (Right.Type == TT_SelectorName)
01524     return 0;
01525   if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr)
01526     return Line.MightBeFunctionDecl ? 50 : 500;
01527 
01528   if (Left.is(tok::l_paren) && InFunctionDecl)
01529     return 100;
01530   if (Left.is(tok::equal) && InFunctionDecl)
01531     return 110;
01532   if (Right.is(tok::r_brace))
01533     return 1;
01534   if (Left.Type == TT_TemplateOpener)
01535     return 100;
01536   if (Left.opensScope())
01537     return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter
01538                                    : 19;
01539 
01540   if (Right.is(tok::lessless)) {
01541     if (Left.is(tok::string_literal)) {
01542       StringRef Content = Left.TokenText;
01543       if (Content.startswith("\""))
01544         Content = Content.drop_front(1);
01545       if (Content.endswith("\""))
01546         Content = Content.drop_back(1);
01547       Content = Content.trim();
01548       if (Content.size() > 1 &&
01549           (Content.back() == ':' || Content.back() == '='))
01550         return 25;
01551     }
01552     return 1; // Breaking at a << is really cheap.
01553   }
01554   if (Left.Type == TT_ConditionalExpr)
01555     return prec::Conditional;
01556   prec::Level Level = Left.getPrecedence();
01557 
01558   if (Level != prec::Unknown)
01559     return Level;
01560 
01561   return 3;
01562 }
01563 
01564 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
01565                                           const FormatToken &Left,
01566                                           const FormatToken &Right) {
01567   if (Left.is(tok::kw_return) && Right.isNot(tok::semi))
01568     return true;
01569   if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty &&
01570       Left.Tok.getObjCKeywordID() == tok::objc_property)
01571     return true;
01572   if (Right.is(tok::hashhash))
01573     return Left.is(tok::hash);
01574   if (Left.isOneOf(tok::hashhash, tok::hash))
01575     return Right.is(tok::hash);
01576   if (Left.is(tok::l_paren) && Right.is(tok::r_paren))
01577     return Style.SpaceInEmptyParentheses;
01578   if (Left.is(tok::l_paren) || Right.is(tok::r_paren))
01579     return (Right.Type == TT_CastRParen ||
01580             (Left.MatchingParen && Left.MatchingParen->Type == TT_CastRParen))
01581                ? Style.SpacesInCStyleCastParentheses
01582                : Style.SpacesInParentheses;
01583   if (Right.isOneOf(tok::semi, tok::comma))
01584     return false;
01585   if (Right.is(tok::less) &&
01586       (Left.isOneOf(tok::kw_template, tok::r_paren) ||
01587        (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList)))
01588     return true;
01589   if (Left.isOneOf(tok::exclaim, tok::tilde))
01590     return false;
01591   if (Left.is(tok::at) &&
01592       Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant,
01593                     tok::numeric_constant, tok::l_paren, tok::l_brace,
01594                     tok::kw_true, tok::kw_false))
01595     return false;
01596   if (Left.is(tok::coloncolon))
01597     return false;
01598   if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less))
01599     return false;
01600   if (Right.is(tok::ellipsis))
01601     return Left.Tok.isLiteral();
01602   if (Left.is(tok::l_square) && Right.is(tok::amp))
01603     return false;
01604   if (Right.Type == TT_PointerOrReference)
01605     return Left.Tok.isLiteral() ||
01606            ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) &&
01607             Style.PointerAlignment != FormatStyle::PAS_Left);
01608   if (Right.Type == TT_FunctionTypeLParen && Left.isNot(tok::l_paren) &&
01609       (Left.Type != TT_PointerOrReference ||
01610        Style.PointerAlignment != FormatStyle::PAS_Right))
01611     return true;
01612   if (Left.Type == TT_PointerOrReference)
01613     return Right.Tok.isLiteral() || Right.Type == TT_BlockComment ||
01614            ((Right.Type != TT_PointerOrReference) &&
01615             Right.isNot(tok::l_paren) &&
01616             Style.PointerAlignment != FormatStyle::PAS_Right && Left.Previous &&
01617             !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon));
01618   if (Right.is(tok::star) && Left.is(tok::l_paren))
01619     return false;
01620   if (Left.is(tok::l_square))
01621     return (Left.Type == TT_ArrayInitializerLSquare &&
01622             Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) ||
01623            (Left.Type == TT_ArraySubscriptLSquare &&
01624             Style.SpacesInSquareBrackets && Right.isNot(tok::r_square));
01625   if (Right.is(tok::r_square))
01626     return Right.MatchingParen &&
01627            ((Style.SpacesInContainerLiterals &&
01628              Right.MatchingParen->Type == TT_ArrayInitializerLSquare) ||
01629             (Style.SpacesInSquareBrackets &&
01630              Right.MatchingParen->Type == TT_ArraySubscriptLSquare));
01631   if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr &&
01632       Right.Type != TT_LambdaLSquare && Left.isNot(tok::numeric_constant) &&
01633       Left.Type != TT_DictLiteral)
01634     return false;
01635   if (Left.is(tok::colon))
01636     return Left.Type != TT_ObjCMethodExpr;
01637   if (Left.is(tok::l_brace) && Right.is(tok::r_brace))
01638     return !Left.Children.empty(); // No spaces in "{}".
01639   if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) ||
01640       (Right.is(tok::r_brace) && Right.MatchingParen &&
01641        Right.MatchingParen->BlockKind != BK_Block))
01642     return !Style.Cpp11BracedListStyle;
01643   if (Left.Type == TT_BlockComment)
01644     return !Left.TokenText.endswith("=*/");
01645   if (Right.is(tok::l_paren)) {
01646     if (Left.is(tok::r_paren) && Left.Type == TT_AttributeParen)
01647       return true;
01648     return Line.Type == LT_ObjCDecl ||
01649            Left.isOneOf(tok::kw_new, tok::kw_delete, tok::semi) ||
01650            (Style.SpaceBeforeParens != FormatStyle::SBPO_Never &&
01651             (Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while,
01652                           tok::kw_switch, tok::kw_case) ||
01653              (Left.is(tok::kw_catch) &&
01654               (!Left.Previous || Left.Previous->isNot(tok::period))) ||
01655              Left.IsForEachMacro)) ||
01656            (Style.SpaceBeforeParens == FormatStyle::SBPO_Always &&
01657             (Left.is(tok::identifier) || Left.isFunctionLikeKeyword()) &&
01658             Line.Type != LT_PreprocessorDirective);
01659   }
01660   if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword)
01661     return false;
01662   if (Right.Type == TT_UnaryOperator)
01663     return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) &&
01664            (Left.isNot(tok::colon) || Left.Type != TT_ObjCMethodExpr);
01665   if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square,
01666                     tok::r_paren) ||
01667        Left.isSimpleTypeSpecifier()) &&
01668       Right.is(tok::l_brace) && Right.getNextNonComment() &&
01669       Right.BlockKind != BK_Block)
01670     return false;
01671   if (Left.is(tok::period) || Right.is(tok::period))
01672     return false;
01673   if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L")
01674     return false;
01675   if (Left.Type == TT_TemplateCloser && Left.MatchingParen &&
01676       Left.MatchingParen->Previous &&
01677       Left.MatchingParen->Previous->is(tok::period))
01678     // A.<B>DoSomething();
01679     return false;
01680   return true;
01681 }
01682 
01683 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
01684                                          const FormatToken &Right) {
01685   const FormatToken &Left = *Right.Previous;
01686   if (Style.Language == FormatStyle::LK_Proto) {
01687     if (Right.is(tok::period) &&
01688         Left.isOneOf(Keywords.kw_optional, Keywords.kw_required,
01689                      Keywords.kw_repeated))
01690       return true;
01691     if (Right.is(tok::l_paren) &&
01692         Left.isOneOf(Keywords.kw_returns, Keywords.kw_option))
01693       return true;
01694   } else if (Style.Language == FormatStyle::LK_JavaScript) {
01695     if (Left.is(Keywords.kw_var))
01696       return true;
01697   } else if (Style.Language == FormatStyle::LK_Java) {
01698     if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren))
01699       return Style.SpaceBeforeParens != FormatStyle::SBPO_Never;
01700     if (Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private,
01701                      tok::kw_protected) &&
01702         Right.Type == TT_TemplateOpener)
01703       return true;
01704   }
01705   if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
01706     return true; // Never ever merge two identifiers.
01707   if (Left.Type == TT_ImplicitStringLiteral)
01708     return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
01709   if (Line.Type == LT_ObjCMethodDecl) {
01710     if (Left.Type == TT_ObjCMethodSpecifier)
01711       return true;
01712     if (Left.is(tok::r_paren) && Right.is(tok::identifier))
01713       // Don't space between ')' and <id>
01714       return false;
01715   }
01716   if (Line.Type == LT_ObjCProperty &&
01717       (Right.is(tok::equal) || Left.is(tok::equal)))
01718     return false;
01719 
01720   if (Right.Type == TT_TrailingReturnArrow ||
01721       Left.Type == TT_TrailingReturnArrow)
01722     return true;
01723   if (Left.is(tok::comma))
01724     return true;
01725   if (Right.is(tok::comma))
01726     return false;
01727   if (Right.Type == TT_CtorInitializerColon || Right.Type == TT_ObjCBlockLParen)
01728     return true;
01729   if (Left.is(tok::kw_operator))
01730     return Right.is(tok::coloncolon);
01731   if (Right.Type == TT_OverloadedOperatorLParen)
01732     return false;
01733   if (Right.is(tok::colon))
01734     return !Line.First->isOneOf(tok::kw_case, tok::kw_default) &&
01735            Right.getNextNonComment() && Right.Type != TT_ObjCMethodExpr &&
01736            !Left.is(tok::question) &&
01737            !(Right.Type == TT_InlineASMColon && Left.is(tok::coloncolon)) &&
01738            (Right.Type != TT_DictLiteral || Style.SpacesInContainerLiterals);
01739   if (Left.Type == TT_UnaryOperator)
01740     return Right.Type == TT_BinaryOperator;
01741   if (Left.Type == TT_CastRParen)
01742     return Style.SpaceAfterCStyleCast || Right.Type == TT_BinaryOperator;
01743   if (Left.is(tok::greater) && Right.is(tok::greater)) {
01744     return Right.Type == TT_TemplateCloser && Left.Type == TT_TemplateCloser &&
01745            (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles);
01746   }
01747   if (Right.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) ||
01748       Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar))
01749     return false;
01750   if (!Style.SpaceBeforeAssignmentOperators &&
01751       Right.getPrecedence() == prec::Assignment)
01752     return false;
01753   if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace))
01754     return (Left.Type == TT_TemplateOpener &&
01755             Style.Standard == FormatStyle::LS_Cpp03) ||
01756            !(Left.isOneOf(tok::identifier, tok::l_paren, tok::r_paren) ||
01757              Left.Type == TT_TemplateCloser || Left.Type == TT_TemplateOpener);
01758   if ((Left.Type == TT_TemplateOpener) != (Right.Type == TT_TemplateCloser))
01759     return Style.SpacesInAngles;
01760   if ((Right.Type == TT_BinaryOperator && !Left.is(tok::l_paren)) ||
01761       Left.Type == TT_BinaryOperator || Left.Type == TT_ConditionalExpr)
01762     return true;
01763   if (Left.Type == TT_TemplateCloser && Right.is(tok::l_paren))
01764     return Style.SpaceBeforeParens == FormatStyle::SBPO_Always;
01765   if (Right.Type == TT_TemplateOpener && Left.is(tok::r_paren) &&
01766       Left.MatchingParen &&
01767       Left.MatchingParen->Type == TT_OverloadedOperatorLParen)
01768     return false;
01769   if (Right.is(tok::less) && Left.isNot(tok::l_paren) &&
01770       Line.First->is(tok::hash))
01771     return true;
01772   if (Right.Type == TT_TrailingUnaryOperator)
01773     return false;
01774   if (Left.Type == TT_RegexLiteral)
01775     return false;
01776   return spaceRequiredBetween(Line, Left, Right);
01777 }
01778 
01779 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style.
01780 static bool isAllmanBrace(const FormatToken &Tok) {
01781   return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block &&
01782          Tok.Type != TT_ObjCBlockLBrace && Tok.Type != TT_DictLiteral;
01783 }
01784 
01785 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line,
01786                                      const FormatToken &Right) {
01787   const FormatToken &Left = *Right.Previous;
01788   if (Right.NewlinesBefore > 1)
01789     return true;
01790 
01791   // If the last token before a '}' is a comma or a trailing comment, the
01792   // intention is to insert a line break after it in order to make shuffling
01793   // around entries easier.
01794   const FormatToken *BeforeClosingBrace = nullptr;
01795   if (Left.is(tok::l_brace) && Left.BlockKind != BK_Block && Left.MatchingParen)
01796     BeforeClosingBrace = Left.MatchingParen->Previous;
01797   else if (Right.is(tok::r_brace) && Right.BlockKind != BK_Block)
01798     BeforeClosingBrace = &Left;
01799   if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) ||
01800                              BeforeClosingBrace->isTrailingComment()))
01801     return true;
01802 
01803   if (Right.is(tok::comment)) {
01804     return Left.BlockKind != BK_BracedInit &&
01805            Left.Type != TT_CtorInitializerColon &&
01806            (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline);
01807   } else if (Right.Previous->isTrailingComment() ||
01808              (Right.isStringLiteral() && Right.Previous->isStringLiteral())) {
01809     return true;
01810   } else if (Right.Previous->IsUnterminatedLiteral) {
01811     return true;
01812   } else if (Right.is(tok::lessless) && Right.Next &&
01813              Right.Previous->is(tok::string_literal) &&
01814              Right.Next->is(tok::string_literal)) {
01815     return true;
01816   } else if (Right.Previous->ClosesTemplateDeclaration &&
01817              Right.Previous->MatchingParen &&
01818              Right.Previous->MatchingParen->NestingLevel == 0 &&
01819              Style.AlwaysBreakTemplateDeclarations) {
01820     return true;
01821   } else if ((Right.Type == TT_CtorInitializerComma ||
01822               Right.Type == TT_CtorInitializerColon) &&
01823              Style.BreakConstructorInitializersBeforeComma &&
01824              !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) {
01825     return true;
01826   } else if (Right.is(tok::string_literal) &&
01827              Right.TokenText.startswith("R\"")) {
01828     // Raw string literals are special wrt. line breaks. The author has made a
01829     // deliberate choice and might have aligned the contents of the string
01830     // literal accordingly. Thus, we try keep existing line breaks.
01831     return Right.NewlinesBefore > 0;
01832   } else if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 &&
01833              Style.Language == FormatStyle::LK_Proto) {
01834     // Don't put enums onto single lines in protocol buffers.
01835     return true;
01836   } else if (Style.Language == FormatStyle::LK_JavaScript &&
01837              Right.is(tok::r_brace) && Left.is(tok::l_brace) &&
01838              !Left.Children.empty()) {
01839     // Support AllowShortFunctionsOnASingleLine for JavaScript.
01840     return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None ||
01841            (Left.NestingLevel == 0 && Line.Level == 0 &&
01842             Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline);
01843   } else if (isAllmanBrace(Left) || isAllmanBrace(Right)) {
01844     return Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
01845            Style.BreakBeforeBraces == FormatStyle::BS_GNU;
01846   } else if (Style.Language == FormatStyle::LK_Proto &&
01847              Left.isNot(tok::l_brace) && Right.Type == TT_SelectorName) {
01848     return true;
01849   } else if (Left.Type == TT_ObjCBlockLBrace &&
01850              !Style.AllowShortBlocksOnASingleLine) {
01851     return true;
01852   }
01853 
01854   if (Style.Language == FormatStyle::LK_JavaScript) {
01855     // FIXME: This might apply to other languages and token kinds.
01856     if (Right.is(tok::char_constant) && Left.is(tok::plus) && Left.Previous &&
01857         Left.Previous->is(tok::char_constant))
01858       return true;
01859   } else if (Style.Language == FormatStyle::LK_Java) {
01860     if (Left.Type == TT_LeadingJavaAnnotation && Right.isNot(tok::l_paren) &&
01861         Line.Last->is(tok::l_brace))
01862       return true;
01863     if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next &&
01864         Right.Next->is(tok::string_literal))
01865       return true;
01866   }
01867 
01868   return false;
01869 }
01870 
01871 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line,
01872                                     const FormatToken &Right) {
01873   const FormatToken &Left = *Right.Previous;
01874 
01875   if (Style.Language == FormatStyle::LK_Java) {
01876     if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
01877                      Keywords.kw_implements))
01878       return false;
01879     if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends,
01880                       Keywords.kw_implements))
01881       return true;
01882   }
01883 
01884   if (Left.is(tok::at))
01885     return false;
01886   if (Left.Tok.getObjCKeywordID() == tok::objc_interface)
01887     return false;
01888   if (Left.Type == TT_JavaAnnotation || Left.Type == TT_LeadingJavaAnnotation)
01889     return true;
01890   if (Right.Type == TT_StartOfName ||
01891       Right.Type == TT_FunctionDeclarationName || Right.is(tok::kw_operator))
01892     return true;
01893   if (Right.isTrailingComment())
01894     // We rely on MustBreakBefore being set correctly here as we should not
01895     // change the "binding" behavior of a comment.
01896     // The first comment in a braced lists is always interpreted as belonging to
01897     // the first list element. Otherwise, it should be placed outside of the
01898     // list.
01899     return Left.BlockKind == BK_BracedInit;
01900   if (Left.is(tok::question) && Right.is(tok::colon))
01901     return false;
01902   if (Right.Type == TT_ConditionalExpr || Right.is(tok::question))
01903     return Style.BreakBeforeTernaryOperators;
01904   if (Left.Type == TT_ConditionalExpr || Left.is(tok::question))
01905     return !Style.BreakBeforeTernaryOperators;
01906   if (Right.Type == TT_InheritanceColon)
01907     return true;
01908   if (Right.is(tok::colon) && (Right.Type != TT_CtorInitializerColon &&
01909                                Right.Type != TT_InlineASMColon))
01910     return false;
01911   if (Left.is(tok::colon) &&
01912       (Left.Type == TT_DictLiteral || Left.Type == TT_ObjCMethodExpr))
01913     return true;
01914   if (Right.Type == TT_SelectorName)
01915     return true;
01916   if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty)
01917     return true;
01918   if (Left.ClosesTemplateDeclaration)
01919     return true;
01920   if (Right.Type == TT_RangeBasedForLoopColon ||
01921       Right.Type == TT_OverloadedOperatorLParen ||
01922       Right.Type == TT_OverloadedOperator)
01923     return false;
01924   if (Left.Type == TT_RangeBasedForLoopColon)
01925     return true;
01926   if (Right.Type == TT_RangeBasedForLoopColon)
01927     return false;
01928   if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser ||
01929       Left.Type == TT_UnaryOperator || Left.is(tok::kw_operator))
01930     return false;
01931   if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl)
01932     return false;
01933   if (Left.is(tok::l_paren) && Left.Type == TT_AttributeParen)
01934     return false;
01935   if (Left.is(tok::l_paren) && Left.Previous &&
01936       (Left.Previous->Type == TT_BinaryOperator ||
01937        Left.Previous->Type == TT_CastRParen || Left.Previous->is(tok::kw_if)))
01938     return false;
01939   if (Right.Type == TT_ImplicitStringLiteral)
01940     return false;
01941 
01942   if (Right.is(tok::r_paren) || Right.Type == TT_TemplateCloser)
01943     return false;
01944 
01945   // We only break before r_brace if there was a corresponding break before
01946   // the l_brace, which is tracked by BreakBeforeClosingBrace.
01947   if (Right.is(tok::r_brace))
01948     return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block;
01949 
01950   // Allow breaking after a trailing annotation, e.g. after a method
01951   // declaration.
01952   if (Left.Type == TT_TrailingAnnotation)
01953     return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren,
01954                           tok::less, tok::coloncolon);
01955 
01956   if (Right.is(tok::kw___attribute))
01957     return true;
01958 
01959   if (Left.is(tok::identifier) && Right.is(tok::string_literal))
01960     return true;
01961 
01962   if (Right.is(tok::identifier) && Right.Next &&
01963       Right.Next->Type == TT_DictLiteral)
01964     return true;
01965 
01966   if (Left.Type == TT_CtorInitializerComma &&
01967       Style.BreakConstructorInitializersBeforeComma)
01968     return false;
01969   if (Right.Type == TT_CtorInitializerComma &&
01970       Style.BreakConstructorInitializersBeforeComma)
01971     return true;
01972   if (Left.is(tok::greater) && Right.is(tok::greater) &&
01973       Left.Type != TT_TemplateCloser)
01974     return false;
01975   if (Right.Type == TT_BinaryOperator &&
01976       Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None &&
01977       (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All ||
01978        Right.getPrecedence() != prec::Assignment))
01979     return true;
01980   if (Left.Type == TT_ArrayInitializerLSquare)
01981     return true;
01982   if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const))
01983     return true;
01984   if (Left.isBinaryOperator() && !Left.isOneOf(tok::arrowstar, tok::lessless) &&
01985       Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All &&
01986       (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None ||
01987        Left.getPrecedence() == prec::Assignment))
01988     return true;
01989   return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace,
01990                       tok::kw_class, tok::kw_struct) ||
01991          Right.isMemberAccess() || Right.Type == TT_TrailingReturnArrow ||
01992          Right.isOneOf(tok::lessless, tok::colon, tok::l_square, tok::at) ||
01993          (Left.is(tok::r_paren) &&
01994           Right.isOneOf(tok::identifier, tok::kw_const)) ||
01995          (Left.is(tok::l_paren) && !Right.is(tok::r_paren));
01996 }
01997 
01998 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) {
01999   llvm::errs() << "AnnotatedTokens:\n";
02000   const FormatToken *Tok = Line.First;
02001   while (Tok) {
02002     llvm::errs() << " M=" << Tok->MustBreakBefore
02003                  << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type
02004                  << " S=" << Tok->SpacesRequiredBefore
02005                  << " B=" << Tok->BlockParameterCount
02006                  << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName()
02007                  << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind
02008                  << " FakeLParens=";
02009     for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i)
02010       llvm::errs() << Tok->FakeLParens[i] << "/";
02011     llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n";
02012     if (!Tok->Next)
02013       assert(Tok == Line.Last);
02014     Tok = Tok->Next;
02015   }
02016   llvm::errs() << "----\n";
02017 }
02018 
02019 } // namespace format
02020 } // namespace clang