clang API Documentation
00001 //===--- TokenAnnotator.cpp - Format C++ code -----------------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 /// 00010 /// \file 00011 /// \brief This file implements a token annotator, i.e. creates 00012 /// \c AnnotatedTokens out of \c FormatTokens with required extra information. 00013 /// 00014 //===----------------------------------------------------------------------===// 00015 00016 #include "TokenAnnotator.h" 00017 #include "clang/Basic/SourceManager.h" 00018 #include "llvm/Support/Debug.h" 00019 00020 #define DEBUG_TYPE "format-token-annotator" 00021 00022 namespace clang { 00023 namespace format { 00024 00025 namespace { 00026 00027 /// \brief A parser that gathers additional information about tokens. 00028 /// 00029 /// The \c TokenAnnotator tries to match parenthesis and square brakets and 00030 /// store a parenthesis levels. It also tries to resolve matching "<" and ">" 00031 /// into template parameter lists. 00032 class AnnotatingParser { 00033 public: 00034 AnnotatingParser(const FormatStyle &Style, AnnotatedLine &Line, 00035 const AdditionalKeywords &Keywords) 00036 : Style(Style), Line(Line), CurrentToken(Line.First), 00037 KeywordVirtualFound(false), AutoFound(false), Keywords(Keywords) { 00038 Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false)); 00039 resetTokenMetadata(CurrentToken); 00040 } 00041 00042 private: 00043 bool parseAngle() { 00044 if (!CurrentToken) 00045 return false; 00046 ScopedContextCreator ContextCreator(*this, tok::less, 10); 00047 FormatToken *Left = CurrentToken->Previous; 00048 Contexts.back().IsExpression = false; 00049 // If there's a template keyword before the opening angle bracket, this is a 00050 // template parameter, not an argument. 00051 Contexts.back().InTemplateArgument = 00052 Left->Previous && Left->Previous->Tok.isNot(tok::kw_template); 00053 00054 if (Style.Language == FormatStyle::LK_Java && 00055 CurrentToken->is(tok::question)) 00056 next(); 00057 00058 while (CurrentToken) { 00059 if (CurrentToken->is(tok::greater)) { 00060 Left->MatchingParen = CurrentToken; 00061 CurrentToken->MatchingParen = Left; 00062 CurrentToken->Type = TT_TemplateCloser; 00063 next(); 00064 return true; 00065 } 00066 if (CurrentToken->is(tok::question) && 00067 Style.Language == FormatStyle::LK_Java) { 00068 next(); 00069 continue; 00070 } 00071 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square, tok::r_brace, 00072 tok::colon, tok::question)) 00073 return false; 00074 // If a && or || is found and interpreted as a binary operator, this set 00075 // of angles is likely part of something like "a < b && c > d". If the 00076 // angles are inside an expression, the ||/&& might also be a binary 00077 // operator that was misinterpreted because we are parsing template 00078 // parameters. 00079 // FIXME: This is getting out of hand, write a decent parser. 00080 if (CurrentToken->Previous->isOneOf(tok::pipepipe, tok::ampamp) && 00081 CurrentToken->Previous->Type == TT_BinaryOperator && 00082 Contexts[Contexts.size() - 2].IsExpression && 00083 Line.First->isNot(tok::kw_template)) 00084 return false; 00085 updateParameterCount(Left, CurrentToken); 00086 if (!consumeToken()) 00087 return false; 00088 } 00089 return false; 00090 } 00091 00092 bool parseParens(bool LookForDecls = false) { 00093 if (!CurrentToken) 00094 return false; 00095 ScopedContextCreator ContextCreator(*this, tok::l_paren, 1); 00096 00097 // FIXME: This is a bit of a hack. Do better. 00098 Contexts.back().ColonIsForRangeExpr = 00099 Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr; 00100 00101 bool StartsObjCMethodExpr = false; 00102 FormatToken *Left = CurrentToken->Previous; 00103 if (CurrentToken->is(tok::caret)) { 00104 // (^ can start a block type. 00105 Left->Type = TT_ObjCBlockLParen; 00106 } else if (FormatToken *MaybeSel = Left->Previous) { 00107 // @selector( starts a selector. 00108 if (MaybeSel->isObjCAtKeyword(tok::objc_selector) && MaybeSel->Previous && 00109 MaybeSel->Previous->is(tok::at)) { 00110 StartsObjCMethodExpr = true; 00111 } 00112 } 00113 00114 if (Left->Previous && 00115 (Left->Previous->isOneOf(tok::kw_static_assert, tok::kw_if, 00116 tok::kw_while, tok::l_paren, tok::comma) || 00117 Left->Previous->Type == TT_BinaryOperator)) { 00118 // static_assert, if and while usually contain expressions. 00119 Contexts.back().IsExpression = true; 00120 } else if (Line.InPPDirective && 00121 (!Left->Previous || 00122 (Left->Previous->isNot(tok::identifier) && 00123 Left->Previous->Type != TT_OverloadedOperator))) { 00124 Contexts.back().IsExpression = true; 00125 } else if (Left->Previous && Left->Previous->is(tok::r_square) && 00126 Left->Previous->MatchingParen && 00127 Left->Previous->MatchingParen->Type == TT_LambdaLSquare) { 00128 // This is a parameter list of a lambda expression. 00129 Contexts.back().IsExpression = false; 00130 } else if (Contexts[Contexts.size() - 2].CaretFound) { 00131 // This is the parameter list of an ObjC block. 00132 Contexts.back().IsExpression = false; 00133 } else if (Left->Previous && Left->Previous->is(tok::kw___attribute)) { 00134 Left->Type = TT_AttributeParen; 00135 } else if (Left->Previous && Left->Previous->IsForEachMacro) { 00136 // The first argument to a foreach macro is a declaration. 00137 Contexts.back().IsForEachMacro = true; 00138 Contexts.back().IsExpression = false; 00139 } else if (Left->Previous && Left->Previous->MatchingParen && 00140 Left->Previous->MatchingParen->Type == TT_ObjCBlockLParen) { 00141 Contexts.back().IsExpression = false; 00142 } 00143 00144 if (StartsObjCMethodExpr) { 00145 Contexts.back().ColonIsObjCMethodExpr = true; 00146 Left->Type = TT_ObjCMethodExpr; 00147 } 00148 00149 bool MightBeFunctionType = CurrentToken->is(tok::star); 00150 bool HasMultipleLines = false; 00151 bool HasMultipleParametersOnALine = false; 00152 while (CurrentToken) { 00153 // LookForDecls is set when "if (" has been seen. Check for 00154 // 'identifier' '*' 'identifier' followed by not '=' -- this 00155 // '*' has to be a binary operator but determineStarAmpUsage() will 00156 // categorize it as an unary operator, so set the right type here. 00157 if (LookForDecls && CurrentToken->Next) { 00158 FormatToken *Prev = CurrentToken->getPreviousNonComment(); 00159 if (Prev) { 00160 FormatToken *PrevPrev = Prev->getPreviousNonComment(); 00161 FormatToken *Next = CurrentToken->Next; 00162 if (PrevPrev && PrevPrev->is(tok::identifier) && 00163 Prev->isOneOf(tok::star, tok::amp, tok::ampamp) && 00164 CurrentToken->is(tok::identifier) && Next->isNot(tok::equal)) { 00165 Prev->Type = TT_BinaryOperator; 00166 LookForDecls = false; 00167 } 00168 } 00169 } 00170 00171 if (CurrentToken->Previous->Type == TT_PointerOrReference && 00172 CurrentToken->Previous->Previous->isOneOf(tok::l_paren, 00173 tok::coloncolon)) 00174 MightBeFunctionType = true; 00175 if (CurrentToken->Previous->Type == TT_BinaryOperator) 00176 Contexts.back().IsExpression = true; 00177 if (CurrentToken->is(tok::r_paren)) { 00178 if (MightBeFunctionType && CurrentToken->Next && 00179 (CurrentToken->Next->is(tok::l_paren) || 00180 (CurrentToken->Next->is(tok::l_square) && 00181 !Contexts.back().IsExpression))) 00182 Left->Type = TT_FunctionTypeLParen; 00183 Left->MatchingParen = CurrentToken; 00184 CurrentToken->MatchingParen = Left; 00185 00186 if (StartsObjCMethodExpr) { 00187 CurrentToken->Type = TT_ObjCMethodExpr; 00188 if (Contexts.back().FirstObjCSelectorName) { 00189 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 00190 Contexts.back().LongestObjCSelectorName; 00191 } 00192 } 00193 00194 if (Left->Type == TT_AttributeParen) 00195 CurrentToken->Type = TT_AttributeParen; 00196 if (Left->Previous && Left->Previous->Type == TT_JavaAnnotation) 00197 CurrentToken->Type = TT_JavaAnnotation; 00198 if (Left->Previous && Left->Previous->Type == TT_LeadingJavaAnnotation) 00199 CurrentToken->Type = TT_LeadingJavaAnnotation; 00200 00201 if (!HasMultipleLines) 00202 Left->PackingKind = PPK_Inconclusive; 00203 else if (HasMultipleParametersOnALine) 00204 Left->PackingKind = PPK_BinPacked; 00205 else 00206 Left->PackingKind = PPK_OnePerLine; 00207 00208 next(); 00209 return true; 00210 } 00211 if (CurrentToken->isOneOf(tok::r_square, tok::r_brace)) 00212 return false; 00213 else if (CurrentToken->is(tok::l_brace)) 00214 Left->Type = TT_Unknown; // Not TT_ObjCBlockLParen 00215 if (CurrentToken->is(tok::comma) && CurrentToken->Next && 00216 !CurrentToken->Next->HasUnescapedNewline && 00217 !CurrentToken->Next->isTrailingComment()) 00218 HasMultipleParametersOnALine = true; 00219 if (CurrentToken->isOneOf(tok::kw_const, tok::kw_auto) || 00220 CurrentToken->isSimpleTypeSpecifier()) 00221 Contexts.back().IsExpression = false; 00222 FormatToken *Tok = CurrentToken; 00223 if (!consumeToken()) 00224 return false; 00225 updateParameterCount(Left, Tok); 00226 if (CurrentToken && CurrentToken->HasUnescapedNewline) 00227 HasMultipleLines = true; 00228 } 00229 return false; 00230 } 00231 00232 bool parseSquare() { 00233 if (!CurrentToken) 00234 return false; 00235 00236 // A '[' could be an index subscript (after an identifier or after 00237 // ')' or ']'), it could be the start of an Objective-C method 00238 // expression, or it could the the start of an Objective-C array literal. 00239 FormatToken *Left = CurrentToken->Previous; 00240 FormatToken *Parent = Left->getPreviousNonComment(); 00241 bool StartsObjCMethodExpr = 00242 Contexts.back().CanBeExpression && Left->Type != TT_LambdaLSquare && 00243 CurrentToken->isNot(tok::l_brace) && 00244 (!Parent || Parent->isOneOf(tok::colon, tok::l_square, tok::l_paren, 00245 tok::kw_return, tok::kw_throw) || 00246 Parent->isUnaryOperator() || Parent->Type == TT_ObjCForIn || 00247 Parent->Type == TT_CastRParen || 00248 getBinOpPrecedence(Parent->Tok.getKind(), true, true) > prec::Unknown); 00249 ScopedContextCreator ContextCreator(*this, tok::l_square, 10); 00250 Contexts.back().IsExpression = true; 00251 bool ColonFound = false; 00252 00253 if (StartsObjCMethodExpr) { 00254 Contexts.back().ColonIsObjCMethodExpr = true; 00255 Left->Type = TT_ObjCMethodExpr; 00256 } else if (Parent && Parent->is(tok::at)) { 00257 Left->Type = TT_ArrayInitializerLSquare; 00258 } else if (Left->Type == TT_Unknown) { 00259 Left->Type = TT_ArraySubscriptLSquare; 00260 } 00261 00262 while (CurrentToken) { 00263 if (CurrentToken->is(tok::r_square)) { 00264 if (CurrentToken->Next && CurrentToken->Next->is(tok::l_paren) && 00265 Left->Type == TT_ObjCMethodExpr) { 00266 // An ObjC method call is rarely followed by an open parenthesis. 00267 // FIXME: Do we incorrectly label ":" with this? 00268 StartsObjCMethodExpr = false; 00269 Left->Type = TT_Unknown; 00270 } 00271 if (StartsObjCMethodExpr && CurrentToken->Previous != Left) { 00272 CurrentToken->Type = TT_ObjCMethodExpr; 00273 // determineStarAmpUsage() thinks that '*' '[' is allocating an 00274 // array of pointers, but if '[' starts a selector then '*' is a 00275 // binary operator. 00276 if (Parent && Parent->Type == TT_PointerOrReference) 00277 Parent->Type = TT_BinaryOperator; 00278 } 00279 Left->MatchingParen = CurrentToken; 00280 CurrentToken->MatchingParen = Left; 00281 if (Contexts.back().FirstObjCSelectorName) { 00282 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 00283 Contexts.back().LongestObjCSelectorName; 00284 if (Left->BlockParameterCount > 1) 00285 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 0; 00286 } 00287 next(); 00288 return true; 00289 } 00290 if (CurrentToken->isOneOf(tok::r_paren, tok::r_brace)) 00291 return false; 00292 if (CurrentToken->is(tok::colon)) { 00293 if (Left->Type == TT_ArraySubscriptLSquare) { 00294 Left->Type = TT_ObjCMethodExpr; 00295 StartsObjCMethodExpr = true; 00296 Contexts.back().ColonIsObjCMethodExpr = true; 00297 if (Parent && Parent->is(tok::r_paren)) 00298 Parent->Type = TT_CastRParen; 00299 } 00300 ColonFound = true; 00301 } 00302 if (CurrentToken->is(tok::comma) && 00303 Style.Language != FormatStyle::LK_Proto && 00304 (Left->Type == TT_ArraySubscriptLSquare || 00305 (Left->Type == TT_ObjCMethodExpr && !ColonFound))) 00306 Left->Type = TT_ArrayInitializerLSquare; 00307 FormatToken* Tok = CurrentToken; 00308 if (!consumeToken()) 00309 return false; 00310 updateParameterCount(Left, Tok); 00311 } 00312 return false; 00313 } 00314 00315 bool parseBrace() { 00316 if (CurrentToken) { 00317 FormatToken *Left = CurrentToken->Previous; 00318 00319 if (Contexts.back().CaretFound) 00320 Left->Type = TT_ObjCBlockLBrace; 00321 Contexts.back().CaretFound = false; 00322 00323 ScopedContextCreator ContextCreator(*this, tok::l_brace, 1); 00324 Contexts.back().ColonIsDictLiteral = true; 00325 if (Left->BlockKind == BK_BracedInit) 00326 Contexts.back().IsExpression = true; 00327 00328 while (CurrentToken) { 00329 if (CurrentToken->is(tok::r_brace)) { 00330 Left->MatchingParen = CurrentToken; 00331 CurrentToken->MatchingParen = Left; 00332 next(); 00333 return true; 00334 } 00335 if (CurrentToken->isOneOf(tok::r_paren, tok::r_square)) 00336 return false; 00337 updateParameterCount(Left, CurrentToken); 00338 if (CurrentToken->isOneOf(tok::colon, tok::l_brace)) { 00339 FormatToken *Previous = CurrentToken->getPreviousNonComment(); 00340 if ((CurrentToken->is(tok::colon) || 00341 Style.Language == FormatStyle::LK_Proto) && 00342 Previous->is(tok::identifier)) 00343 Previous->Type = TT_SelectorName; 00344 if (CurrentToken->is(tok::colon)) 00345 Left->Type = TT_DictLiteral; 00346 } 00347 if (!consumeToken()) 00348 return false; 00349 } 00350 } 00351 return true; 00352 } 00353 00354 void updateParameterCount(FormatToken *Left, FormatToken *Current) { 00355 if (Current->Type == TT_LambdaLSquare || 00356 (Current->is(tok::caret) && Current->Type == TT_UnaryOperator) || 00357 (Style.Language == FormatStyle::LK_JavaScript && 00358 Current->is(Keywords.kw_function))) { 00359 ++Left->BlockParameterCount; 00360 } 00361 if (Current->is(tok::comma)) { 00362 ++Left->ParameterCount; 00363 if (!Left->Role) 00364 Left->Role.reset(new CommaSeparatedList(Style)); 00365 Left->Role->CommaFound(Current); 00366 } else if (Left->ParameterCount == 0 && Current->isNot(tok::comment)) { 00367 Left->ParameterCount = 1; 00368 } 00369 } 00370 00371 bool parseConditional() { 00372 while (CurrentToken) { 00373 if (CurrentToken->is(tok::colon)) { 00374 CurrentToken->Type = TT_ConditionalExpr; 00375 next(); 00376 return true; 00377 } 00378 if (!consumeToken()) 00379 return false; 00380 } 00381 return false; 00382 } 00383 00384 bool parseTemplateDeclaration() { 00385 if (CurrentToken && CurrentToken->is(tok::less)) { 00386 CurrentToken->Type = TT_TemplateOpener; 00387 next(); 00388 if (!parseAngle()) 00389 return false; 00390 if (CurrentToken) 00391 CurrentToken->Previous->ClosesTemplateDeclaration = true; 00392 return true; 00393 } 00394 return false; 00395 } 00396 00397 bool consumeToken() { 00398 FormatToken *Tok = CurrentToken; 00399 next(); 00400 switch (Tok->Tok.getKind()) { 00401 case tok::plus: 00402 case tok::minus: 00403 if (!Tok->Previous && Line.MustBeDeclaration) 00404 Tok->Type = TT_ObjCMethodSpecifier; 00405 break; 00406 case tok::colon: 00407 if (!Tok->Previous) 00408 return false; 00409 // Colons from ?: are handled in parseConditional(). 00410 if (Tok->Previous->is(tok::r_paren) && Contexts.size() == 1 && 00411 Line.First->isNot(tok::kw_case)) { 00412 Tok->Type = TT_CtorInitializerColon; 00413 } else if (Contexts.back().ColonIsDictLiteral) { 00414 Tok->Type = TT_DictLiteral; 00415 } else if (Contexts.back().ColonIsObjCMethodExpr || 00416 Line.First->Type == TT_ObjCMethodSpecifier) { 00417 Tok->Type = TT_ObjCMethodExpr; 00418 Tok->Previous->Type = TT_SelectorName; 00419 if (Tok->Previous->ColumnWidth > 00420 Contexts.back().LongestObjCSelectorName) { 00421 Contexts.back().LongestObjCSelectorName = Tok->Previous->ColumnWidth; 00422 } 00423 if (!Contexts.back().FirstObjCSelectorName) 00424 Contexts.back().FirstObjCSelectorName = Tok->Previous; 00425 } else if (Contexts.back().ColonIsForRangeExpr) { 00426 Tok->Type = TT_RangeBasedForLoopColon; 00427 } else if (CurrentToken && CurrentToken->is(tok::numeric_constant)) { 00428 Tok->Type = TT_BitFieldColon; 00429 } else if (Contexts.size() == 1 && 00430 !Line.First->isOneOf(tok::kw_enum, tok::kw_case)) { 00431 Tok->Type = TT_InheritanceColon; 00432 } else if (Tok->Previous->is(tok::identifier) && Tok->Next && 00433 Tok->Next->isOneOf(tok::r_paren, tok::comma)) { 00434 // This handles a special macro in ObjC code where selectors including 00435 // the colon are passed as macro arguments. 00436 Tok->Type = TT_ObjCMethodExpr; 00437 } else if (Contexts.back().ContextKind == tok::l_paren) { 00438 Tok->Type = TT_InlineASMColon; 00439 } 00440 break; 00441 case tok::kw_if: 00442 case tok::kw_while: 00443 if (CurrentToken && CurrentToken->is(tok::l_paren)) { 00444 next(); 00445 if (!parseParens(/*LookForDecls=*/true)) 00446 return false; 00447 } 00448 break; 00449 case tok::kw_for: 00450 Contexts.back().ColonIsForRangeExpr = true; 00451 next(); 00452 if (!parseParens()) 00453 return false; 00454 break; 00455 case tok::l_paren: 00456 if (!parseParens()) 00457 return false; 00458 if (Line.MustBeDeclaration && Contexts.size() == 1 && 00459 !Contexts.back().IsExpression && 00460 Line.First->Type != TT_ObjCProperty && 00461 (!Tok->Previous || Tok->Previous->isNot(tok::kw_decltype))) 00462 Line.MightBeFunctionDecl = true; 00463 break; 00464 case tok::l_square: 00465 if (!parseSquare()) 00466 return false; 00467 break; 00468 case tok::l_brace: 00469 if (!parseBrace()) 00470 return false; 00471 break; 00472 case tok::less: 00473 if ((!Tok->Previous || !Tok->Previous->Tok.isLiteral()) && parseAngle()) 00474 Tok->Type = TT_TemplateOpener; 00475 else { 00476 Tok->Type = TT_BinaryOperator; 00477 CurrentToken = Tok; 00478 next(); 00479 } 00480 break; 00481 case tok::r_paren: 00482 case tok::r_square: 00483 return false; 00484 case tok::r_brace: 00485 // Lines can start with '}'. 00486 if (Tok->Previous) 00487 return false; 00488 break; 00489 case tok::greater: 00490 Tok->Type = TT_BinaryOperator; 00491 break; 00492 case tok::kw_operator: 00493 while (CurrentToken && 00494 !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) { 00495 if (CurrentToken->isOneOf(tok::star, tok::amp)) 00496 CurrentToken->Type = TT_PointerOrReference; 00497 consumeToken(); 00498 if (CurrentToken && CurrentToken->Previous->Type == TT_BinaryOperator) 00499 CurrentToken->Previous->Type = TT_OverloadedOperator; 00500 } 00501 if (CurrentToken) { 00502 CurrentToken->Type = TT_OverloadedOperatorLParen; 00503 if (CurrentToken->Previous->Type == TT_BinaryOperator) 00504 CurrentToken->Previous->Type = TT_OverloadedOperator; 00505 } 00506 break; 00507 case tok::question: 00508 parseConditional(); 00509 break; 00510 case tok::kw_template: 00511 parseTemplateDeclaration(); 00512 break; 00513 case tok::identifier: 00514 if (Line.First->is(tok::kw_for) && Tok->is(Keywords.kw_in)) 00515 Tok->Type = TT_ObjCForIn; 00516 break; 00517 case tok::comma: 00518 if (Contexts.back().FirstStartOfName) 00519 Contexts.back().FirstStartOfName->PartOfMultiVariableDeclStmt = true; 00520 if (Contexts.back().InCtorInitializer) 00521 Tok->Type = TT_CtorInitializerComma; 00522 if (Contexts.back().IsForEachMacro) 00523 Contexts.back().IsExpression = true; 00524 break; 00525 default: 00526 break; 00527 } 00528 return true; 00529 } 00530 00531 void parseIncludeDirective() { 00532 if (CurrentToken && CurrentToken->is(tok::less)) { 00533 next(); 00534 while (CurrentToken) { 00535 if (CurrentToken->isNot(tok::comment) || CurrentToken->Next) 00536 CurrentToken->Type = TT_ImplicitStringLiteral; 00537 next(); 00538 } 00539 } else { 00540 while (CurrentToken) { 00541 if (CurrentToken->isNot(tok::comment)) 00542 // Mark these tokens as "implicit" string literals, so that 00543 // they are not split or line-wrapped. 00544 CurrentToken->Type = TT_ImplicitStringLiteral; 00545 next(); 00546 } 00547 } 00548 } 00549 00550 void parseWarningOrError() { 00551 next(); 00552 // We still want to format the whitespace left of the first token of the 00553 // warning or error. 00554 next(); 00555 while (CurrentToken) { 00556 CurrentToken->Type = TT_ImplicitStringLiteral; 00557 next(); 00558 } 00559 } 00560 00561 void parsePragma() { 00562 next(); // Consume "pragma". 00563 if (CurrentToken && CurrentToken->TokenText == "mark") { 00564 next(); // Consume "mark". 00565 next(); // Consume first token (so we fix leading whitespace). 00566 while (CurrentToken) { 00567 CurrentToken->Type = TT_ImplicitStringLiteral; 00568 next(); 00569 } 00570 } 00571 } 00572 00573 void parsePreprocessorDirective() { 00574 next(); 00575 if (!CurrentToken) 00576 return; 00577 if (CurrentToken->Tok.is(tok::numeric_constant)) { 00578 CurrentToken->SpacesRequiredBefore = 1; 00579 return; 00580 } 00581 // Hashes in the middle of a line can lead to any strange token 00582 // sequence. 00583 if (!CurrentToken->Tok.getIdentifierInfo()) 00584 return; 00585 switch (CurrentToken->Tok.getIdentifierInfo()->getPPKeywordID()) { 00586 case tok::pp_include: 00587 case tok::pp_import: 00588 next(); 00589 parseIncludeDirective(); 00590 break; 00591 case tok::pp_error: 00592 case tok::pp_warning: 00593 parseWarningOrError(); 00594 break; 00595 case tok::pp_pragma: 00596 parsePragma(); 00597 break; 00598 case tok::pp_if: 00599 case tok::pp_elif: 00600 Contexts.back().IsExpression = true; 00601 parseLine(); 00602 break; 00603 default: 00604 break; 00605 } 00606 while (CurrentToken) 00607 next(); 00608 } 00609 00610 public: 00611 LineType parseLine() { 00612 if (CurrentToken->is(tok::hash)) { 00613 parsePreprocessorDirective(); 00614 return LT_PreprocessorDirective; 00615 } 00616 00617 // Directly allow to 'import <string-literal>' to support protocol buffer 00618 // definitions (code.google.com/p/protobuf) or missing "#" (either way we 00619 // should not break the line). 00620 IdentifierInfo *Info = CurrentToken->Tok.getIdentifierInfo(); 00621 if (Info && Info->getPPKeywordID() == tok::pp_import && 00622 CurrentToken->Next) { 00623 next(); 00624 parseIncludeDirective(); 00625 return LT_Other; 00626 } 00627 00628 // If this line starts and ends in '<' and '>', respectively, it is likely 00629 // part of "#define <a/b.h>". 00630 if (CurrentToken->is(tok::less) && Line.Last->is(tok::greater)) { 00631 parseIncludeDirective(); 00632 return LT_Other; 00633 } 00634 00635 while (CurrentToken) { 00636 if (CurrentToken->is(tok::kw_virtual)) 00637 KeywordVirtualFound = true; 00638 if (!consumeToken()) 00639 return LT_Invalid; 00640 } 00641 if (KeywordVirtualFound) 00642 return LT_VirtualFunctionDecl; 00643 00644 if (Line.First->Type == TT_ObjCMethodSpecifier) { 00645 if (Contexts.back().FirstObjCSelectorName) 00646 Contexts.back().FirstObjCSelectorName->LongestObjCSelectorName = 00647 Contexts.back().LongestObjCSelectorName; 00648 return LT_ObjCMethodDecl; 00649 } 00650 00651 return LT_Other; 00652 } 00653 00654 private: 00655 void resetTokenMetadata(FormatToken *Token) { 00656 if (!Token) 00657 return; 00658 00659 // Reset token type in case we have already looked at it and then 00660 // recovered from an error (e.g. failure to find the matching >). 00661 if (CurrentToken->Type != TT_LambdaLSquare && 00662 CurrentToken->Type != TT_FunctionLBrace && 00663 CurrentToken->Type != TT_ImplicitStringLiteral && 00664 CurrentToken->Type != TT_RegexLiteral && 00665 CurrentToken->Type != TT_TrailingReturnArrow) 00666 CurrentToken->Type = TT_Unknown; 00667 CurrentToken->Role.reset(); 00668 CurrentToken->FakeLParens.clear(); 00669 CurrentToken->FakeRParens = 0; 00670 } 00671 00672 void next() { 00673 if (CurrentToken) { 00674 CurrentToken->NestingLevel = Contexts.size() - 1; 00675 CurrentToken->BindingStrength = Contexts.back().BindingStrength; 00676 determineTokenType(*CurrentToken); 00677 CurrentToken = CurrentToken->Next; 00678 } 00679 00680 resetTokenMetadata(CurrentToken); 00681 } 00682 00683 /// \brief A struct to hold information valid in a specific context, e.g. 00684 /// a pair of parenthesis. 00685 struct Context { 00686 Context(tok::TokenKind ContextKind, unsigned BindingStrength, 00687 bool IsExpression) 00688 : ContextKind(ContextKind), BindingStrength(BindingStrength), 00689 LongestObjCSelectorName(0), ColonIsForRangeExpr(false), 00690 ColonIsDictLiteral(false), ColonIsObjCMethodExpr(false), 00691 FirstObjCSelectorName(nullptr), FirstStartOfName(nullptr), 00692 IsExpression(IsExpression), CanBeExpression(true), 00693 InTemplateArgument(false), InCtorInitializer(false), 00694 CaretFound(false), IsForEachMacro(false) {} 00695 00696 tok::TokenKind ContextKind; 00697 unsigned BindingStrength; 00698 unsigned LongestObjCSelectorName; 00699 bool ColonIsForRangeExpr; 00700 bool ColonIsDictLiteral; 00701 bool ColonIsObjCMethodExpr; 00702 FormatToken *FirstObjCSelectorName; 00703 FormatToken *FirstStartOfName; 00704 bool IsExpression; 00705 bool CanBeExpression; 00706 bool InTemplateArgument; 00707 bool InCtorInitializer; 00708 bool CaretFound; 00709 bool IsForEachMacro; 00710 }; 00711 00712 /// \brief Puts a new \c Context onto the stack \c Contexts for the lifetime 00713 /// of each instance. 00714 struct ScopedContextCreator { 00715 AnnotatingParser &P; 00716 00717 ScopedContextCreator(AnnotatingParser &P, tok::TokenKind ContextKind, 00718 unsigned Increase) 00719 : P(P) { 00720 P.Contexts.push_back(Context(ContextKind, 00721 P.Contexts.back().BindingStrength + Increase, 00722 P.Contexts.back().IsExpression)); 00723 } 00724 00725 ~ScopedContextCreator() { P.Contexts.pop_back(); } 00726 }; 00727 00728 void determineTokenType(FormatToken &Current) { 00729 if (Current.getPrecedence() == prec::Assignment && 00730 !Line.First->isOneOf(tok::kw_template, tok::kw_using) && 00731 (!Current.Previous || Current.Previous->isNot(tok::kw_operator))) { 00732 Contexts.back().IsExpression = true; 00733 for (FormatToken *Previous = Current.Previous; 00734 Previous && !Previous->isOneOf(tok::comma, tok::semi); 00735 Previous = Previous->Previous) { 00736 if (Previous->isOneOf(tok::r_square, tok::r_paren)) { 00737 Previous = Previous->MatchingParen; 00738 if (!Previous) 00739 break; 00740 } 00741 if ((Previous->Type == TT_BinaryOperator || 00742 Previous->Type == TT_UnaryOperator) && 00743 Previous->isOneOf(tok::star, tok::amp) && Previous->Previous && 00744 Previous->Previous->isNot(tok::equal)) { 00745 Previous->Type = TT_PointerOrReference; 00746 } 00747 } 00748 } else if (Current.isOneOf(tok::kw_return, tok::kw_throw)) { 00749 Contexts.back().IsExpression = true; 00750 } else if (Current.is(tok::l_paren) && !Line.MustBeDeclaration && 00751 !Line.InPPDirective && 00752 (!Current.Previous || 00753 Current.Previous->isNot(tok::kw_decltype))) { 00754 bool ParametersOfFunctionType = 00755 Current.Previous && Current.Previous->is(tok::r_paren) && 00756 Current.Previous->MatchingParen && 00757 Current.Previous->MatchingParen->Type == TT_FunctionTypeLParen; 00758 bool IsForOrCatch = Current.Previous && 00759 Current.Previous->isOneOf(tok::kw_for, tok::kw_catch); 00760 Contexts.back().IsExpression = !ParametersOfFunctionType && !IsForOrCatch; 00761 } else if (Current.isOneOf(tok::r_paren, tok::greater, tok::comma)) { 00762 for (FormatToken *Previous = Current.Previous; 00763 Previous && Previous->isOneOf(tok::star, tok::amp); 00764 Previous = Previous->Previous) 00765 Previous->Type = TT_PointerOrReference; 00766 if (Line.MustBeDeclaration) 00767 Contexts.back().IsExpression = Contexts.front().InCtorInitializer; 00768 } else if (Current.Previous && 00769 Current.Previous->Type == TT_CtorInitializerColon) { 00770 Contexts.back().IsExpression = true; 00771 Contexts.back().InCtorInitializer = true; 00772 } else if (Current.is(tok::kw_new)) { 00773 Contexts.back().CanBeExpression = false; 00774 } else if (Current.is(tok::semi) || Current.is(tok::exclaim)) { 00775 // This should be the condition or increment in a for-loop. 00776 Contexts.back().IsExpression = true; 00777 } 00778 00779 if (Current.Type == TT_Unknown) { 00780 // Line.MightBeFunctionDecl can only be true after the parentheses of a 00781 // function declaration have been found. In this case, 'Current' is a 00782 // trailing token of this declaration and thus cannot be a name. 00783 if (isStartOfName(Current) && 00784 (!Line.MightBeFunctionDecl || Current.NestingLevel != 0)) { 00785 Contexts.back().FirstStartOfName = &Current; 00786 Current.Type = TT_StartOfName; 00787 } else if (Current.is(tok::kw_auto)) { 00788 AutoFound = true; 00789 } else if (Current.is(tok::arrow) && AutoFound && 00790 Line.MustBeDeclaration && Current.NestingLevel == 0) { 00791 Current.Type = TT_TrailingReturnArrow; 00792 } else if (Current.isOneOf(tok::star, tok::amp, tok::ampamp)) { 00793 Current.Type = 00794 determineStarAmpUsage(Current, Contexts.back().CanBeExpression && 00795 Contexts.back().IsExpression, 00796 Contexts.back().InTemplateArgument); 00797 } else if (Current.isOneOf(tok::minus, tok::plus, tok::caret)) { 00798 Current.Type = determinePlusMinusCaretUsage(Current); 00799 if (Current.Type == TT_UnaryOperator && Current.is(tok::caret)) 00800 Contexts.back().CaretFound = true; 00801 } else if (Current.isOneOf(tok::minusminus, tok::plusplus)) { 00802 Current.Type = determineIncrementUsage(Current); 00803 } else if (Current.isOneOf(tok::exclaim, tok::tilde)) { 00804 Current.Type = TT_UnaryOperator; 00805 } else if (Current.is(tok::question)) { 00806 Current.Type = TT_ConditionalExpr; 00807 } else if (Current.isBinaryOperator() && 00808 (!Current.Previous || 00809 Current.Previous->isNot(tok::l_square))) { 00810 Current.Type = TT_BinaryOperator; 00811 } else if (Current.is(tok::comment)) { 00812 if (Current.TokenText.startswith("//")) 00813 Current.Type = TT_LineComment; 00814 else 00815 Current.Type = TT_BlockComment; 00816 } else if (Current.is(tok::r_paren)) { 00817 if (rParenEndsCast(Current)) 00818 Current.Type = TT_CastRParen; 00819 } else if (Current.is(tok::at) && Current.Next) { 00820 switch (Current.Next->Tok.getObjCKeywordID()) { 00821 case tok::objc_interface: 00822 case tok::objc_implementation: 00823 case tok::objc_protocol: 00824 Current.Type = TT_ObjCDecl; 00825 break; 00826 case tok::objc_property: 00827 Current.Type = TT_ObjCProperty; 00828 break; 00829 default: 00830 break; 00831 } 00832 } else if (Current.is(tok::period)) { 00833 FormatToken *PreviousNoComment = Current.getPreviousNonComment(); 00834 if (PreviousNoComment && 00835 PreviousNoComment->isOneOf(tok::comma, tok::l_brace)) 00836 Current.Type = TT_DesignatedInitializerPeriod; 00837 } else if (Current.isOneOf(tok::identifier, tok::kw_const) && 00838 Current.Previous && 00839 !Current.Previous->isOneOf(tok::equal, tok::at) && 00840 Line.MightBeFunctionDecl && Contexts.size() == 1) { 00841 // Line.MightBeFunctionDecl can only be true after the parentheses of a 00842 // function declaration have been found. 00843 Current.Type = TT_TrailingAnnotation; 00844 } else if (Style.Language == FormatStyle::LK_Java && Current.Previous && 00845 Current.Previous->is(tok::at) && 00846 Current.isNot(Keywords.kw_interface)) { 00847 const FormatToken& AtToken = *Current.Previous; 00848 if (!AtToken.Previous || 00849 AtToken.Previous->Type == TT_LeadingJavaAnnotation) 00850 Current.Type = TT_LeadingJavaAnnotation; 00851 else 00852 Current.Type = TT_JavaAnnotation; 00853 } 00854 } 00855 } 00856 00857 /// \brief Take a guess at whether \p Tok starts a name of a function or 00858 /// variable declaration. 00859 /// 00860 /// This is a heuristic based on whether \p Tok is an identifier following 00861 /// something that is likely a type. 00862 bool isStartOfName(const FormatToken &Tok) { 00863 if (Tok.isNot(tok::identifier) || !Tok.Previous) 00864 return false; 00865 00866 // Skip "const" as it does not have an influence on whether this is a name. 00867 FormatToken *PreviousNotConst = Tok.Previous; 00868 while (PreviousNotConst && PreviousNotConst->is(tok::kw_const)) 00869 PreviousNotConst = PreviousNotConst->Previous; 00870 00871 if (!PreviousNotConst) 00872 return false; 00873 00874 bool IsPPKeyword = PreviousNotConst->is(tok::identifier) && 00875 PreviousNotConst->Previous && 00876 PreviousNotConst->Previous->is(tok::hash); 00877 00878 if (PreviousNotConst->Type == TT_TemplateCloser) 00879 return PreviousNotConst && PreviousNotConst->MatchingParen && 00880 PreviousNotConst->MatchingParen->Previous && 00881 PreviousNotConst->MatchingParen->Previous->isNot(tok::kw_template); 00882 00883 if (PreviousNotConst->is(tok::r_paren) && PreviousNotConst->MatchingParen && 00884 PreviousNotConst->MatchingParen->Previous && 00885 PreviousNotConst->MatchingParen->Previous->is(tok::kw_decltype)) 00886 return true; 00887 00888 return (!IsPPKeyword && PreviousNotConst->is(tok::identifier)) || 00889 PreviousNotConst->Type == TT_PointerOrReference || 00890 PreviousNotConst->isSimpleTypeSpecifier(); 00891 } 00892 00893 /// \brief Determine whether ')' is ending a cast. 00894 bool rParenEndsCast(const FormatToken &Tok) { 00895 FormatToken *LeftOfParens = nullptr; 00896 if (Tok.MatchingParen) 00897 LeftOfParens = Tok.MatchingParen->getPreviousNonComment(); 00898 if (LeftOfParens && LeftOfParens->is(tok::r_paren) && 00899 LeftOfParens->MatchingParen) 00900 LeftOfParens = LeftOfParens->MatchingParen->Previous; 00901 if (LeftOfParens && LeftOfParens->is(tok::r_square) && 00902 LeftOfParens->MatchingParen && 00903 LeftOfParens->MatchingParen->Type == TT_LambdaLSquare) 00904 return false; 00905 bool IsCast = false; 00906 bool ParensAreEmpty = Tok.Previous == Tok.MatchingParen; 00907 bool ParensAreType = !Tok.Previous || 00908 Tok.Previous->Type == TT_PointerOrReference || 00909 Tok.Previous->Type == TT_TemplateCloser || 00910 Tok.Previous->isSimpleTypeSpecifier(); 00911 if (Style.Language == FormatStyle::LK_JavaScript && Tok.Next && 00912 Tok.Next->is(Keywords.kw_in)) 00913 return false; 00914 bool ParensCouldEndDecl = 00915 Tok.Next && Tok.Next->isOneOf(tok::equal, tok::semi, tok::l_brace); 00916 bool IsSizeOfOrAlignOf = 00917 LeftOfParens && LeftOfParens->isOneOf(tok::kw_sizeof, tok::kw_alignof); 00918 if (ParensAreType && !ParensCouldEndDecl && !IsSizeOfOrAlignOf && 00919 ((Contexts.size() > 1 && Contexts[Contexts.size() - 2].IsExpression) || 00920 (Tok.Next && Tok.Next->isBinaryOperator()))) 00921 IsCast = true; 00922 else if (Tok.Next && Tok.Next->isNot(tok::string_literal) && 00923 (Tok.Next->Tok.isLiteral() || 00924 Tok.Next->isOneOf(tok::kw_sizeof, tok::kw_alignof))) 00925 IsCast = true; 00926 // If there is an identifier after the (), it is likely a cast, unless 00927 // there is also an identifier before the (). 00928 else if (LeftOfParens && 00929 (LeftOfParens->Tok.getIdentifierInfo() == nullptr || 00930 LeftOfParens->is(tok::kw_return)) && 00931 LeftOfParens->Type != TT_OverloadedOperator && 00932 LeftOfParens->isNot(tok::at) && 00933 LeftOfParens->Type != TT_TemplateCloser && Tok.Next) { 00934 if (Tok.Next->isOneOf(tok::identifier, tok::numeric_constant)) { 00935 IsCast = true; 00936 } else { 00937 // Use heuristics to recognize c style casting. 00938 FormatToken *Prev = Tok.Previous; 00939 if (Prev && Prev->isOneOf(tok::amp, tok::star)) 00940 Prev = Prev->Previous; 00941 00942 if (Prev && Tok.Next && Tok.Next->Next) { 00943 bool NextIsUnary = Tok.Next->isUnaryOperator() || 00944 Tok.Next->isOneOf(tok::amp, tok::star); 00945 IsCast = 00946 NextIsUnary && !Tok.Next->is(tok::plus) && 00947 Tok.Next->Next->isOneOf(tok::identifier, tok::numeric_constant); 00948 } 00949 00950 for (; Prev != Tok.MatchingParen; Prev = Prev->Previous) { 00951 if (!Prev || !Prev->isOneOf(tok::kw_const, tok::identifier)) { 00952 IsCast = false; 00953 break; 00954 } 00955 } 00956 } 00957 } 00958 return IsCast && !ParensAreEmpty; 00959 } 00960 00961 /// \brief Return the type of the given token assuming it is * or &. 00962 TokenType determineStarAmpUsage(const FormatToken &Tok, bool IsExpression, 00963 bool InTemplateArgument) { 00964 if (Style.Language == FormatStyle::LK_JavaScript) 00965 return TT_BinaryOperator; 00966 00967 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 00968 if (!PrevToken) 00969 return TT_UnaryOperator; 00970 00971 const FormatToken *NextToken = Tok.getNextNonComment(); 00972 if (!NextToken || NextToken->is(tok::l_brace)) 00973 return TT_Unknown; 00974 00975 if (PrevToken->is(tok::coloncolon)) 00976 return TT_PointerOrReference; 00977 00978 if (PrevToken->isOneOf(tok::l_paren, tok::l_square, tok::l_brace, 00979 tok::comma, tok::semi, tok::kw_return, tok::colon, 00980 tok::equal, tok::kw_delete, tok::kw_sizeof) || 00981 PrevToken->Type == TT_BinaryOperator || 00982 PrevToken->Type == TT_ConditionalExpr || 00983 PrevToken->Type == TT_UnaryOperator || PrevToken->Type == TT_CastRParen) 00984 return TT_UnaryOperator; 00985 00986 if (NextToken->is(tok::l_square) && NextToken->Type != TT_LambdaLSquare) 00987 return TT_PointerOrReference; 00988 if (NextToken->isOneOf(tok::kw_operator, tok::comma)) 00989 return TT_PointerOrReference; 00990 00991 if (PrevToken->is(tok::r_paren) && PrevToken->MatchingParen && 00992 PrevToken->MatchingParen->Previous && 00993 PrevToken->MatchingParen->Previous->isOneOf(tok::kw_typeof, 00994 tok::kw_decltype)) 00995 return TT_PointerOrReference; 00996 00997 if (PrevToken->Tok.isLiteral() || 00998 PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::kw_true, 00999 tok::kw_false, tok::r_brace) || 01000 NextToken->Tok.isLiteral() || 01001 NextToken->isOneOf(tok::kw_true, tok::kw_false) || 01002 NextToken->isUnaryOperator() || 01003 // If we know we're in a template argument, there are no named 01004 // declarations. Thus, having an identifier on the right-hand side 01005 // indicates a binary operator. 01006 (InTemplateArgument && NextToken->Tok.isAnyIdentifier())) 01007 return TT_BinaryOperator; 01008 01009 // "&&(" is quite unlikely to be two successive unary "&". 01010 if (Tok.is(tok::ampamp) && NextToken && NextToken->is(tok::l_paren)) 01011 return TT_BinaryOperator; 01012 01013 // This catches some cases where evaluation order is used as control flow: 01014 // aaa && aaa->f(); 01015 const FormatToken *NextNextToken = NextToken->getNextNonComment(); 01016 if (NextNextToken && NextNextToken->is(tok::arrow)) 01017 return TT_BinaryOperator; 01018 01019 // It is very unlikely that we are going to find a pointer or reference type 01020 // definition on the RHS of an assignment. 01021 if (IsExpression) 01022 return TT_BinaryOperator; 01023 01024 return TT_PointerOrReference; 01025 } 01026 01027 TokenType determinePlusMinusCaretUsage(const FormatToken &Tok) { 01028 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 01029 if (!PrevToken || PrevToken->Type == TT_CastRParen) 01030 return TT_UnaryOperator; 01031 01032 // Use heuristics to recognize unary operators. 01033 if (PrevToken->isOneOf(tok::equal, tok::l_paren, tok::comma, tok::l_square, 01034 tok::question, tok::colon, tok::kw_return, 01035 tok::kw_case, tok::at, tok::l_brace)) 01036 return TT_UnaryOperator; 01037 01038 // There can't be two consecutive binary operators. 01039 if (PrevToken->Type == TT_BinaryOperator) 01040 return TT_UnaryOperator; 01041 01042 // Fall back to marking the token as binary operator. 01043 return TT_BinaryOperator; 01044 } 01045 01046 /// \brief Determine whether ++/-- are pre- or post-increments/-decrements. 01047 TokenType determineIncrementUsage(const FormatToken &Tok) { 01048 const FormatToken *PrevToken = Tok.getPreviousNonComment(); 01049 if (!PrevToken || PrevToken->Type == TT_CastRParen) 01050 return TT_UnaryOperator; 01051 if (PrevToken->isOneOf(tok::r_paren, tok::r_square, tok::identifier)) 01052 return TT_TrailingUnaryOperator; 01053 01054 return TT_UnaryOperator; 01055 } 01056 01057 SmallVector<Context, 8> Contexts; 01058 01059 const FormatStyle &Style; 01060 AnnotatedLine &Line; 01061 FormatToken *CurrentToken; 01062 bool KeywordVirtualFound; 01063 bool AutoFound; 01064 const AdditionalKeywords &Keywords; 01065 }; 01066 01067 static int PrecedenceUnaryOperator = prec::PointerToMember + 1; 01068 static int PrecedenceArrowAndPeriod = prec::PointerToMember + 2; 01069 01070 /// \brief Parses binary expressions by inserting fake parenthesis based on 01071 /// operator precedence. 01072 class ExpressionParser { 01073 public: 01074 ExpressionParser(const FormatStyle &Style, const AdditionalKeywords &Keywords, 01075 AnnotatedLine &Line) 01076 : Style(Style), Keywords(Keywords), Current(Line.First) {} 01077 01078 /// \brief Parse expressions with the given operatore precedence. 01079 void parse(int Precedence = 0) { 01080 // Skip 'return' and ObjC selector colons as they are not part of a binary 01081 // expression. 01082 while (Current && 01083 (Current->is(tok::kw_return) || 01084 (Current->is(tok::colon) && (Current->Type == TT_ObjCMethodExpr || 01085 Current->Type == TT_DictLiteral)))) 01086 next(); 01087 01088 if (!Current || Precedence > PrecedenceArrowAndPeriod) 01089 return; 01090 01091 // Conditional expressions need to be parsed separately for proper nesting. 01092 if (Precedence == prec::Conditional) { 01093 parseConditionalExpr(); 01094 return; 01095 } 01096 01097 // Parse unary operators, which all have a higher precedence than binary 01098 // operators. 01099 if (Precedence == PrecedenceUnaryOperator) { 01100 parseUnaryOperator(); 01101 return; 01102 } 01103 01104 FormatToken *Start = Current; 01105 FormatToken *LatestOperator = nullptr; 01106 unsigned OperatorIndex = 0; 01107 01108 while (Current) { 01109 // Consume operators with higher precedence. 01110 parse(Precedence + 1); 01111 01112 int CurrentPrecedence = getCurrentPrecedence(); 01113 01114 if (Current && Current->Type == TT_SelectorName && 01115 Precedence == CurrentPrecedence) { 01116 if (LatestOperator) 01117 addFakeParenthesis(Start, prec::Level(Precedence)); 01118 Start = Current; 01119 } 01120 01121 // At the end of the line or when an operator with higher precedence is 01122 // found, insert fake parenthesis and return. 01123 if (!Current || (Current->closesScope() && Current->MatchingParen) || 01124 (CurrentPrecedence != -1 && CurrentPrecedence < Precedence) || 01125 (CurrentPrecedence == prec::Conditional && 01126 Precedence == prec::Assignment && Current->is(tok::colon))) { 01127 if (LatestOperator) { 01128 LatestOperator->LastOperator = true; 01129 if (Precedence == PrecedenceArrowAndPeriod) { 01130 // Call expressions don't have a binary operator precedence. 01131 addFakeParenthesis(Start, prec::Unknown); 01132 } else { 01133 addFakeParenthesis(Start, prec::Level(Precedence)); 01134 } 01135 } 01136 return; 01137 } 01138 01139 // Consume scopes: (), [], <> and {} 01140 if (Current->opensScope()) { 01141 while (Current && !Current->closesScope()) { 01142 next(); 01143 parse(); 01144 } 01145 next(); 01146 } else { 01147 // Operator found. 01148 if (CurrentPrecedence == Precedence) { 01149 LatestOperator = Current; 01150 Current->OperatorIndex = OperatorIndex; 01151 ++OperatorIndex; 01152 } 01153 next(/*SkipPastLeadingComments=*/Precedence > 0); 01154 } 01155 } 01156 } 01157 01158 private: 01159 /// \brief Gets the precedence (+1) of the given token for binary operators 01160 /// and other tokens that we treat like binary operators. 01161 int getCurrentPrecedence() { 01162 if (Current) { 01163 const FormatToken *NextNonComment = Current->getNextNonComment(); 01164 if (Current->Type == TT_ConditionalExpr) 01165 return prec::Conditional; 01166 else if (NextNonComment && NextNonComment->is(tok::colon) && 01167 NextNonComment->Type == TT_DictLiteral) 01168 return prec::Comma; 01169 else if (Current->is(tok::semi) || Current->Type == TT_InlineASMColon || 01170 Current->Type == TT_SelectorName || 01171 (Current->is(tok::comment) && NextNonComment && 01172 NextNonComment->Type == TT_SelectorName)) 01173 return 0; 01174 else if (Current->Type == TT_RangeBasedForLoopColon) 01175 return prec::Comma; 01176 else if (Current->Type == TT_BinaryOperator || Current->is(tok::comma)) 01177 return Current->getPrecedence(); 01178 else if (Current->isOneOf(tok::period, tok::arrow)) 01179 return PrecedenceArrowAndPeriod; 01180 else if (Style.Language == FormatStyle::LK_Java && 01181 Current->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) 01182 return 0; 01183 } 01184 return -1; 01185 } 01186 01187 void addFakeParenthesis(FormatToken *Start, prec::Level Precedence) { 01188 Start->FakeLParens.push_back(Precedence); 01189 if (Precedence > prec::Unknown) 01190 Start->StartsBinaryExpression = true; 01191 if (Current) { 01192 FormatToken *Previous = Current->Previous; 01193 if (Previous->is(tok::comment) && Previous->Previous) 01194 Previous = Previous->Previous; 01195 ++Previous->FakeRParens; 01196 if (Precedence > prec::Unknown) 01197 Previous->EndsBinaryExpression = true; 01198 } 01199 } 01200 01201 /// \brief Parse unary operator expressions and surround them with fake 01202 /// parentheses if appropriate. 01203 void parseUnaryOperator() { 01204 if (!Current || Current->Type != TT_UnaryOperator) { 01205 parse(PrecedenceArrowAndPeriod); 01206 return; 01207 } 01208 01209 FormatToken *Start = Current; 01210 next(); 01211 parseUnaryOperator(); 01212 01213 // The actual precedence doesn't matter. 01214 addFakeParenthesis(Start, prec::Unknown); 01215 } 01216 01217 void parseConditionalExpr() { 01218 while (Current && Current->isTrailingComment()) { 01219 next(); 01220 } 01221 FormatToken *Start = Current; 01222 parse(prec::LogicalOr); 01223 if (!Current || !Current->is(tok::question)) 01224 return; 01225 next(); 01226 parse(prec::Assignment); 01227 if (!Current || Current->Type != TT_ConditionalExpr) 01228 return; 01229 next(); 01230 parse(prec::Assignment); 01231 addFakeParenthesis(Start, prec::Conditional); 01232 } 01233 01234 void next(bool SkipPastLeadingComments = true) { 01235 if (Current) 01236 Current = Current->Next; 01237 while (Current && 01238 (Current->NewlinesBefore == 0 || SkipPastLeadingComments) && 01239 Current->isTrailingComment()) 01240 Current = Current->Next; 01241 } 01242 01243 const FormatStyle &Style; 01244 const AdditionalKeywords &Keywords; 01245 FormatToken *Current; 01246 }; 01247 01248 } // end anonymous namespace 01249 01250 void 01251 TokenAnnotator::setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) { 01252 const AnnotatedLine *NextNonCommentLine = nullptr; 01253 for (SmallVectorImpl<AnnotatedLine *>::reverse_iterator I = Lines.rbegin(), 01254 E = Lines.rend(); 01255 I != E; ++I) { 01256 if (NextNonCommentLine && (*I)->First->is(tok::comment) && 01257 (*I)->First->Next == nullptr) 01258 (*I)->Level = NextNonCommentLine->Level; 01259 else 01260 NextNonCommentLine = (*I)->First->isNot(tok::r_brace) ? (*I) : nullptr; 01261 01262 setCommentLineLevels((*I)->Children); 01263 } 01264 } 01265 01266 void TokenAnnotator::annotate(AnnotatedLine &Line) { 01267 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), 01268 E = Line.Children.end(); 01269 I != E; ++I) { 01270 annotate(**I); 01271 } 01272 AnnotatingParser Parser(Style, Line, Keywords); 01273 Line.Type = Parser.parseLine(); 01274 if (Line.Type == LT_Invalid) 01275 return; 01276 01277 ExpressionParser ExprParser(Style, Keywords, Line); 01278 ExprParser.parse(); 01279 01280 if (Line.First->Type == TT_ObjCMethodSpecifier) 01281 Line.Type = LT_ObjCMethodDecl; 01282 else if (Line.First->Type == TT_ObjCDecl) 01283 Line.Type = LT_ObjCDecl; 01284 else if (Line.First->Type == TT_ObjCProperty) 01285 Line.Type = LT_ObjCProperty; 01286 01287 Line.First->SpacesRequiredBefore = 1; 01288 Line.First->CanBreakBefore = Line.First->MustBreakBefore; 01289 } 01290 01291 // This function heuristically determines whether 'Current' starts the name of a 01292 // function declaration. 01293 static bool isFunctionDeclarationName(const FormatToken &Current) { 01294 if (Current.Type != TT_StartOfName || 01295 Current.NestingLevel != 0) 01296 return false; 01297 const FormatToken *Next = Current.Next; 01298 for (; Next; Next = Next->Next) { 01299 if (Next->Type == TT_TemplateOpener) { 01300 Next = Next->MatchingParen; 01301 } else if (Next->is(tok::coloncolon)) { 01302 Next = Next->Next; 01303 if (!Next || !Next->is(tok::identifier)) 01304 return false; 01305 } else if (Next->is(tok::l_paren)) { 01306 break; 01307 } else { 01308 return false; 01309 } 01310 } 01311 if (!Next) 01312 return false; 01313 assert(Next->is(tok::l_paren)); 01314 if (Next->Next == Next->MatchingParen) 01315 return true; 01316 for (const FormatToken *Tok = Next->Next; Tok != Next->MatchingParen; 01317 Tok = Tok->Next) { 01318 if (Tok->is(tok::kw_const) || Tok->isSimpleTypeSpecifier() || 01319 Tok->Type == TT_PointerOrReference || Tok->Type == TT_StartOfName) 01320 return true; 01321 if (Tok->isOneOf(tok::l_brace, tok::string_literal) || Tok->Tok.isLiteral()) 01322 return false; 01323 } 01324 return false; 01325 } 01326 01327 void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) { 01328 for (SmallVectorImpl<AnnotatedLine *>::iterator I = Line.Children.begin(), 01329 E = Line.Children.end(); 01330 I != E; ++I) { 01331 calculateFormattingInformation(**I); 01332 } 01333 01334 Line.First->TotalLength = 01335 Line.First->IsMultiline ? Style.ColumnLimit : Line.First->ColumnWidth; 01336 if (!Line.First->Next) 01337 return; 01338 FormatToken *Current = Line.First->Next; 01339 bool InFunctionDecl = Line.MightBeFunctionDecl; 01340 while (Current) { 01341 if (isFunctionDeclarationName(*Current)) 01342 Current->Type = TT_FunctionDeclarationName; 01343 if (Current->Type == TT_LineComment) { 01344 if (Current->Previous->BlockKind == BK_BracedInit && 01345 Current->Previous->opensScope()) 01346 Current->SpacesRequiredBefore = Style.Cpp11BracedListStyle ? 0 : 1; 01347 else 01348 Current->SpacesRequiredBefore = Style.SpacesBeforeTrailingComments; 01349 01350 // If we find a trailing comment, iterate backwards to determine whether 01351 // it seems to relate to a specific parameter. If so, break before that 01352 // parameter to avoid changing the comment's meaning. E.g. don't move 'b' 01353 // to the previous line in: 01354 // SomeFunction(a, 01355 // b, // comment 01356 // c); 01357 if (!Current->HasUnescapedNewline) { 01358 for (FormatToken *Parameter = Current->Previous; Parameter; 01359 Parameter = Parameter->Previous) { 01360 if (Parameter->isOneOf(tok::comment, tok::r_brace)) 01361 break; 01362 if (Parameter->Previous && Parameter->Previous->is(tok::comma)) { 01363 if (Parameter->Previous->Type != TT_CtorInitializerComma && 01364 Parameter->HasUnescapedNewline) 01365 Parameter->MustBreakBefore = true; 01366 break; 01367 } 01368 } 01369 } 01370 } else if (Current->SpacesRequiredBefore == 0 && 01371 spaceRequiredBefore(Line, *Current)) { 01372 Current->SpacesRequiredBefore = 1; 01373 } 01374 01375 Current->MustBreakBefore = 01376 Current->MustBreakBefore || mustBreakBefore(Line, *Current); 01377 01378 if (Style.AlwaysBreakAfterDefinitionReturnType && 01379 InFunctionDecl && Current->Type == TT_FunctionDeclarationName && 01380 !Line.Last->isOneOf(tok::semi, tok::comment)) // Only for definitions. 01381 // FIXME: Line.Last points to other characters than tok::semi 01382 // and tok::lbrace. 01383 Current->MustBreakBefore = true; 01384 01385 Current->CanBreakBefore = 01386 Current->MustBreakBefore || canBreakBefore(Line, *Current); 01387 unsigned ChildSize = 0; 01388 if (Current->Previous->Children.size() == 1) { 01389 FormatToken &LastOfChild = *Current->Previous->Children[0]->Last; 01390 ChildSize = LastOfChild.isTrailingComment() ? Style.ColumnLimit 01391 : LastOfChild.TotalLength + 1; 01392 } 01393 const FormatToken *Prev= Current->Previous; 01394 if (Current->MustBreakBefore || Prev->Children.size() > 1 || 01395 (Prev->Children.size() == 1 && 01396 Prev->Children[0]->First->MustBreakBefore) || 01397 Current->IsMultiline) 01398 Current->TotalLength = Prev->TotalLength + Style.ColumnLimit; 01399 else 01400 Current->TotalLength = Prev->TotalLength + Current->ColumnWidth + 01401 ChildSize + Current->SpacesRequiredBefore; 01402 01403 if (Current->Type == TT_CtorInitializerColon) 01404 InFunctionDecl = false; 01405 01406 // FIXME: Only calculate this if CanBreakBefore is true once static 01407 // initializers etc. are sorted out. 01408 // FIXME: Move magic numbers to a better place. 01409 Current->SplitPenalty = 20 * Current->BindingStrength + 01410 splitPenalty(Line, *Current, InFunctionDecl); 01411 01412 Current = Current->Next; 01413 } 01414 01415 calculateUnbreakableTailLengths(Line); 01416 for (Current = Line.First; Current != nullptr; Current = Current->Next) { 01417 if (Current->Role) 01418 Current->Role->precomputeFormattingInfos(Current); 01419 } 01420 01421 DEBUG({ printDebugInfo(Line); }); 01422 } 01423 01424 void TokenAnnotator::calculateUnbreakableTailLengths(AnnotatedLine &Line) { 01425 unsigned UnbreakableTailLength = 0; 01426 FormatToken *Current = Line.Last; 01427 while (Current) { 01428 Current->UnbreakableTailLength = UnbreakableTailLength; 01429 if (Current->CanBreakBefore || 01430 Current->isOneOf(tok::comment, tok::string_literal)) { 01431 UnbreakableTailLength = 0; 01432 } else { 01433 UnbreakableTailLength += 01434 Current->ColumnWidth + Current->SpacesRequiredBefore; 01435 } 01436 Current = Current->Previous; 01437 } 01438 } 01439 01440 unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, 01441 const FormatToken &Tok, 01442 bool InFunctionDecl) { 01443 const FormatToken &Left = *Tok.Previous; 01444 const FormatToken &Right = Tok; 01445 01446 if (Left.is(tok::semi)) 01447 return 0; 01448 01449 if (Style.Language == FormatStyle::LK_Java) { 01450 if (Left.Type == TT_LeadingJavaAnnotation) 01451 return 1; 01452 if (Right.is(Keywords.kw_extends)) 01453 return 1; 01454 if (Right.is(Keywords.kw_implements)) 01455 return 2; 01456 if (Left.is(tok::comma) && Left.NestingLevel == 0) 01457 return 3; 01458 } 01459 01460 if (Left.is(tok::comma) || (Right.is(tok::identifier) && Right.Next && 01461 Right.Next->Type == TT_DictLiteral)) 01462 return 1; 01463 if (Right.is(tok::l_square)) { 01464 if (Style.Language == FormatStyle::LK_Proto) 01465 return 1; 01466 if (Right.Type != TT_ObjCMethodExpr && Right.Type != TT_LambdaLSquare) 01467 return 500; 01468 } 01469 01470 if (Right.Type == TT_StartOfName || 01471 Right.Type == TT_FunctionDeclarationName || Right.is(tok::kw_operator)) { 01472 if (Line.First->is(tok::kw_for) && Right.PartOfMultiVariableDeclStmt) 01473 return 3; 01474 if (Left.Type == TT_StartOfName) 01475 return 20; 01476 if (InFunctionDecl && Right.NestingLevel == 0) 01477 return Style.PenaltyReturnTypeOnItsOwnLine; 01478 return 200; 01479 } 01480 if (Left.is(tok::equal) && Right.is(tok::l_brace)) 01481 return 150; 01482 if (Left.Type == TT_CastRParen) 01483 return 100; 01484 if (Left.is(tok::coloncolon) || 01485 (Right.is(tok::period) && Style.Language == FormatStyle::LK_Proto)) 01486 return 500; 01487 if (Left.isOneOf(tok::kw_class, tok::kw_struct)) 01488 return 5000; 01489 01490 if (Left.Type == TT_RangeBasedForLoopColon || 01491 Left.Type == TT_InheritanceColon) 01492 return 2; 01493 01494 if (Right.isMemberAccess()) { 01495 if (Left.is(tok::r_paren) && Left.MatchingParen && 01496 Left.MatchingParen->ParameterCount > 0) 01497 return 20; // Should be smaller than breaking at a nested comma. 01498 return 150; 01499 } 01500 01501 if (Right.Type == TT_TrailingAnnotation && 01502 (!Right.Next || Right.Next->isNot(tok::l_paren))) { 01503 // Moving trailing annotations to the next line is fine for ObjC method 01504 // declarations. 01505 if (Line.First->Type == TT_ObjCMethodSpecifier) 01506 01507 return 10; 01508 // Generally, breaking before a trailing annotation is bad unless it is 01509 // function-like. It seems to be especially preferable to keep standard 01510 // annotations (i.e. "const", "final" and "override") on the same line. 01511 // Use a slightly higher penalty after ")" so that annotations like 01512 // "const override" are kept together. 01513 bool is_short_annotation = Right.TokenText.size() < 10; 01514 return (Left.is(tok::r_paren) ? 100 : 120) + (is_short_annotation ? 50 : 0); 01515 } 01516 01517 // In for-loops, prefer breaking at ',' and ';'. 01518 if (Line.First->is(tok::kw_for) && Left.is(tok::equal)) 01519 return 4; 01520 01521 // In Objective-C method expressions, prefer breaking before "param:" over 01522 // breaking after it. 01523 if (Right.Type == TT_SelectorName) 01524 return 0; 01525 if (Left.is(tok::colon) && Left.Type == TT_ObjCMethodExpr) 01526 return Line.MightBeFunctionDecl ? 50 : 500; 01527 01528 if (Left.is(tok::l_paren) && InFunctionDecl) 01529 return 100; 01530 if (Left.is(tok::equal) && InFunctionDecl) 01531 return 110; 01532 if (Right.is(tok::r_brace)) 01533 return 1; 01534 if (Left.Type == TT_TemplateOpener) 01535 return 100; 01536 if (Left.opensScope()) 01537 return Left.ParameterCount > 1 ? Style.PenaltyBreakBeforeFirstCallParameter 01538 : 19; 01539 01540 if (Right.is(tok::lessless)) { 01541 if (Left.is(tok::string_literal)) { 01542 StringRef Content = Left.TokenText; 01543 if (Content.startswith("\"")) 01544 Content = Content.drop_front(1); 01545 if (Content.endswith("\"")) 01546 Content = Content.drop_back(1); 01547 Content = Content.trim(); 01548 if (Content.size() > 1 && 01549 (Content.back() == ':' || Content.back() == '=')) 01550 return 25; 01551 } 01552 return 1; // Breaking at a << is really cheap. 01553 } 01554 if (Left.Type == TT_ConditionalExpr) 01555 return prec::Conditional; 01556 prec::Level Level = Left.getPrecedence(); 01557 01558 if (Level != prec::Unknown) 01559 return Level; 01560 01561 return 3; 01562 } 01563 01564 bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, 01565 const FormatToken &Left, 01566 const FormatToken &Right) { 01567 if (Left.is(tok::kw_return) && Right.isNot(tok::semi)) 01568 return true; 01569 if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty && 01570 Left.Tok.getObjCKeywordID() == tok::objc_property) 01571 return true; 01572 if (Right.is(tok::hashhash)) 01573 return Left.is(tok::hash); 01574 if (Left.isOneOf(tok::hashhash, tok::hash)) 01575 return Right.is(tok::hash); 01576 if (Left.is(tok::l_paren) && Right.is(tok::r_paren)) 01577 return Style.SpaceInEmptyParentheses; 01578 if (Left.is(tok::l_paren) || Right.is(tok::r_paren)) 01579 return (Right.Type == TT_CastRParen || 01580 (Left.MatchingParen && Left.MatchingParen->Type == TT_CastRParen)) 01581 ? Style.SpacesInCStyleCastParentheses 01582 : Style.SpacesInParentheses; 01583 if (Right.isOneOf(tok::semi, tok::comma)) 01584 return false; 01585 if (Right.is(tok::less) && 01586 (Left.isOneOf(tok::kw_template, tok::r_paren) || 01587 (Line.Type == LT_ObjCDecl && Style.ObjCSpaceBeforeProtocolList))) 01588 return true; 01589 if (Left.isOneOf(tok::exclaim, tok::tilde)) 01590 return false; 01591 if (Left.is(tok::at) && 01592 Right.isOneOf(tok::identifier, tok::string_literal, tok::char_constant, 01593 tok::numeric_constant, tok::l_paren, tok::l_brace, 01594 tok::kw_true, tok::kw_false)) 01595 return false; 01596 if (Left.is(tok::coloncolon)) 01597 return false; 01598 if (Left.is(tok::less) || Right.isOneOf(tok::greater, tok::less)) 01599 return false; 01600 if (Right.is(tok::ellipsis)) 01601 return Left.Tok.isLiteral(); 01602 if (Left.is(tok::l_square) && Right.is(tok::amp)) 01603 return false; 01604 if (Right.Type == TT_PointerOrReference) 01605 return Left.Tok.isLiteral() || 01606 ((Left.Type != TT_PointerOrReference) && Left.isNot(tok::l_paren) && 01607 Style.PointerAlignment != FormatStyle::PAS_Left); 01608 if (Right.Type == TT_FunctionTypeLParen && Left.isNot(tok::l_paren) && 01609 (Left.Type != TT_PointerOrReference || 01610 Style.PointerAlignment != FormatStyle::PAS_Right)) 01611 return true; 01612 if (Left.Type == TT_PointerOrReference) 01613 return Right.Tok.isLiteral() || Right.Type == TT_BlockComment || 01614 ((Right.Type != TT_PointerOrReference) && 01615 Right.isNot(tok::l_paren) && 01616 Style.PointerAlignment != FormatStyle::PAS_Right && Left.Previous && 01617 !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon)); 01618 if (Right.is(tok::star) && Left.is(tok::l_paren)) 01619 return false; 01620 if (Left.is(tok::l_square)) 01621 return (Left.Type == TT_ArrayInitializerLSquare && 01622 Style.SpacesInContainerLiterals && Right.isNot(tok::r_square)) || 01623 (Left.Type == TT_ArraySubscriptLSquare && 01624 Style.SpacesInSquareBrackets && Right.isNot(tok::r_square)); 01625 if (Right.is(tok::r_square)) 01626 return Right.MatchingParen && 01627 ((Style.SpacesInContainerLiterals && 01628 Right.MatchingParen->Type == TT_ArrayInitializerLSquare) || 01629 (Style.SpacesInSquareBrackets && 01630 Right.MatchingParen->Type == TT_ArraySubscriptLSquare)); 01631 if (Right.is(tok::l_square) && Right.Type != TT_ObjCMethodExpr && 01632 Right.Type != TT_LambdaLSquare && Left.isNot(tok::numeric_constant) && 01633 Left.Type != TT_DictLiteral) 01634 return false; 01635 if (Left.is(tok::colon)) 01636 return Left.Type != TT_ObjCMethodExpr; 01637 if (Left.is(tok::l_brace) && Right.is(tok::r_brace)) 01638 return !Left.Children.empty(); // No spaces in "{}". 01639 if ((Left.is(tok::l_brace) && Left.BlockKind != BK_Block) || 01640 (Right.is(tok::r_brace) && Right.MatchingParen && 01641 Right.MatchingParen->BlockKind != BK_Block)) 01642 return !Style.Cpp11BracedListStyle; 01643 if (Left.Type == TT_BlockComment) 01644 return !Left.TokenText.endswith("=*/"); 01645 if (Right.is(tok::l_paren)) { 01646 if (Left.is(tok::r_paren) && Left.Type == TT_AttributeParen) 01647 return true; 01648 return Line.Type == LT_ObjCDecl || 01649 Left.isOneOf(tok::kw_new, tok::kw_delete, tok::semi) || 01650 (Style.SpaceBeforeParens != FormatStyle::SBPO_Never && 01651 (Left.isOneOf(tok::kw_if, tok::kw_for, tok::kw_while, 01652 tok::kw_switch, tok::kw_case) || 01653 (Left.is(tok::kw_catch) && 01654 (!Left.Previous || Left.Previous->isNot(tok::period))) || 01655 Left.IsForEachMacro)) || 01656 (Style.SpaceBeforeParens == FormatStyle::SBPO_Always && 01657 (Left.is(tok::identifier) || Left.isFunctionLikeKeyword()) && 01658 Line.Type != LT_PreprocessorDirective); 01659 } 01660 if (Left.is(tok::at) && Right.Tok.getObjCKeywordID() != tok::objc_not_keyword) 01661 return false; 01662 if (Right.Type == TT_UnaryOperator) 01663 return !Left.isOneOf(tok::l_paren, tok::l_square, tok::at) && 01664 (Left.isNot(tok::colon) || Left.Type != TT_ObjCMethodExpr); 01665 if ((Left.isOneOf(tok::identifier, tok::greater, tok::r_square, 01666 tok::r_paren) || 01667 Left.isSimpleTypeSpecifier()) && 01668 Right.is(tok::l_brace) && Right.getNextNonComment() && 01669 Right.BlockKind != BK_Block) 01670 return false; 01671 if (Left.is(tok::period) || Right.is(tok::period)) 01672 return false; 01673 if (Right.is(tok::hash) && Left.is(tok::identifier) && Left.TokenText == "L") 01674 return false; 01675 if (Left.Type == TT_TemplateCloser && Left.MatchingParen && 01676 Left.MatchingParen->Previous && 01677 Left.MatchingParen->Previous->is(tok::period)) 01678 // A.<B>DoSomething(); 01679 return false; 01680 return true; 01681 } 01682 01683 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, 01684 const FormatToken &Right) { 01685 const FormatToken &Left = *Right.Previous; 01686 if (Style.Language == FormatStyle::LK_Proto) { 01687 if (Right.is(tok::period) && 01688 Left.isOneOf(Keywords.kw_optional, Keywords.kw_required, 01689 Keywords.kw_repeated)) 01690 return true; 01691 if (Right.is(tok::l_paren) && 01692 Left.isOneOf(Keywords.kw_returns, Keywords.kw_option)) 01693 return true; 01694 } else if (Style.Language == FormatStyle::LK_JavaScript) { 01695 if (Left.is(Keywords.kw_var)) 01696 return true; 01697 } else if (Style.Language == FormatStyle::LK_Java) { 01698 if (Left.is(Keywords.kw_synchronized) && Right.is(tok::l_paren)) 01699 return Style.SpaceBeforeParens != FormatStyle::SBPO_Never; 01700 if (Left.isOneOf(tok::kw_static, tok::kw_public, tok::kw_private, 01701 tok::kw_protected) && 01702 Right.Type == TT_TemplateOpener) 01703 return true; 01704 } 01705 if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo()) 01706 return true; // Never ever merge two identifiers. 01707 if (Left.Type == TT_ImplicitStringLiteral) 01708 return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd(); 01709 if (Line.Type == LT_ObjCMethodDecl) { 01710 if (Left.Type == TT_ObjCMethodSpecifier) 01711 return true; 01712 if (Left.is(tok::r_paren) && Right.is(tok::identifier)) 01713 // Don't space between ')' and <id> 01714 return false; 01715 } 01716 if (Line.Type == LT_ObjCProperty && 01717 (Right.is(tok::equal) || Left.is(tok::equal))) 01718 return false; 01719 01720 if (Right.Type == TT_TrailingReturnArrow || 01721 Left.Type == TT_TrailingReturnArrow) 01722 return true; 01723 if (Left.is(tok::comma)) 01724 return true; 01725 if (Right.is(tok::comma)) 01726 return false; 01727 if (Right.Type == TT_CtorInitializerColon || Right.Type == TT_ObjCBlockLParen) 01728 return true; 01729 if (Left.is(tok::kw_operator)) 01730 return Right.is(tok::coloncolon); 01731 if (Right.Type == TT_OverloadedOperatorLParen) 01732 return false; 01733 if (Right.is(tok::colon)) 01734 return !Line.First->isOneOf(tok::kw_case, tok::kw_default) && 01735 Right.getNextNonComment() && Right.Type != TT_ObjCMethodExpr && 01736 !Left.is(tok::question) && 01737 !(Right.Type == TT_InlineASMColon && Left.is(tok::coloncolon)) && 01738 (Right.Type != TT_DictLiteral || Style.SpacesInContainerLiterals); 01739 if (Left.Type == TT_UnaryOperator) 01740 return Right.Type == TT_BinaryOperator; 01741 if (Left.Type == TT_CastRParen) 01742 return Style.SpaceAfterCStyleCast || Right.Type == TT_BinaryOperator; 01743 if (Left.is(tok::greater) && Right.is(tok::greater)) { 01744 return Right.Type == TT_TemplateCloser && Left.Type == TT_TemplateCloser && 01745 (Style.Standard != FormatStyle::LS_Cpp11 || Style.SpacesInAngles); 01746 } 01747 if (Right.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar) || 01748 Left.isOneOf(tok::arrow, tok::period, tok::arrowstar, tok::periodstar)) 01749 return false; 01750 if (!Style.SpaceBeforeAssignmentOperators && 01751 Right.getPrecedence() == prec::Assignment) 01752 return false; 01753 if (Right.is(tok::coloncolon) && Left.isNot(tok::l_brace)) 01754 return (Left.Type == TT_TemplateOpener && 01755 Style.Standard == FormatStyle::LS_Cpp03) || 01756 !(Left.isOneOf(tok::identifier, tok::l_paren, tok::r_paren) || 01757 Left.Type == TT_TemplateCloser || Left.Type == TT_TemplateOpener); 01758 if ((Left.Type == TT_TemplateOpener) != (Right.Type == TT_TemplateCloser)) 01759 return Style.SpacesInAngles; 01760 if ((Right.Type == TT_BinaryOperator && !Left.is(tok::l_paren)) || 01761 Left.Type == TT_BinaryOperator || Left.Type == TT_ConditionalExpr) 01762 return true; 01763 if (Left.Type == TT_TemplateCloser && Right.is(tok::l_paren)) 01764 return Style.SpaceBeforeParens == FormatStyle::SBPO_Always; 01765 if (Right.Type == TT_TemplateOpener && Left.is(tok::r_paren) && 01766 Left.MatchingParen && 01767 Left.MatchingParen->Type == TT_OverloadedOperatorLParen) 01768 return false; 01769 if (Right.is(tok::less) && Left.isNot(tok::l_paren) && 01770 Line.First->is(tok::hash)) 01771 return true; 01772 if (Right.Type == TT_TrailingUnaryOperator) 01773 return false; 01774 if (Left.Type == TT_RegexLiteral) 01775 return false; 01776 return spaceRequiredBetween(Line, Left, Right); 01777 } 01778 01779 // Returns 'true' if 'Tok' is a brace we'd want to break before in Allman style. 01780 static bool isAllmanBrace(const FormatToken &Tok) { 01781 return Tok.is(tok::l_brace) && Tok.BlockKind == BK_Block && 01782 Tok.Type != TT_ObjCBlockLBrace && Tok.Type != TT_DictLiteral; 01783 } 01784 01785 bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, 01786 const FormatToken &Right) { 01787 const FormatToken &Left = *Right.Previous; 01788 if (Right.NewlinesBefore > 1) 01789 return true; 01790 01791 // If the last token before a '}' is a comma or a trailing comment, the 01792 // intention is to insert a line break after it in order to make shuffling 01793 // around entries easier. 01794 const FormatToken *BeforeClosingBrace = nullptr; 01795 if (Left.is(tok::l_brace) && Left.BlockKind != BK_Block && Left.MatchingParen) 01796 BeforeClosingBrace = Left.MatchingParen->Previous; 01797 else if (Right.is(tok::r_brace) && Right.BlockKind != BK_Block) 01798 BeforeClosingBrace = &Left; 01799 if (BeforeClosingBrace && (BeforeClosingBrace->is(tok::comma) || 01800 BeforeClosingBrace->isTrailingComment())) 01801 return true; 01802 01803 if (Right.is(tok::comment)) { 01804 return Left.BlockKind != BK_BracedInit && 01805 Left.Type != TT_CtorInitializerColon && 01806 (Right.NewlinesBefore > 0 && Right.HasUnescapedNewline); 01807 } else if (Right.Previous->isTrailingComment() || 01808 (Right.isStringLiteral() && Right.Previous->isStringLiteral())) { 01809 return true; 01810 } else if (Right.Previous->IsUnterminatedLiteral) { 01811 return true; 01812 } else if (Right.is(tok::lessless) && Right.Next && 01813 Right.Previous->is(tok::string_literal) && 01814 Right.Next->is(tok::string_literal)) { 01815 return true; 01816 } else if (Right.Previous->ClosesTemplateDeclaration && 01817 Right.Previous->MatchingParen && 01818 Right.Previous->MatchingParen->NestingLevel == 0 && 01819 Style.AlwaysBreakTemplateDeclarations) { 01820 return true; 01821 } else if ((Right.Type == TT_CtorInitializerComma || 01822 Right.Type == TT_CtorInitializerColon) && 01823 Style.BreakConstructorInitializersBeforeComma && 01824 !Style.ConstructorInitializerAllOnOneLineOrOnePerLine) { 01825 return true; 01826 } else if (Right.is(tok::string_literal) && 01827 Right.TokenText.startswith("R\"")) { 01828 // Raw string literals are special wrt. line breaks. The author has made a 01829 // deliberate choice and might have aligned the contents of the string 01830 // literal accordingly. Thus, we try keep existing line breaks. 01831 return Right.NewlinesBefore > 0; 01832 } else if (Right.Previous->is(tok::l_brace) && Right.NestingLevel == 1 && 01833 Style.Language == FormatStyle::LK_Proto) { 01834 // Don't put enums onto single lines in protocol buffers. 01835 return true; 01836 } else if (Style.Language == FormatStyle::LK_JavaScript && 01837 Right.is(tok::r_brace) && Left.is(tok::l_brace) && 01838 !Left.Children.empty()) { 01839 // Support AllowShortFunctionsOnASingleLine for JavaScript. 01840 return Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_None || 01841 (Left.NestingLevel == 0 && Line.Level == 0 && 01842 Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline); 01843 } else if (isAllmanBrace(Left) || isAllmanBrace(Right)) { 01844 return Style.BreakBeforeBraces == FormatStyle::BS_Allman || 01845 Style.BreakBeforeBraces == FormatStyle::BS_GNU; 01846 } else if (Style.Language == FormatStyle::LK_Proto && 01847 Left.isNot(tok::l_brace) && Right.Type == TT_SelectorName) { 01848 return true; 01849 } else if (Left.Type == TT_ObjCBlockLBrace && 01850 !Style.AllowShortBlocksOnASingleLine) { 01851 return true; 01852 } 01853 01854 if (Style.Language == FormatStyle::LK_JavaScript) { 01855 // FIXME: This might apply to other languages and token kinds. 01856 if (Right.is(tok::char_constant) && Left.is(tok::plus) && Left.Previous && 01857 Left.Previous->is(tok::char_constant)) 01858 return true; 01859 } else if (Style.Language == FormatStyle::LK_Java) { 01860 if (Left.Type == TT_LeadingJavaAnnotation && Right.isNot(tok::l_paren) && 01861 Line.Last->is(tok::l_brace)) 01862 return true; 01863 if (Right.is(tok::plus) && Left.is(tok::string_literal) && Right.Next && 01864 Right.Next->is(tok::string_literal)) 01865 return true; 01866 } 01867 01868 return false; 01869 } 01870 01871 bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, 01872 const FormatToken &Right) { 01873 const FormatToken &Left = *Right.Previous; 01874 01875 if (Style.Language == FormatStyle::LK_Java) { 01876 if (Left.isOneOf(Keywords.kw_throws, Keywords.kw_extends, 01877 Keywords.kw_implements)) 01878 return false; 01879 if (Right.isOneOf(Keywords.kw_throws, Keywords.kw_extends, 01880 Keywords.kw_implements)) 01881 return true; 01882 } 01883 01884 if (Left.is(tok::at)) 01885 return false; 01886 if (Left.Tok.getObjCKeywordID() == tok::objc_interface) 01887 return false; 01888 if (Left.Type == TT_JavaAnnotation || Left.Type == TT_LeadingJavaAnnotation) 01889 return true; 01890 if (Right.Type == TT_StartOfName || 01891 Right.Type == TT_FunctionDeclarationName || Right.is(tok::kw_operator)) 01892 return true; 01893 if (Right.isTrailingComment()) 01894 // We rely on MustBreakBefore being set correctly here as we should not 01895 // change the "binding" behavior of a comment. 01896 // The first comment in a braced lists is always interpreted as belonging to 01897 // the first list element. Otherwise, it should be placed outside of the 01898 // list. 01899 return Left.BlockKind == BK_BracedInit; 01900 if (Left.is(tok::question) && Right.is(tok::colon)) 01901 return false; 01902 if (Right.Type == TT_ConditionalExpr || Right.is(tok::question)) 01903 return Style.BreakBeforeTernaryOperators; 01904 if (Left.Type == TT_ConditionalExpr || Left.is(tok::question)) 01905 return !Style.BreakBeforeTernaryOperators; 01906 if (Right.Type == TT_InheritanceColon) 01907 return true; 01908 if (Right.is(tok::colon) && (Right.Type != TT_CtorInitializerColon && 01909 Right.Type != TT_InlineASMColon)) 01910 return false; 01911 if (Left.is(tok::colon) && 01912 (Left.Type == TT_DictLiteral || Left.Type == TT_ObjCMethodExpr)) 01913 return true; 01914 if (Right.Type == TT_SelectorName) 01915 return true; 01916 if (Left.is(tok::r_paren) && Line.Type == LT_ObjCProperty) 01917 return true; 01918 if (Left.ClosesTemplateDeclaration) 01919 return true; 01920 if (Right.Type == TT_RangeBasedForLoopColon || 01921 Right.Type == TT_OverloadedOperatorLParen || 01922 Right.Type == TT_OverloadedOperator) 01923 return false; 01924 if (Left.Type == TT_RangeBasedForLoopColon) 01925 return true; 01926 if (Right.Type == TT_RangeBasedForLoopColon) 01927 return false; 01928 if (Left.Type == TT_PointerOrReference || Left.Type == TT_TemplateCloser || 01929 Left.Type == TT_UnaryOperator || Left.is(tok::kw_operator)) 01930 return false; 01931 if (Left.is(tok::equal) && Line.Type == LT_VirtualFunctionDecl) 01932 return false; 01933 if (Left.is(tok::l_paren) && Left.Type == TT_AttributeParen) 01934 return false; 01935 if (Left.is(tok::l_paren) && Left.Previous && 01936 (Left.Previous->Type == TT_BinaryOperator || 01937 Left.Previous->Type == TT_CastRParen || Left.Previous->is(tok::kw_if))) 01938 return false; 01939 if (Right.Type == TT_ImplicitStringLiteral) 01940 return false; 01941 01942 if (Right.is(tok::r_paren) || Right.Type == TT_TemplateCloser) 01943 return false; 01944 01945 // We only break before r_brace if there was a corresponding break before 01946 // the l_brace, which is tracked by BreakBeforeClosingBrace. 01947 if (Right.is(tok::r_brace)) 01948 return Right.MatchingParen && Right.MatchingParen->BlockKind == BK_Block; 01949 01950 // Allow breaking after a trailing annotation, e.g. after a method 01951 // declaration. 01952 if (Left.Type == TT_TrailingAnnotation) 01953 return !Right.isOneOf(tok::l_brace, tok::semi, tok::equal, tok::l_paren, 01954 tok::less, tok::coloncolon); 01955 01956 if (Right.is(tok::kw___attribute)) 01957 return true; 01958 01959 if (Left.is(tok::identifier) && Right.is(tok::string_literal)) 01960 return true; 01961 01962 if (Right.is(tok::identifier) && Right.Next && 01963 Right.Next->Type == TT_DictLiteral) 01964 return true; 01965 01966 if (Left.Type == TT_CtorInitializerComma && 01967 Style.BreakConstructorInitializersBeforeComma) 01968 return false; 01969 if (Right.Type == TT_CtorInitializerComma && 01970 Style.BreakConstructorInitializersBeforeComma) 01971 return true; 01972 if (Left.is(tok::greater) && Right.is(tok::greater) && 01973 Left.Type != TT_TemplateCloser) 01974 return false; 01975 if (Right.Type == TT_BinaryOperator && 01976 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_None && 01977 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_All || 01978 Right.getPrecedence() != prec::Assignment)) 01979 return true; 01980 if (Left.Type == TT_ArrayInitializerLSquare) 01981 return true; 01982 if (Right.is(tok::kw_typename) && Left.isNot(tok::kw_const)) 01983 return true; 01984 if (Left.isBinaryOperator() && !Left.isOneOf(tok::arrowstar, tok::lessless) && 01985 Style.BreakBeforeBinaryOperators != FormatStyle::BOS_All && 01986 (Style.BreakBeforeBinaryOperators == FormatStyle::BOS_None || 01987 Left.getPrecedence() == prec::Assignment)) 01988 return true; 01989 return Left.isOneOf(tok::comma, tok::coloncolon, tok::semi, tok::l_brace, 01990 tok::kw_class, tok::kw_struct) || 01991 Right.isMemberAccess() || Right.Type == TT_TrailingReturnArrow || 01992 Right.isOneOf(tok::lessless, tok::colon, tok::l_square, tok::at) || 01993 (Left.is(tok::r_paren) && 01994 Right.isOneOf(tok::identifier, tok::kw_const)) || 01995 (Left.is(tok::l_paren) && !Right.is(tok::r_paren)); 01996 } 01997 01998 void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { 01999 llvm::errs() << "AnnotatedTokens:\n"; 02000 const FormatToken *Tok = Line.First; 02001 while (Tok) { 02002 llvm::errs() << " M=" << Tok->MustBreakBefore 02003 << " C=" << Tok->CanBreakBefore << " T=" << Tok->Type 02004 << " S=" << Tok->SpacesRequiredBefore 02005 << " B=" << Tok->BlockParameterCount 02006 << " P=" << Tok->SplitPenalty << " Name=" << Tok->Tok.getName() 02007 << " L=" << Tok->TotalLength << " PPK=" << Tok->PackingKind 02008 << " FakeLParens="; 02009 for (unsigned i = 0, e = Tok->FakeLParens.size(); i != e; ++i) 02010 llvm::errs() << Tok->FakeLParens[i] << "/"; 02011 llvm::errs() << " FakeRParens=" << Tok->FakeRParens << "\n"; 02012 if (!Tok->Next) 02013 assert(Tok == Line.Last); 02014 Tok = Tok->Next; 02015 } 02016 llvm::errs() << "----\n"; 02017 } 02018 02019 } // namespace format 02020 } // namespace clang