clang API Documentation
00001 //===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 /// 00010 /// \file 00011 /// \brief This file contains the declaration of the FormatToken, a wrapper 00012 /// around Token with additional information related to formatting. 00013 /// 00014 //===----------------------------------------------------------------------===// 00015 00016 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H 00017 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H 00018 00019 #include "clang/Basic/IdentifierTable.h" 00020 #include "clang/Basic/OperatorPrecedence.h" 00021 #include "clang/Format/Format.h" 00022 #include "clang/Lex/Lexer.h" 00023 #include <memory> 00024 00025 namespace clang { 00026 namespace format { 00027 00028 enum TokenType { 00029 TT_ArrayInitializerLSquare, 00030 TT_ArraySubscriptLSquare, 00031 TT_AttributeParen, 00032 TT_BinaryOperator, 00033 TT_BitFieldColon, 00034 TT_BlockComment, 00035 TT_CastRParen, 00036 TT_ConditionalExpr, 00037 TT_ConflictAlternative, 00038 TT_ConflictEnd, 00039 TT_ConflictStart, 00040 TT_CtorInitializerColon, 00041 TT_CtorInitializerComma, 00042 TT_DesignatedInitializerPeriod, 00043 TT_DictLiteral, 00044 TT_FunctionDeclarationName, 00045 TT_FunctionLBrace, 00046 TT_FunctionTypeLParen, 00047 TT_ImplicitStringLiteral, 00048 TT_InheritanceColon, 00049 TT_InlineASMColon, 00050 TT_JavaAnnotation, 00051 TT_LambdaLSquare, 00052 TT_LeadingJavaAnnotation, 00053 TT_LineComment, 00054 TT_ObjCBlockLBrace, 00055 TT_ObjCBlockLParen, 00056 TT_ObjCDecl, 00057 TT_ObjCForIn, 00058 TT_ObjCMethodExpr, 00059 TT_ObjCMethodSpecifier, 00060 TT_ObjCProperty, 00061 TT_OverloadedOperator, 00062 TT_OverloadedOperatorLParen, 00063 TT_PointerOrReference, 00064 TT_PureVirtualSpecifier, 00065 TT_RangeBasedForLoopColon, 00066 TT_RegexLiteral, 00067 TT_SelectorName, 00068 TT_StartOfName, 00069 TT_TemplateCloser, 00070 TT_TemplateOpener, 00071 TT_TrailingAnnotation, 00072 TT_TrailingReturnArrow, 00073 TT_TrailingUnaryOperator, 00074 TT_UnaryOperator, 00075 TT_Unknown 00076 }; 00077 00078 // Represents what type of block a set of braces open. 00079 enum BraceBlockKind { 00080 BK_Unknown, 00081 BK_Block, 00082 BK_BracedInit 00083 }; 00084 00085 // The packing kind of a function's parameters. 00086 enum ParameterPackingKind { 00087 PPK_BinPacked, 00088 PPK_OnePerLine, 00089 PPK_Inconclusive 00090 }; 00091 00092 enum FormatDecision { 00093 FD_Unformatted, 00094 FD_Continue, 00095 FD_Break 00096 }; 00097 00098 class TokenRole; 00099 class AnnotatedLine; 00100 00101 /// \brief A wrapper around a \c Token storing information about the 00102 /// whitespace characters preceding it. 00103 struct FormatToken { 00104 FormatToken() 00105 : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0), 00106 ColumnWidth(0), LastLineColumnWidth(0), IsMultiline(false), 00107 IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false), 00108 BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0), 00109 CanBreakBefore(false), ClosesTemplateDeclaration(false), 00110 ParameterCount(0), BlockParameterCount(0), 00111 PackingKind(PPK_Inconclusive), TotalLength(0), UnbreakableTailLength(0), 00112 BindingStrength(0), NestingLevel(0), SplitPenalty(0), 00113 LongestObjCSelectorName(0), FakeRParens(0), 00114 StartsBinaryExpression(false), EndsBinaryExpression(false), 00115 OperatorIndex(0), LastOperator(false), 00116 PartOfMultiVariableDeclStmt(false), IsForEachMacro(false), 00117 MatchingParen(nullptr), Previous(nullptr), Next(nullptr), 00118 Decision(FD_Unformatted), Finalized(false) {} 00119 00120 /// \brief The \c Token. 00121 Token Tok; 00122 00123 /// \brief The number of newlines immediately before the \c Token. 00124 /// 00125 /// This can be used to determine what the user wrote in the original code 00126 /// and thereby e.g. leave an empty line between two function definitions. 00127 unsigned NewlinesBefore; 00128 00129 /// \brief Whether there is at least one unescaped newline before the \c 00130 /// Token. 00131 bool HasUnescapedNewline; 00132 00133 /// \brief The range of the whitespace immediately preceding the \c Token. 00134 SourceRange WhitespaceRange; 00135 00136 /// \brief The offset just past the last '\n' in this token's leading 00137 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. 00138 unsigned LastNewlineOffset; 00139 00140 /// \brief The width of the non-whitespace parts of the token (or its first 00141 /// line for multi-line tokens) in columns. 00142 /// We need this to correctly measure number of columns a token spans. 00143 unsigned ColumnWidth; 00144 00145 /// \brief Contains the width in columns of the last line of a multi-line 00146 /// token. 00147 unsigned LastLineColumnWidth; 00148 00149 /// \brief Whether the token text contains newlines (escaped or not). 00150 bool IsMultiline; 00151 00152 /// \brief Indicates that this is the first token. 00153 bool IsFirst; 00154 00155 /// \brief Whether there must be a line break before this token. 00156 /// 00157 /// This happens for example when a preprocessor directive ended directly 00158 /// before the token. 00159 bool MustBreakBefore; 00160 00161 /// \brief Returns actual token start location without leading escaped 00162 /// newlines and whitespace. 00163 /// 00164 /// This can be different to Tok.getLocation(), which includes leading escaped 00165 /// newlines. 00166 SourceLocation getStartOfNonWhitespace() const { 00167 return WhitespaceRange.getEnd(); 00168 } 00169 00170 /// \brief The raw text of the token. 00171 /// 00172 /// Contains the raw token text without leading whitespace and without leading 00173 /// escaped newlines. 00174 StringRef TokenText; 00175 00176 /// \brief Set to \c true if this token is an unterminated literal. 00177 bool IsUnterminatedLiteral; 00178 00179 /// \brief Contains the kind of block if this token is a brace. 00180 BraceBlockKind BlockKind; 00181 00182 TokenType Type; 00183 00184 /// \brief The number of spaces that should be inserted before this token. 00185 unsigned SpacesRequiredBefore; 00186 00187 /// \brief \c true if it is allowed to break before this token. 00188 bool CanBreakBefore; 00189 00190 bool ClosesTemplateDeclaration; 00191 00192 /// \brief Number of parameters, if this is "(", "[" or "<". 00193 /// 00194 /// This is initialized to 1 as we don't need to distinguish functions with 00195 /// 0 parameters from functions with 1 parameter. Thus, we can simply count 00196 /// the number of commas. 00197 unsigned ParameterCount; 00198 00199 /// \brief Number of parameters that are nested blocks, 00200 /// if this is "(", "[" or "<". 00201 unsigned BlockParameterCount; 00202 00203 /// \brief A token can have a special role that can carry extra information 00204 /// about the token's formatting. 00205 std::unique_ptr<TokenRole> Role; 00206 00207 /// \brief If this is an opening parenthesis, how are the parameters packed? 00208 ParameterPackingKind PackingKind; 00209 00210 /// \brief The total length of the unwrapped line up to and including this 00211 /// token. 00212 unsigned TotalLength; 00213 00214 /// \brief The original 0-based column of this token, including expanded tabs. 00215 /// The configured TabWidth is used as tab width. 00216 unsigned OriginalColumn; 00217 00218 /// \brief The length of following tokens until the next natural split point, 00219 /// or the next token that can be broken. 00220 unsigned UnbreakableTailLength; 00221 00222 // FIXME: Come up with a 'cleaner' concept. 00223 /// \brief The binding strength of a token. This is a combined value of 00224 /// operator precedence, parenthesis nesting, etc. 00225 unsigned BindingStrength; 00226 00227 /// \brief The nesting level of this token, i.e. the number of surrounding (), 00228 /// [], {} or <>. 00229 unsigned NestingLevel; 00230 00231 /// \brief Penalty for inserting a line break before this token. 00232 unsigned SplitPenalty; 00233 00234 /// \brief If this is the first ObjC selector name in an ObjC method 00235 /// definition or call, this contains the length of the longest name. 00236 /// 00237 /// This being set to 0 means that the selectors should not be colon-aligned, 00238 /// e.g. because several of them are block-type. 00239 unsigned LongestObjCSelectorName; 00240 00241 /// \brief Stores the number of required fake parentheses and the 00242 /// corresponding operator precedence. 00243 /// 00244 /// If multiple fake parentheses start at a token, this vector stores them in 00245 /// reverse order, i.e. inner fake parenthesis first. 00246 SmallVector<prec::Level, 4> FakeLParens; 00247 /// \brief Insert this many fake ) after this token for correct indentation. 00248 unsigned FakeRParens; 00249 00250 /// \brief \c true if this token starts a binary expression, i.e. has at least 00251 /// one fake l_paren with a precedence greater than prec::Unknown. 00252 bool StartsBinaryExpression; 00253 /// \brief \c true if this token ends a binary expression. 00254 bool EndsBinaryExpression; 00255 00256 /// \brief Is this is an operator (or "."/"->") in a sequence of operators 00257 /// with the same precedence, contains the 0-based operator index. 00258 unsigned OperatorIndex; 00259 00260 /// \brief Is this the last operator (or "."/"->") in a sequence of operators 00261 /// with the same precedence? 00262 bool LastOperator; 00263 00264 /// \brief Is this token part of a \c DeclStmt defining multiple variables? 00265 /// 00266 /// Only set if \c Type == \c TT_StartOfName. 00267 bool PartOfMultiVariableDeclStmt; 00268 00269 /// \brief Is this a foreach macro? 00270 bool IsForEachMacro; 00271 00272 bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } 00273 00274 bool is(const IdentifierInfo *II) const { 00275 return II && II == Tok.getIdentifierInfo(); 00276 } 00277 00278 template <typename T> 00279 bool isOneOf(T K1, T K2) const { 00280 return is(K1) || is(K2); 00281 } 00282 00283 template <typename T> 00284 bool isOneOf(T K1, T K2, T K3) const { 00285 return is(K1) || is(K2) || is(K3); 00286 } 00287 00288 template <typename T> 00289 bool isOneOf(T K1, T K2, T K3, T K4, T K5 = tok::NUM_TOKENS, 00290 T K6 = tok::NUM_TOKENS, T K7 = tok::NUM_TOKENS, 00291 T K8 = tok::NUM_TOKENS, T K9 = tok::NUM_TOKENS, 00292 T K10 = tok::NUM_TOKENS, T K11 = tok::NUM_TOKENS, 00293 T K12 = tok::NUM_TOKENS) const { 00294 return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) || 00295 is(K8) || is(K9) || is(K10) || is(K11) || is(K12); 00296 } 00297 00298 template <typename T> 00299 bool isNot(T Kind) const { 00300 return Tok.isNot(Kind); 00301 } 00302 bool isNot(IdentifierInfo *II) const { return II != Tok.getIdentifierInfo(); } 00303 00304 bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); } 00305 00306 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 00307 return Tok.isObjCAtKeyword(Kind); 00308 } 00309 00310 bool isAccessSpecifier(bool ColonRequired = true) const { 00311 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && 00312 (!ColonRequired || (Next && Next->is(tok::colon))); 00313 } 00314 00315 /// \brief Determine whether the token is a simple-type-specifier. 00316 bool isSimpleTypeSpecifier() const; 00317 00318 bool isObjCAccessSpecifier() const { 00319 return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) || 00320 Next->isObjCAtKeyword(tok::objc_protected) || 00321 Next->isObjCAtKeyword(tok::objc_package) || 00322 Next->isObjCAtKeyword(tok::objc_private)); 00323 } 00324 00325 /// \brief Returns whether \p Tok is ([{ or a template opening <. 00326 bool opensScope() const { 00327 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square) || 00328 Type == TT_TemplateOpener; 00329 } 00330 /// \brief Returns whether \p Tok is )]} or a template closing >. 00331 bool closesScope() const { 00332 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square) || 00333 Type == TT_TemplateCloser; 00334 } 00335 00336 /// \brief Returns \c true if this is a "." or "->" accessing a member. 00337 bool isMemberAccess() const { 00338 return isOneOf(tok::arrow, tok::period, tok::arrowstar) && 00339 Type != TT_DesignatedInitializerPeriod && 00340 Type != TT_TrailingReturnArrow; 00341 } 00342 00343 bool isUnaryOperator() const { 00344 switch (Tok.getKind()) { 00345 case tok::plus: 00346 case tok::plusplus: 00347 case tok::minus: 00348 case tok::minusminus: 00349 case tok::exclaim: 00350 case tok::tilde: 00351 case tok::kw_sizeof: 00352 case tok::kw_alignof: 00353 return true; 00354 default: 00355 return false; 00356 } 00357 } 00358 00359 bool isBinaryOperator() const { 00360 // Comma is a binary operator, but does not behave as such wrt. formatting. 00361 return getPrecedence() > prec::Comma; 00362 } 00363 00364 bool isTrailingComment() const { 00365 return is(tok::comment) && 00366 (Type == TT_LineComment || !Next || Next->NewlinesBefore > 0); 00367 } 00368 00369 /// \brief Returns \c true if this is a keyword that can be used 00370 /// like a function call (e.g. sizeof, typeid, ...). 00371 bool isFunctionLikeKeyword() const { 00372 switch (Tok.getKind()) { 00373 case tok::kw_throw: 00374 case tok::kw_typeid: 00375 case tok::kw_return: 00376 case tok::kw_sizeof: 00377 case tok::kw_alignof: 00378 case tok::kw_alignas: 00379 case tok::kw_decltype: 00380 case tok::kw_noexcept: 00381 case tok::kw_static_assert: 00382 case tok::kw___attribute: 00383 return true; 00384 default: 00385 return false; 00386 } 00387 } 00388 00389 prec::Level getPrecedence() const { 00390 return getBinOpPrecedence(Tok.getKind(), true, true); 00391 } 00392 00393 /// \brief Returns the previous token ignoring comments. 00394 FormatToken *getPreviousNonComment() const { 00395 FormatToken *Tok = Previous; 00396 while (Tok && Tok->is(tok::comment)) 00397 Tok = Tok->Previous; 00398 return Tok; 00399 } 00400 00401 /// \brief Returns the next token ignoring comments. 00402 const FormatToken *getNextNonComment() const { 00403 const FormatToken *Tok = Next; 00404 while (Tok && Tok->is(tok::comment)) 00405 Tok = Tok->Next; 00406 return Tok; 00407 } 00408 00409 /// \brief Returns \c true if this tokens starts a block-type list, i.e. a 00410 /// list that should be indented with a block indent. 00411 bool opensBlockTypeList(const FormatStyle &Style) const { 00412 return Type == TT_ArrayInitializerLSquare || 00413 (is(tok::l_brace) && 00414 (BlockKind == BK_Block || Type == TT_DictLiteral || 00415 !Style.Cpp11BracedListStyle)); 00416 } 00417 00418 /// \brief Same as opensBlockTypeList, but for the closing token. 00419 bool closesBlockTypeList(const FormatStyle &Style) const { 00420 return MatchingParen && MatchingParen->opensBlockTypeList(Style); 00421 } 00422 00423 FormatToken *MatchingParen; 00424 00425 FormatToken *Previous; 00426 FormatToken *Next; 00427 00428 SmallVector<AnnotatedLine *, 1> Children; 00429 00430 /// \brief Stores the formatting decision for the token once it was made. 00431 FormatDecision Decision; 00432 00433 /// \brief If \c true, this token has been fully formatted (indented and 00434 /// potentially re-formatted inside), and we do not allow further formatting 00435 /// changes. 00436 bool Finalized; 00437 00438 private: 00439 // Disallow copying. 00440 FormatToken(const FormatToken &) LLVM_DELETED_FUNCTION; 00441 void operator=(const FormatToken &) LLVM_DELETED_FUNCTION; 00442 }; 00443 00444 class ContinuationIndenter; 00445 struct LineState; 00446 00447 class TokenRole { 00448 public: 00449 TokenRole(const FormatStyle &Style) : Style(Style) {} 00450 virtual ~TokenRole(); 00451 00452 /// \brief After the \c TokenAnnotator has finished annotating all the tokens, 00453 /// this function precomputes required information for formatting. 00454 virtual void precomputeFormattingInfos(const FormatToken *Token); 00455 00456 /// \brief Apply the special formatting that the given role demands. 00457 /// 00458 /// Assumes that the token having this role is already formatted. 00459 /// 00460 /// Continues formatting from \p State leaving indentation to \p Indenter and 00461 /// returns the total penalty that this formatting incurs. 00462 virtual unsigned formatFromToken(LineState &State, 00463 ContinuationIndenter *Indenter, 00464 bool DryRun) { 00465 return 0; 00466 } 00467 00468 /// \brief Same as \c formatFromToken, but assumes that the first token has 00469 /// already been set thereby deciding on the first line break. 00470 virtual unsigned formatAfterToken(LineState &State, 00471 ContinuationIndenter *Indenter, 00472 bool DryRun) { 00473 return 0; 00474 } 00475 00476 /// \brief Notifies the \c Role that a comma was found. 00477 virtual void CommaFound(const FormatToken *Token) {} 00478 00479 protected: 00480 const FormatStyle &Style; 00481 }; 00482 00483 class CommaSeparatedList : public TokenRole { 00484 public: 00485 CommaSeparatedList(const FormatStyle &Style) 00486 : TokenRole(Style), HasNestedBracedList(false) {} 00487 00488 void precomputeFormattingInfos(const FormatToken *Token) override; 00489 00490 unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, 00491 bool DryRun) override; 00492 00493 unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, 00494 bool DryRun) override; 00495 00496 /// \brief Adds \p Token as the next comma to the \c CommaSeparated list. 00497 void CommaFound(const FormatToken *Token) override { 00498 Commas.push_back(Token); 00499 } 00500 00501 private: 00502 /// \brief A struct that holds information on how to format a given list with 00503 /// a specific number of columns. 00504 struct ColumnFormat { 00505 /// \brief The number of columns to use. 00506 unsigned Columns; 00507 00508 /// \brief The total width in characters. 00509 unsigned TotalWidth; 00510 00511 /// \brief The number of lines required for this format. 00512 unsigned LineCount; 00513 00514 /// \brief The size of each column in characters. 00515 SmallVector<unsigned, 8> ColumnSizes; 00516 }; 00517 00518 /// \brief Calculate which \c ColumnFormat fits best into 00519 /// \p RemainingCharacters. 00520 const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const; 00521 00522 /// \brief The ordered \c FormatTokens making up the commas of this list. 00523 SmallVector<const FormatToken *, 8> Commas; 00524 00525 /// \brief The length of each of the list's items in characters including the 00526 /// trailing comma. 00527 SmallVector<unsigned, 8> ItemLengths; 00528 00529 /// \brief Precomputed formats that can be used for this list. 00530 SmallVector<ColumnFormat, 4> Formats; 00531 00532 bool HasNestedBracedList; 00533 }; 00534 00535 /// \brief Encapsulates keywords that are context sensitive or for languages not 00536 /// properly supported by Clang's lexer. 00537 struct AdditionalKeywords { 00538 AdditionalKeywords(IdentifierTable &IdentTable) { 00539 kw_in = &IdentTable.get("in"); 00540 kw_NS_ENUM = &IdentTable.get("NS_ENUM"); 00541 00542 kw_finally = &IdentTable.get("finally"); 00543 kw_function = &IdentTable.get("function"); 00544 kw_var = &IdentTable.get("var"); 00545 00546 kw_extends = &IdentTable.get("extends"); 00547 kw_implements = &IdentTable.get("implements"); 00548 kw_interface = &IdentTable.get("interface"); 00549 kw_synchronized = &IdentTable.get("synchronized"); 00550 kw_throws = &IdentTable.get("throws"); 00551 00552 kw_option = &IdentTable.get("option"); 00553 kw_optional = &IdentTable.get("optional"); 00554 kw_repeated = &IdentTable.get("repeated"); 00555 kw_required = &IdentTable.get("required"); 00556 kw_returns = &IdentTable.get("returns"); 00557 } 00558 00559 // ObjC context sensitive keywords. 00560 IdentifierInfo *kw_in; 00561 IdentifierInfo *kw_NS_ENUM; 00562 00563 // JavaScript keywords. 00564 IdentifierInfo *kw_finally; 00565 IdentifierInfo *kw_function; 00566 IdentifierInfo *kw_var; 00567 00568 // Java keywords. 00569 IdentifierInfo *kw_extends; 00570 IdentifierInfo *kw_implements; 00571 IdentifierInfo *kw_interface; 00572 IdentifierInfo *kw_synchronized; 00573 IdentifierInfo *kw_throws; 00574 00575 // Proto keywords. 00576 IdentifierInfo *kw_option; 00577 IdentifierInfo *kw_optional; 00578 IdentifierInfo *kw_repeated; 00579 IdentifierInfo *kw_required; 00580 IdentifierInfo *kw_returns; 00581 }; 00582 00583 } // namespace format 00584 } // namespace clang 00585 00586 #endif