clang API Documentation
00001 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 /// 00010 /// \file 00011 /// \brief This file implements an indenter that manages the indentation of 00012 /// continuations. 00013 /// 00014 //===----------------------------------------------------------------------===// 00015 00016 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 00017 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 00018 00019 #include "Encoding.h" 00020 #include "FormatToken.h" 00021 #include "clang/Format/Format.h" 00022 #include "llvm/Support/Regex.h" 00023 00024 namespace clang { 00025 class SourceManager; 00026 00027 namespace format { 00028 00029 class AnnotatedLine; 00030 struct FormatToken; 00031 struct LineState; 00032 struct ParenState; 00033 class WhitespaceManager; 00034 00035 class ContinuationIndenter { 00036 public: 00037 /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in 00038 /// column \p FirstIndent. 00039 ContinuationIndenter(const FormatStyle &Style, 00040 const AdditionalKeywords &Keywords, 00041 SourceManager &SourceMgr, WhitespaceManager &Whitespaces, 00042 encoding::Encoding Encoding, 00043 bool BinPackInconclusiveFunctions); 00044 00045 /// \brief Get the initial state, i.e. the state after placing \p Line's 00046 /// first token at \p FirstIndent. 00047 LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line, 00048 bool DryRun); 00049 00050 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a 00051 // better home. 00052 /// \brief Returns \c true, if a line break after \p State is allowed. 00053 bool canBreak(const LineState &State); 00054 00055 /// \brief Returns \c true, if a line break after \p State is mandatory. 00056 bool mustBreak(const LineState &State); 00057 00058 /// \brief Appends the next token to \p State and updates information 00059 /// necessary for indentation. 00060 /// 00061 /// Puts the token on the current line if \p Newline is \c false and adds a 00062 /// line break and necessary indentation otherwise. 00063 /// 00064 /// If \p DryRun is \c false, also creates and stores the required 00065 /// \c Replacement. 00066 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, 00067 unsigned ExtraSpaces = 0); 00068 00069 /// \brief Get the column limit for this line. This is the style's column 00070 /// limit, potentially reduced for preprocessor definitions. 00071 unsigned getColumnLimit(const LineState &State) const; 00072 00073 private: 00074 /// \brief Mark the next token as consumed in \p State and modify its stacks 00075 /// accordingly. 00076 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); 00077 00078 /// \brief Update 'State' according to the next token's fake left parentheses. 00079 void moveStatePastFakeLParens(LineState &State, bool Newline); 00080 /// \brief Update 'State' according to the next token's fake r_parens. 00081 void moveStatePastFakeRParens(LineState &State); 00082 00083 /// \brief Update 'State' according to the next token being one of "(<{[". 00084 void moveStatePastScopeOpener(LineState &State, bool Newline); 00085 /// \brief Update 'State' according to the next token being one of ")>}]". 00086 void moveStatePastScopeCloser(LineState &State); 00087 /// \brief Update 'State' with the next token opening a nested block. 00088 void moveStateToNewBlock(LineState &State); 00089 00090 /// \brief If the current token sticks out over the end of the line, break 00091 /// it if possible. 00092 /// 00093 /// \returns An extra penalty if a token was broken, otherwise 0. 00094 /// 00095 /// The returned penalty will cover the cost of the additional line breaks and 00096 /// column limit violation in all lines except for the last one. The penalty 00097 /// for the column limit violation in the last line (and in single line 00098 /// tokens) is handled in \c addNextStateToQueue. 00099 unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, 00100 bool DryRun); 00101 00102 /// \brief Appends the next token to \p State and updates information 00103 /// necessary for indentation. 00104 /// 00105 /// Puts the token on the current line. 00106 /// 00107 /// If \p DryRun is \c false, also creates and stores the required 00108 /// \c Replacement. 00109 void addTokenOnCurrentLine(LineState &State, bool DryRun, 00110 unsigned ExtraSpaces); 00111 00112 /// \brief Appends the next token to \p State and updates information 00113 /// necessary for indentation. 00114 /// 00115 /// Adds a line break and necessary indentation. 00116 /// 00117 /// If \p DryRun is \c false, also creates and stores the required 00118 /// \c Replacement. 00119 unsigned addTokenOnNewLine(LineState &State, bool DryRun); 00120 00121 /// \brief Calculate the new column for a line wrap before the next token. 00122 unsigned getNewLineColumn(const LineState &State); 00123 00124 /// \brief Adds a multiline token to the \p State. 00125 /// 00126 /// \returns Extra penalty for the first line of the literal: last line is 00127 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't 00128 /// matter, as we don't change them. 00129 unsigned addMultilineToken(const FormatToken &Current, LineState &State); 00130 00131 /// \brief Returns \c true if the next token starts a multiline string 00132 /// literal. 00133 /// 00134 /// This includes implicitly concatenated strings, strings that will be broken 00135 /// by clang-format and string literals with escaped newlines. 00136 bool nextIsMultilineString(const LineState &State); 00137 00138 FormatStyle Style; 00139 const AdditionalKeywords &Keywords; 00140 SourceManager &SourceMgr; 00141 WhitespaceManager &Whitespaces; 00142 encoding::Encoding Encoding; 00143 bool BinPackInconclusiveFunctions; 00144 llvm::Regex CommentPragmasRegex; 00145 }; 00146 00147 struct ParenState { 00148 ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace, 00149 bool AvoidBinPacking, bool NoLineBreak) 00150 : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace), 00151 FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0), 00152 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 00153 NoLineBreak(NoLineBreak), LastOperatorWrapped(true), ColonPos(0), 00154 StartOfFunctionCall(0), StartOfArraySubscripts(0), 00155 NestedNameSpecifierContinuation(0), CallContinuation(0), VariablePos(0), 00156 ContainsLineBreak(false), ContainsUnwrappedBuilder(0), 00157 AlignColons(true), ObjCSelectorNameFound(false), 00158 HasMultipleNestedBlocks(false), JSFunctionInlined(false) {} 00159 00160 /// \brief The position to which a specific parenthesis level needs to be 00161 /// indented. 00162 unsigned Indent; 00163 00164 /// \brief The number of indentation levels of the block. 00165 unsigned IndentLevel; 00166 00167 /// \brief The position of the last space on each level. 00168 /// 00169 /// Used e.g. to break like: 00170 /// functionCall(Parameter, otherCall( 00171 /// OtherParameter)); 00172 unsigned LastSpace; 00173 00174 /// \brief The position the first "<<" operator encountered on each level. 00175 /// 00176 /// Used to align "<<" operators. 0 if no such operator has been encountered 00177 /// on a level. 00178 unsigned FirstLessLess; 00179 00180 /// \brief Whether a newline needs to be inserted before the block's closing 00181 /// brace. 00182 /// 00183 /// We only want to insert a newline before the closing brace if there also 00184 /// was a newline after the beginning left brace. 00185 bool BreakBeforeClosingBrace; 00186 00187 /// \brief The column of a \c ? in a conditional expression; 00188 unsigned QuestionColumn; 00189 00190 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple 00191 /// lines, in this context. 00192 bool AvoidBinPacking; 00193 00194 /// \brief Break after the next comma (or all the commas in this context if 00195 /// \c AvoidBinPacking is \c true). 00196 bool BreakBeforeParameter; 00197 00198 /// \brief Line breaking in this context would break a formatting rule. 00199 bool NoLineBreak; 00200 00201 /// \brief True if the last binary operator on this level was wrapped to the 00202 /// next line. 00203 bool LastOperatorWrapped; 00204 00205 /// \brief The position of the colon in an ObjC method declaration/call. 00206 unsigned ColonPos; 00207 00208 /// \brief The start of the most recent function in a builder-type call. 00209 unsigned StartOfFunctionCall; 00210 00211 /// \brief Contains the start of array subscript expressions, so that they 00212 /// can be aligned. 00213 unsigned StartOfArraySubscripts; 00214 00215 /// \brief If a nested name specifier was broken over multiple lines, this 00216 /// contains the start column of the second line. Otherwise 0. 00217 unsigned NestedNameSpecifierContinuation; 00218 00219 /// \brief If a call expression was broken over multiple lines, this 00220 /// contains the start column of the second line. Otherwise 0. 00221 unsigned CallContinuation; 00222 00223 /// \brief The column of the first variable name in a variable declaration. 00224 /// 00225 /// Used to align further variables if necessary. 00226 unsigned VariablePos; 00227 00228 /// \brief \c true if this \c ParenState already contains a line-break. 00229 /// 00230 /// The first line break in a certain \c ParenState causes extra penalty so 00231 /// that clang-format prefers similar breaks, i.e. breaks in the same 00232 /// parenthesis. 00233 bool ContainsLineBreak; 00234 00235 /// \brief \c true if this \c ParenState contains multiple segments of a 00236 /// builder-type call on one line. 00237 bool ContainsUnwrappedBuilder; 00238 00239 /// \brief \c true if the colons of the curren ObjC method expression should 00240 /// be aligned. 00241 /// 00242 /// Not considered for memoization as it will always have the same value at 00243 /// the same token. 00244 bool AlignColons; 00245 00246 /// \brief \c true if at least one selector name was found in the current 00247 /// ObjC method expression. 00248 /// 00249 /// Not considered for memoization as it will always have the same value at 00250 /// the same token. 00251 bool ObjCSelectorNameFound; 00252 00253 /// \brief \c true if there are multiple nested blocks inside these parens. 00254 /// 00255 /// Not considered for memoization as it will always have the same value at 00256 /// the same token. 00257 bool HasMultipleNestedBlocks; 00258 00259 // \brief The previous JavaScript 'function' keyword is not wrapped to a new 00260 // line. 00261 bool JSFunctionInlined; 00262 00263 bool operator<(const ParenState &Other) const { 00264 if (Indent != Other.Indent) 00265 return Indent < Other.Indent; 00266 if (LastSpace != Other.LastSpace) 00267 return LastSpace < Other.LastSpace; 00268 if (FirstLessLess != Other.FirstLessLess) 00269 return FirstLessLess < Other.FirstLessLess; 00270 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 00271 return BreakBeforeClosingBrace; 00272 if (QuestionColumn != Other.QuestionColumn) 00273 return QuestionColumn < Other.QuestionColumn; 00274 if (AvoidBinPacking != Other.AvoidBinPacking) 00275 return AvoidBinPacking; 00276 if (BreakBeforeParameter != Other.BreakBeforeParameter) 00277 return BreakBeforeParameter; 00278 if (NoLineBreak != Other.NoLineBreak) 00279 return NoLineBreak; 00280 if (LastOperatorWrapped != Other.LastOperatorWrapped) 00281 return LastOperatorWrapped; 00282 if (ColonPos != Other.ColonPos) 00283 return ColonPos < Other.ColonPos; 00284 if (StartOfFunctionCall != Other.StartOfFunctionCall) 00285 return StartOfFunctionCall < Other.StartOfFunctionCall; 00286 if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 00287 return StartOfArraySubscripts < Other.StartOfArraySubscripts; 00288 if (CallContinuation != Other.CallContinuation) 00289 return CallContinuation < Other.CallContinuation; 00290 if (VariablePos != Other.VariablePos) 00291 return VariablePos < Other.VariablePos; 00292 if (ContainsLineBreak != Other.ContainsLineBreak) 00293 return ContainsLineBreak < Other.ContainsLineBreak; 00294 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) 00295 return ContainsUnwrappedBuilder < Other.ContainsUnwrappedBuilder; 00296 if (JSFunctionInlined != Other.JSFunctionInlined) 00297 return JSFunctionInlined < Other.JSFunctionInlined; 00298 return false; 00299 } 00300 }; 00301 00302 /// \brief The current state when indenting a unwrapped line. 00303 /// 00304 /// As the indenting tries different combinations this is copied by value. 00305 struct LineState { 00306 /// \brief The number of used columns in the current line. 00307 unsigned Column; 00308 00309 /// \brief The token that needs to be next formatted. 00310 FormatToken *NextToken; 00311 00312 /// \brief \c true if this line contains a continued for-loop section. 00313 bool LineContainsContinuedForLoopSection; 00314 00315 /// \brief The \c NestingLevel at the start of this line. 00316 unsigned StartOfLineLevel; 00317 00318 /// \brief The lowest \c NestingLevel on the current line. 00319 unsigned LowestLevelOnLine; 00320 00321 /// \brief The start column of the string literal, if we're in a string 00322 /// literal sequence, 0 otherwise. 00323 unsigned StartOfStringLiteral; 00324 00325 /// \brief A stack keeping track of properties applying to parenthesis 00326 /// levels. 00327 std::vector<ParenState> Stack; 00328 00329 /// \brief Ignore the stack of \c ParenStates for state comparison. 00330 /// 00331 /// In long and deeply nested unwrapped lines, the current algorithm can 00332 /// be insufficient for finding the best formatting with a reasonable amount 00333 /// of time and memory. Setting this flag will effectively lead to the 00334 /// algorithm not analyzing some combinations. However, these combinations 00335 /// rarely contain the optimal solution: In short, accepting a higher 00336 /// penalty early would need to lead to different values in the \c 00337 /// ParenState stack (in an otherwise identical state) and these different 00338 /// values would need to lead to a significant amount of avoided penalty 00339 /// later. 00340 /// 00341 /// FIXME: Come up with a better algorithm instead. 00342 bool IgnoreStackForComparison; 00343 00344 /// \brief The indent of the first token. 00345 unsigned FirstIndent; 00346 00347 /// \brief The line that is being formatted. 00348 /// 00349 /// Does not need to be considered for memoization because it doesn't change. 00350 const AnnotatedLine *Line; 00351 00352 /// \brief Comparison operator to be able to used \c LineState in \c map. 00353 bool operator<(const LineState &Other) const { 00354 if (NextToken != Other.NextToken) 00355 return NextToken < Other.NextToken; 00356 if (Column != Other.Column) 00357 return Column < Other.Column; 00358 if (LineContainsContinuedForLoopSection != 00359 Other.LineContainsContinuedForLoopSection) 00360 return LineContainsContinuedForLoopSection; 00361 if (StartOfLineLevel != Other.StartOfLineLevel) 00362 return StartOfLineLevel < Other.StartOfLineLevel; 00363 if (LowestLevelOnLine != Other.LowestLevelOnLine) 00364 return LowestLevelOnLine < Other.LowestLevelOnLine; 00365 if (StartOfStringLiteral != Other.StartOfStringLiteral) 00366 return StartOfStringLiteral < Other.StartOfStringLiteral; 00367 if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 00368 return false; 00369 return Stack < Other.Stack; 00370 } 00371 }; 00372 00373 } // end namespace format 00374 } // end namespace clang 00375 00376 #endif