clang API Documentation

ContinuationIndenter.h
Go to the documentation of this file.
00001 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 ///
00010 /// \file
00011 /// \brief This file implements an indenter that manages the indentation of
00012 /// continuations.
00013 ///
00014 //===----------------------------------------------------------------------===//
00015 
00016 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
00017 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H
00018 
00019 #include "Encoding.h"
00020 #include "FormatToken.h"
00021 #include "clang/Format/Format.h"
00022 #include "llvm/Support/Regex.h"
00023 
00024 namespace clang {
00025 class SourceManager;
00026 
00027 namespace format {
00028 
// Forward declarations for types named in the declarations below.
// ParenState and LineState are defined later in this file.
class AnnotatedLine;
struct FormatToken;
struct LineState;
struct ParenState;
class WhitespaceManager;
00034 
00035 class ContinuationIndenter {
00036 public:
00037   /// \brief Constructs a \c ContinuationIndenter to format \p Line starting in
00038   /// column \p FirstIndent.
00039   ContinuationIndenter(const FormatStyle &Style,
00040                        const AdditionalKeywords &Keywords,
00041                        SourceManager &SourceMgr, WhitespaceManager &Whitespaces,
00042                        encoding::Encoding Encoding,
00043                        bool BinPackInconclusiveFunctions);
00044 
00045   /// \brief Get the initial state, i.e. the state after placing \p Line's
00046   /// first token at \p FirstIndent.
00047   LineState getInitialState(unsigned FirstIndent, const AnnotatedLine *Line,
00048                             bool DryRun);
00049 
00050   // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a
00051   // better home.
00052   /// \brief Returns \c true, if a line break after \p State is allowed.
00053   bool canBreak(const LineState &State);
00054 
00055   /// \brief Returns \c true, if a line break after \p State is mandatory.
00056   bool mustBreak(const LineState &State);
00057 
00058   /// \brief Appends the next token to \p State and updates information
00059   /// necessary for indentation.
00060   ///
00061   /// Puts the token on the current line if \p Newline is \c false and adds a
00062   /// line break and necessary indentation otherwise.
00063   ///
00064   /// If \p DryRun is \c false, also creates and stores the required
00065   /// \c Replacement.
00066   unsigned addTokenToState(LineState &State, bool Newline, bool DryRun,
00067                            unsigned ExtraSpaces = 0);
00068 
00069   /// \brief Get the column limit for this line. This is the style's column
00070   /// limit, potentially reduced for preprocessor definitions.
00071   unsigned getColumnLimit(const LineState &State) const;
00072 
00073 private:
00074   /// \brief Mark the next token as consumed in \p State and modify its stacks
00075   /// accordingly.
00076   unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline);
00077 
00078   /// \brief Update 'State' according to the next token's fake left parentheses.
00079   void moveStatePastFakeLParens(LineState &State, bool Newline);
00080   /// \brief Update 'State' according to the next token's fake r_parens.
00081   void moveStatePastFakeRParens(LineState &State);
00082 
00083   /// \brief Update 'State' according to the next token being one of "(<{[".
00084   void moveStatePastScopeOpener(LineState &State, bool Newline);
00085   /// \brief Update 'State' according to the next token being one of ")>}]".
00086   void moveStatePastScopeCloser(LineState &State);
00087   /// \brief Update 'State' with the next token opening a nested block.
00088   void moveStateToNewBlock(LineState &State);
00089 
00090   /// \brief If the current token sticks out over the end of the line, break
00091   /// it if possible.
00092   ///
00093   /// \returns An extra penalty if a token was broken, otherwise 0.
00094   ///
00095   /// The returned penalty will cover the cost of the additional line breaks and
00096   /// column limit violation in all lines except for the last one. The penalty
00097   /// for the column limit violation in the last line (and in single line
00098   /// tokens) is handled in \c addNextStateToQueue.
00099   unsigned breakProtrudingToken(const FormatToken &Current, LineState &State,
00100                                 bool DryRun);
00101 
00102   /// \brief Appends the next token to \p State and updates information
00103   /// necessary for indentation.
00104   ///
00105   /// Puts the token on the current line.
00106   ///
00107   /// If \p DryRun is \c false, also creates and stores the required
00108   /// \c Replacement.
00109   void addTokenOnCurrentLine(LineState &State, bool DryRun,
00110                              unsigned ExtraSpaces);
00111 
00112   /// \brief Appends the next token to \p State and updates information
00113   /// necessary for indentation.
00114   ///
00115   /// Adds a line break and necessary indentation.
00116   ///
00117   /// If \p DryRun is \c false, also creates and stores the required
00118   /// \c Replacement.
00119   unsigned addTokenOnNewLine(LineState &State, bool DryRun);
00120 
00121   /// \brief Calculate the new column for a line wrap before the next token.
00122   unsigned getNewLineColumn(const LineState &State);
00123 
00124   /// \brief Adds a multiline token to the \p State.
00125   ///
00126   /// \returns Extra penalty for the first line of the literal: last line is
00127   /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't
00128   /// matter, as we don't change them.
00129   unsigned addMultilineToken(const FormatToken &Current, LineState &State);
00130 
00131   /// \brief Returns \c true if the next token starts a multiline string
00132   /// literal.
00133   ///
00134   /// This includes implicitly concatenated strings, strings that will be broken
00135   /// by clang-format and string literals with escaped newlines.
00136   bool nextIsMultilineString(const LineState &State);
00137 
00138   FormatStyle Style;
00139   const AdditionalKeywords &Keywords;
00140   SourceManager &SourceMgr;
00141   WhitespaceManager &Whitespaces;
00142   encoding::Encoding Encoding;
00143   bool BinPackInconclusiveFunctions;
00144   llvm::Regex CommentPragmasRegex;
00145 };
00146 
/// \brief Properties that apply to one parenthesis level while a line is
/// being indented.
struct ParenState {
  /// \brief Creates the state for a new level.
  ///
  /// \p Indent, \p IndentLevel, \p LastSpace, \p AvoidBinPacking and
  /// \p NoLineBreak are stored in the members of the same name. Every other
  /// position/column member starts at 0 and every other flag starts at
  /// \c false, except \c LastOperatorWrapped and \c AlignColons, which start
  /// at \c true.
  ParenState(unsigned Indent, unsigned IndentLevel, unsigned LastSpace,
             bool AvoidBinPacking, bool NoLineBreak)
      : Indent(Indent), IndentLevel(IndentLevel), LastSpace(LastSpace),
        FirstLessLess(0), BreakBeforeClosingBrace(false), QuestionColumn(0),
        AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false),
        NoLineBreak(NoLineBreak), LastOperatorWrapped(true), ColonPos(0),
        StartOfFunctionCall(0), StartOfArraySubscripts(0),
        NestedNameSpecifierContinuation(0), CallContinuation(0), VariablePos(0),
        ContainsLineBreak(false), ContainsUnwrappedBuilder(false),
        AlignColons(true), ObjCSelectorNameFound(false),
        HasMultipleNestedBlocks(false), JSFunctionInlined(false) {}

  /// \brief The position to which a specific parenthesis level needs to be
  /// indented.
  unsigned Indent;

  /// \brief The number of indentation levels of the block.
  unsigned IndentLevel;

  /// \brief The position of the last space on each level.
  ///
  /// Used e.g. to break like:
  /// functionCall(Parameter, otherCall(
  ///                             OtherParameter));
  unsigned LastSpace;

  /// \brief The position of the first "<<" operator encountered on each
  /// level.
  ///
  /// Used to align "<<" operators. 0 if no such operator has been encountered
  /// on a level.
  unsigned FirstLessLess;

  /// \brief Whether a newline needs to be inserted before the block's closing
  /// brace.
  ///
  /// We only want to insert a newline before the closing brace if there also
  /// was a newline after the beginning left brace.
  bool BreakBeforeClosingBrace;

  /// \brief The column of a \c ? in a conditional expression.
  unsigned QuestionColumn;

  /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple
  /// lines, in this context.
  bool AvoidBinPacking;

  /// \brief Break after the next comma (or all the commas in this context if
  /// \c AvoidBinPacking is \c true).
  bool BreakBeforeParameter;

  /// \brief Line breaking in this context would break a formatting rule.
  bool NoLineBreak;

  /// \brief True if the last binary operator on this level was wrapped to the
  /// next line.
  bool LastOperatorWrapped;

  /// \brief The position of the colon in an ObjC method declaration/call.
  unsigned ColonPos;

  /// \brief The start of the most recent function in a builder-type call.
  unsigned StartOfFunctionCall;

  /// \brief Contains the start of array subscript expressions, so that they
  /// can be aligned.
  unsigned StartOfArraySubscripts;

  /// \brief If a nested name specifier was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned NestedNameSpecifierContinuation;

  /// \brief If a call expression was broken over multiple lines, this
  /// contains the start column of the second line. Otherwise 0.
  unsigned CallContinuation;

  /// \brief The column of the first variable name in a variable declaration.
  ///
  /// Used to align further variables if necessary.
  unsigned VariablePos;

  /// \brief \c true if this \c ParenState already contains a line-break.
  ///
  /// The first line break in a certain \c ParenState causes extra penalty so
  /// that clang-format prefers similar breaks, i.e. breaks in the same
  /// parenthesis.
  bool ContainsLineBreak;

  /// \brief \c true if this \c ParenState contains multiple segments of a
  /// builder-type call on one line.
  bool ContainsUnwrappedBuilder;

  /// \brief \c true if the colons of the current ObjC method expression
  /// should be aligned.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool AlignColons;

  /// \brief \c true if at least one selector name was found in the current
  /// ObjC method expression.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool ObjCSelectorNameFound;

  /// \brief \c true if there are multiple nested blocks inside these parens.
  ///
  /// Not considered for memoization as it will always have the same value at
  /// the same token.
  bool HasMultipleNestedBlocks;

  /// \brief The previous JavaScript 'function' keyword is not wrapped to a
  /// new line.
  bool JSFunctionInlined;

  /// \brief Orders two \c ParenStates field by field; the first field that
  /// differs decides the result.
  ///
  /// \returns \c true if this state orders before \p Other, \c false if it
  /// orders after it or if all compared fields are equal.
  ///
  /// \c AlignColons, \c ObjCSelectorNameFound and \c HasMultipleNestedBlocks
  /// do not take part; their documentation above explains why.
  ///
  /// NOTE(review): \c IndentLevel and \c NestedNameSpecifierContinuation are
  /// not compared either, although they are not documented as excluded.
  /// Confirm whether that is intentional before relying on it.
  bool operator<(const ParenState &Other) const {
    if (Indent != Other.Indent)
      return Indent < Other.Indent;
    if (LastSpace != Other.LastSpace)
      return LastSpace < Other.LastSpace;
    if (FirstLessLess != Other.FirstLessLess)
      return FirstLessLess < Other.FirstLessLess;
    // For the flags that return the flag itself, the state in which the flag
    // is set orders first.
    if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace)
      return BreakBeforeClosingBrace;
    if (QuestionColumn != Other.QuestionColumn)
      return QuestionColumn < Other.QuestionColumn;
    if (AvoidBinPacking != Other.AvoidBinPacking)
      return AvoidBinPacking;
    if (BreakBeforeParameter != Other.BreakBeforeParameter)
      return BreakBeforeParameter;
    if (NoLineBreak != Other.NoLineBreak)
      return NoLineBreak;
    if (LastOperatorWrapped != Other.LastOperatorWrapped)
      return LastOperatorWrapped;
    if (ColonPos != Other.ColonPos)
      return ColonPos < Other.ColonPos;
    if (StartOfFunctionCall != Other.StartOfFunctionCall)
      return StartOfFunctionCall < Other.StartOfFunctionCall;
    if (StartOfArraySubscripts != Other.StartOfArraySubscripts)
      return StartOfArraySubscripts < Other.StartOfArraySubscripts;
    if (CallContinuation != Other.CallContinuation)
      return CallContinuation < Other.CallContinuation;
    if (VariablePos != Other.VariablePos)
      return VariablePos < Other.VariablePos;
    // The remaining flags are compared with '<', so here the state in which
    // the flag is clear orders first. The direction is kept as is to leave
    // the resulting order unchanged.
    if (ContainsLineBreak != Other.ContainsLineBreak)
      return ContainsLineBreak < Other.ContainsLineBreak;
    if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder)
      return ContainsUnwrappedBuilder < Other.ContainsUnwrappedBuilder;
    if (JSFunctionInlined != Other.JSFunctionInlined)
      return JSFunctionInlined < Other.JSFunctionInlined;
    return false;
  }
};
00301 
00302 /// \brief The current state when indenting a unwrapped line.
00303 ///
00304 /// As the indenting tries different combinations this is copied by value.
00305 struct LineState {
00306   /// \brief The number of used columns in the current line.
00307   unsigned Column;
00308 
00309   /// \brief The token that needs to be next formatted.
00310   FormatToken *NextToken;
00311 
00312   /// \brief \c true if this line contains a continued for-loop section.
00313   bool LineContainsContinuedForLoopSection;
00314 
00315   /// \brief The \c NestingLevel at the start of this line.
00316   unsigned StartOfLineLevel;
00317 
00318   /// \brief The lowest \c NestingLevel on the current line.
00319   unsigned LowestLevelOnLine;
00320 
00321   /// \brief The start column of the string literal, if we're in a string
00322   /// literal sequence, 0 otherwise.
00323   unsigned StartOfStringLiteral;
00324 
00325   /// \brief A stack keeping track of properties applying to parenthesis
00326   /// levels.
00327   std::vector<ParenState> Stack;
00328 
00329   /// \brief Ignore the stack of \c ParenStates for state comparison.
00330   ///
00331   /// In long and deeply nested unwrapped lines, the current algorithm can
00332   /// be insufficient for finding the best formatting with a reasonable amount
00333   /// of time and memory. Setting this flag will effectively lead to the
00334   /// algorithm not analyzing some combinations. However, these combinations
00335   /// rarely contain the optimal solution: In short, accepting a higher
00336   /// penalty early would need to lead to different values in the \c
00337   /// ParenState stack (in an otherwise identical state) and these different
00338   /// values would need to lead to a significant amount of avoided penalty
00339   /// later.
00340   ///
00341   /// FIXME: Come up with a better algorithm instead.
00342   bool IgnoreStackForComparison;
00343 
00344   /// \brief The indent of the first token.
00345   unsigned FirstIndent;
00346 
00347   /// \brief The line that is being formatted.
00348   ///
00349   /// Does not need to be considered for memoization because it doesn't change.
00350   const AnnotatedLine *Line;
00351 
00352   /// \brief Comparison operator to be able to used \c LineState in \c map.
00353   bool operator<(const LineState &Other) const {
00354     if (NextToken != Other.NextToken)
00355       return NextToken < Other.NextToken;
00356     if (Column != Other.Column)
00357       return Column < Other.Column;
00358     if (LineContainsContinuedForLoopSection !=
00359         Other.LineContainsContinuedForLoopSection)
00360       return LineContainsContinuedForLoopSection;
00361     if (StartOfLineLevel != Other.StartOfLineLevel)
00362       return StartOfLineLevel < Other.StartOfLineLevel;
00363     if (LowestLevelOnLine != Other.LowestLevelOnLine)
00364       return LowestLevelOnLine < Other.LowestLevelOnLine;
00365     if (StartOfStringLiteral != Other.StartOfStringLiteral)
00366       return StartOfStringLiteral < Other.StartOfStringLiteral;
00367     if (IgnoreStackForComparison || Other.IgnoreStackForComparison)
00368       return false;
00369     return Stack < Other.Stack;
00370   }
00371 };
00372 
00373 } // end namespace format
00374 } // end namespace clang
00375 
00376 #endif