clang API Documentation
00001 //===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 /// 00010 /// \file 00011 /// \brief This file contains the declaration of the UnwrappedLineParser, 00012 /// which turns a stream of tokens into UnwrappedLines. 00013 /// 00014 //===----------------------------------------------------------------------===// 00015 00016 #ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H 00017 #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H 00018 00019 #include "FormatToken.h" 00020 #include "clang/Basic/IdentifierTable.h" 00021 #include "clang/Format/Format.h" 00022 #include <list> 00023 #include <stack> 00024 00025 namespace clang { 00026 namespace format { 00027 00028 struct UnwrappedLineNode; 00029 00030 /// \brief An unwrapped line is a sequence of \c Token, that we would like to 00031 /// put on a single line if there was no column limit. 00032 /// 00033 /// This is used as a main interface between the \c UnwrappedLineParser and the 00034 /// \c UnwrappedLineFormatter. The key property is that changing the formatting 00035 /// within an unwrapped line does not affect any other unwrapped lines. 00036 struct UnwrappedLine { 00037 UnwrappedLine(); 00038 00039 // FIXME: Don't use std::list here. 00040 /// \brief The \c Tokens comprising this \c UnwrappedLine. 00041 std::list<UnwrappedLineNode> Tokens; 00042 00043 /// \brief The indent level of the \c UnwrappedLine. 00044 unsigned Level; 00045 00046 /// \brief Whether this \c UnwrappedLine is part of a preprocessor directive. 00047 bool InPPDirective; 00048 00049 bool MustBeDeclaration; 00050 }; 00051 00052 class UnwrappedLineConsumer { 00053 public: 00054 virtual ~UnwrappedLineConsumer() {} 00055 virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0; 00056 virtual void finishRun() = 0; 00057 }; 00058 00059 class FormatTokenSource; 00060 00061 class UnwrappedLineParser { 00062 public: 00063 UnwrappedLineParser(const FormatStyle &Style, 00064 const AdditionalKeywords &Keywords, 00065 ArrayRef<FormatToken *> Tokens, 00066 UnwrappedLineConsumer &Callback); 00067 00068 /// Returns true in case of a structural error. 00069 bool parse(); 00070 00071 private: 00072 void reset(); 00073 void parseFile(); 00074 void parseLevel(bool HasOpeningBrace); 00075 void parseBlock(bool MustBeDeclaration, bool AddLevel = true, 00076 bool MunchSemi = true); 00077 void parseChildBlock(); 00078 void parsePPDirective(); 00079 void parsePPDefine(); 00080 void parsePPIf(bool IfDef); 00081 void parsePPElIf(); 00082 void parsePPElse(); 00083 void parsePPEndIf(); 00084 void parsePPUnknown(); 00085 void parseStructuralElement(); 00086 bool tryToParseBracedList(); 00087 bool parseBracedList(bool ContinueOnSemicolons = false); 00088 void parseParens(); 00089 void parseSquare(); 00090 void parseIfThenElse(); 00091 void parseTryCatch(); 00092 void parseForOrWhileLoop(); 00093 void parseDoWhile(); 00094 void parseLabel(); 00095 void parseCaseLabel(); 00096 void parseSwitch(); 00097 void parseNamespace(); 00098 void parseAccessSpecifier(); 00099 void parseEnum(); 00100 void parseJavaEnumBody(); 00101 void parseRecord(); 00102 void parseObjCProtocolList(); 00103 void parseObjCUntilAtEnd(); 00104 void parseObjCInterfaceOrImplementation(); 00105 void parseObjCProtocol(); 00106 bool tryToParseLambda(); 00107 bool tryToParseLambdaIntroducer(); 00108 void tryToParseJSFunction(); 00109 void addUnwrappedLine(); 00110 bool eof() const; 00111 void nextToken(); 00112 void readToken(); 00113 void flushComments(bool NewlineBeforeNext); 00114 void pushToken(FormatToken *Tok); 00115 void calculateBraceTypes(); 00116 00117 // Marks a conditional compilation edge (for example, an '#if', '#ifdef', 00118 // '#else' or merge conflict marker). If 'Unreachable' is true, assumes 00119 // this branch either cannot be taken (for example '#if false'), or should 00120 // not be taken in this round. 00121 void conditionalCompilationCondition(bool Unreachable); 00122 void conditionalCompilationStart(bool Unreachable); 00123 void conditionalCompilationAlternative(); 00124 void conditionalCompilationEnd(); 00125 00126 bool isOnNewLine(const FormatToken &FormatTok); 00127 00128 // FIXME: We are constantly running into bugs where Line.Level is incorrectly 00129 // subtracted from beyond 0. Introduce a method to subtract from Line.Level 00130 // and use that everywhere in the Parser. 00131 std::unique_ptr<UnwrappedLine> Line; 00132 00133 // Comments are sorted into unwrapped lines by whether they are in the same 00134 // line as the previous token, or not. If not, they belong to the next token. 00135 // Since the next token might already be in a new unwrapped line, we need to 00136 // store the comments belonging to that token. 00137 SmallVector<FormatToken *, 1> CommentsBeforeNextToken; 00138 FormatToken *FormatTok; 00139 bool MustBreakBeforeNextToken; 00140 00141 // The parsed lines. Only added to through \c CurrentLines. 00142 SmallVector<UnwrappedLine, 8> Lines; 00143 00144 // Preprocessor directives are parsed out-of-order from other unwrapped lines. 00145 // Thus, we need to keep a list of preprocessor directives to be reported 00146 // after an unwarpped line that has been started was finished. 00147 SmallVector<UnwrappedLine, 4> PreprocessorDirectives; 00148 00149 // New unwrapped lines are added via CurrentLines. 00150 // Usually points to \c &Lines. While parsing a preprocessor directive when 00151 // there is an unfinished previous unwrapped line, will point to 00152 // \c &PreprocessorDirectives. 00153 SmallVectorImpl<UnwrappedLine> *CurrentLines; 00154 00155 // We store for each line whether it must be a declaration depending on 00156 // whether we are in a compound statement or not. 00157 std::vector<bool> DeclarationScopeStack; 00158 00159 // Will be true if we encounter an error that leads to possibily incorrect 00160 // indentation levels. 00161 bool StructuralError; 00162 00163 const FormatStyle &Style; 00164 const AdditionalKeywords &Keywords; 00165 00166 FormatTokenSource *Tokens; 00167 UnwrappedLineConsumer &Callback; 00168 00169 // FIXME: This is a temporary measure until we have reworked the ownership 00170 // of the format tokens. The goal is to have the actual tokens created and 00171 // owned outside of and handed into the UnwrappedLineParser. 00172 ArrayRef<FormatToken *> AllTokens; 00173 00174 // Represents preprocessor branch type, so we can find matching 00175 // #if/#else/#endif directives. 00176 enum PPBranchKind { 00177 PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0 00178 PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0 00179 }; 00180 00181 // Keeps a stack of currently active preprocessor branching directives. 00182 SmallVector<PPBranchKind, 16> PPStack; 00183 00184 // The \c UnwrappedLineParser re-parses the code for each combination 00185 // of preprocessor branches that can be taken. 00186 // To that end, we take the same branch (#if, #else, or one of the #elif 00187 // branches) for each nesting level of preprocessor branches. 00188 // \c PPBranchLevel stores the current nesting level of preprocessor 00189 // branches during one pass over the code. 00190 int PPBranchLevel; 00191 00192 // Contains the current branch (#if, #else or one of the #elif branches) 00193 // for each nesting level. 00194 SmallVector<int, 8> PPLevelBranchIndex; 00195 00196 // Contains the maximum number of branches at each nesting level. 00197 SmallVector<int, 8> PPLevelBranchCount; 00198 00199 // Contains the number of branches per nesting level we are currently 00200 // in while parsing a preprocessor branch sequence. 00201 // This is used to update PPLevelBranchCount at the end of a branch 00202 // sequence. 00203 std::stack<int> PPChainBranchIndex; 00204 00205 friend class ScopedLineState; 00206 friend class CompoundStatementIndenter; 00207 }; 00208 00209 struct UnwrappedLineNode { 00210 UnwrappedLineNode() : Tok(nullptr) {} 00211 UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {} 00212 00213 FormatToken *Tok; 00214 SmallVector<UnwrappedLine, 0> Children; 00215 }; 00216 00217 inline UnwrappedLine::UnwrappedLine() 00218 : Level(0), InPPDirective(false), MustBeDeclaration(false) {} 00219 00220 } // end namespace format 00221 } // end namespace clang 00222 00223 #endif