clang API Documentation
00001 //===--- BreakableToken.h - Format C++ code -------------------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 /// 00010 /// \file 00011 /// \brief Declares BreakableToken, BreakableStringLiteral, and 00012 /// BreakableBlockComment classes, that contain token type-specific logic to 00013 /// break long lines in tokens. 00014 /// 00015 //===----------------------------------------------------------------------===// 00016 00017 #ifndef LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H 00018 #define LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H 00019 00020 #include "Encoding.h" 00021 #include "TokenAnnotator.h" 00022 #include "WhitespaceManager.h" 00023 #include <utility> 00024 00025 namespace clang { 00026 namespace format { 00027 00028 struct FormatStyle; 00029 00030 /// \brief Base class for strategies on how to break tokens. 00031 /// 00032 /// FIXME: The interface seems set in stone, so we might want to just pull the 00033 /// strategy into the class, instead of controlling it from the outside. 00034 class BreakableToken { 00035 public: 00036 /// \brief Contains starting character index and length of split. 00037 typedef std::pair<StringRef::size_type, unsigned> Split; 00038 00039 virtual ~BreakableToken() {} 00040 00041 /// \brief Returns the number of lines in this token in the original code. 00042 virtual unsigned getLineCount() const = 0; 00043 00044 /// \brief Returns the number of columns required to format the piece of line 00045 /// at \p LineIndex, from byte offset \p Offset with length \p Length. 00046 /// 00047 /// Note that previous breaks are not taken into account. \p Offset is always 00048 /// specified from the start of the (original) line. 00049 /// \p Length can be set to StringRef::npos, which means "to the end of line". 00050 virtual unsigned 00051 getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset, 00052 StringRef::size_type Length) const = 0; 00053 00054 /// \brief Returns a range (offset, length) at which to break the line at 00055 /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not 00056 /// violate \p ColumnLimit. 00057 virtual Split getSplit(unsigned LineIndex, unsigned TailOffset, 00058 unsigned ColumnLimit) const = 0; 00059 00060 /// \brief Emits the previously retrieved \p Split via \p Whitespaces. 00061 virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, 00062 WhitespaceManager &Whitespaces) = 0; 00063 00064 /// \brief Replaces the whitespace range described by \p Split with a single 00065 /// space. 00066 virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, 00067 Split Split, 00068 WhitespaceManager &Whitespaces) = 0; 00069 00070 /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex. 00071 virtual void replaceWhitespaceBefore(unsigned LineIndex, 00072 WhitespaceManager &Whitespaces) {} 00073 00074 protected: 00075 BreakableToken(const FormatToken &Tok, unsigned IndentLevel, 00076 bool InPPDirective, encoding::Encoding Encoding, 00077 const FormatStyle &Style) 00078 : Tok(Tok), IndentLevel(IndentLevel), InPPDirective(InPPDirective), 00079 Encoding(Encoding), Style(Style) {} 00080 00081 const FormatToken &Tok; 00082 const unsigned IndentLevel; 00083 const bool InPPDirective; 00084 const encoding::Encoding Encoding; 00085 const FormatStyle &Style; 00086 }; 00087 00088 /// \brief Base class for single line tokens that can be broken. 00089 /// 00090 /// \c getSplit() needs to be implemented by child classes. 00091 class BreakableSingleLineToken : public BreakableToken { 00092 public: 00093 unsigned getLineCount() const override; 00094 unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, 00095 StringRef::size_type Length) const override; 00096 00097 protected: 00098 BreakableSingleLineToken(const FormatToken &Tok, unsigned IndentLevel, 00099 unsigned StartColumn, StringRef Prefix, 00100 StringRef Postfix, bool InPPDirective, 00101 encoding::Encoding Encoding, 00102 const FormatStyle &Style); 00103 00104 // The column in which the token starts. 00105 unsigned StartColumn; 00106 // The prefix a line needs after a break in the token. 00107 StringRef Prefix; 00108 // The postfix a line needs before introducing a break. 00109 StringRef Postfix; 00110 // The token text excluding the prefix and postfix. 00111 StringRef Line; 00112 }; 00113 00114 class BreakableStringLiteral : public BreakableSingleLineToken { 00115 public: 00116 /// \brief Creates a breakable token for a single line string literal. 00117 /// 00118 /// \p StartColumn specifies the column in which the token will start 00119 /// after formatting. 00120 BreakableStringLiteral(const FormatToken &Tok, unsigned IndentLevel, 00121 unsigned StartColumn, StringRef Prefix, 00122 StringRef Postfix, bool InPPDirective, 00123 encoding::Encoding Encoding, const FormatStyle &Style); 00124 00125 Split getSplit(unsigned LineIndex, unsigned TailOffset, 00126 unsigned ColumnLimit) const override; 00127 void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, 00128 WhitespaceManager &Whitespaces) override; 00129 void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, 00130 WhitespaceManager &Whitespaces) override {} 00131 }; 00132 00133 class BreakableLineComment : public BreakableSingleLineToken { 00134 public: 00135 /// \brief Creates a breakable token for a line comment. 00136 /// 00137 /// \p StartColumn specifies the column in which the comment will start 00138 /// after formatting. 00139 BreakableLineComment(const FormatToken &Token, unsigned IndentLevel, 00140 unsigned StartColumn, bool InPPDirective, 00141 encoding::Encoding Encoding, const FormatStyle &Style); 00142 00143 Split getSplit(unsigned LineIndex, unsigned TailOffset, 00144 unsigned ColumnLimit) const override; 00145 void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, 00146 WhitespaceManager &Whitespaces) override; 00147 void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, 00148 WhitespaceManager &Whitespaces) override; 00149 void replaceWhitespaceBefore(unsigned LineIndex, 00150 WhitespaceManager &Whitespaces) override; 00151 00152 private: 00153 // The prefix without an additional space if one was added. 00154 StringRef OriginalPrefix; 00155 }; 00156 00157 class BreakableBlockComment : public BreakableToken { 00158 public: 00159 /// \brief Creates a breakable token for a block comment. 00160 /// 00161 /// \p StartColumn specifies the column in which the comment will start 00162 /// after formatting, while \p OriginalStartColumn specifies in which 00163 /// column the comment started before formatting. 00164 /// If the comment starts a line after formatting, set \p FirstInLine to true. 00165 BreakableBlockComment(const FormatToken &Token, unsigned IndentLevel, 00166 unsigned StartColumn, unsigned OriginaStartColumn, 00167 bool FirstInLine, bool InPPDirective, 00168 encoding::Encoding Encoding, const FormatStyle &Style); 00169 00170 unsigned getLineCount() const override; 00171 unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset, 00172 StringRef::size_type Length) const override; 00173 Split getSplit(unsigned LineIndex, unsigned TailOffset, 00174 unsigned ColumnLimit) const override; 00175 void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, 00176 WhitespaceManager &Whitespaces) override; 00177 void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split, 00178 WhitespaceManager &Whitespaces) override; 00179 void replaceWhitespaceBefore(unsigned LineIndex, 00180 WhitespaceManager &Whitespaces) override; 00181 00182 private: 00183 // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex], 00184 // so that all whitespace between the lines is accounted to Lines[LineIndex] 00185 // as leading whitespace: 00186 // - Lines[LineIndex] points to the text after that whitespace 00187 // - Lines[LineIndex-1] shrinks by its trailing whitespace 00188 // - LeadingWhitespace[LineIndex] is updated with the complete whitespace 00189 // between the end of the text of Lines[LineIndex-1] and Lines[LineIndex] 00190 // 00191 // Sets StartOfLineColumn to the intended column in which the text at 00192 // Lines[LineIndex] starts (note that the decoration, if present, is not 00193 // considered part of the text). 00194 void adjustWhitespace(unsigned LineIndex, int IndentDelta); 00195 00196 // Returns the column at which the text in line LineIndex starts, when broken 00197 // at TailOffset. Note that the decoration (if present) is not considered part 00198 // of the text. 00199 unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const; 00200 00201 // Contains the text of the lines of the block comment, excluding the leading 00202 // /* in the first line and trailing */ in the last line, and excluding all 00203 // trailing whitespace between the lines. Note that the decoration (if 00204 // present) is also not considered part of the text. 00205 SmallVector<StringRef, 16> Lines; 00206 00207 // LeadingWhitespace[i] is the number of characters regarded as whitespace in 00208 // front of Lines[i]. Note that this can include "* " sequences, which we 00209 // regard as whitespace when all lines have a "*" prefix. 00210 SmallVector<unsigned, 16> LeadingWhitespace; 00211 00212 // StartOfLineColumn[i] is the target column at which Line[i] should be. 00213 // Note that this excludes a leading "* " or "*" in case all lines have 00214 // a "*" prefix. 00215 SmallVector<int, 16> StartOfLineColumn; 00216 00217 // The column at which the text of a broken line should start. 00218 // Note that an optional decoration would go before that column. 00219 // IndentAtLineBreak is a uniform position for all lines in a block comment, 00220 // regardless of their relative position. 00221 // FIXME: Revisit the decision to do this; the main reason was to support 00222 // patterns like 00223 // /**************//** 00224 // * Comment 00225 // We could also support such patterns by special casing the first line 00226 // instead. 00227 unsigned IndentAtLineBreak; 00228 00229 // This is to distinguish between the case when the last line was empty and 00230 // the case when it started with a decoration ("*" or "* "). 00231 bool LastLineNeedsDecoration; 00232 00233 // Either "* " if all lines begin with a "*", or empty. 00234 StringRef Decoration; 00235 }; 00236 00237 } // namespace format 00238 } // namespace clang 00239 00240 #endif