clang: BreakableToken.h Source File

Go to the documentation of this file.
00001 //===--- BreakableToken.h - Format C++ code -------------------------------===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 ///
00010 /// \file
00011 /// \brief Declares BreakableToken, BreakableStringLiteral, and
00012 /// BreakableBlockComment classes, that contain token type-specific logic to
00013 /// break long lines in tokens.
00014 ///
00015 //===----------------------------------------------------------------------===//
00016 
00017 #ifndef LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
00018 #define LLVM_CLANG_LIB_FORMAT_BREAKABLETOKEN_H
00019 
00020 #include "Encoding.h"
00021 #include "TokenAnnotator.h"
00022 #include "WhitespaceManager.h"
00023 #include <utility>
00024 
00025 namespace clang {
00026 namespace format {
00027 
00028 struct FormatStyle; // Forward declaration; this header only uses it by reference.
00029 
00030 /// \brief Base class for strategies on how to break tokens.
00031 ///
00032 /// FIXME: The interface seems set in stone, so we might want to just pull the
00033 /// strategy into the class, instead of controlling it from the outside.
      ///
      /// Every operation except \c replaceWhitespaceBefore() is pure virtual and
      /// has to be provided by a subclass. The constructor is protected, so the
      /// class can only be instantiated through a subclass.
00034 class BreakableToken {
00035 public:
00036   /// \brief Contains starting character index and length of split.
        ///
        /// In that order: \c first is the starting index, \c second is the length.
00037   typedef std::pair<StringRef::size_type, unsigned> Split;
00038 
        /// \brief Virtual, so that subclass objects are destroyed correctly when
        /// deleted through a \c BreakableToken pointer.
00039   virtual ~BreakableToken() {}
00040 
00041   /// \brief Returns the number of lines in this token in the original code.
00042   virtual unsigned getLineCount() const = 0;
00043 
00044   /// \brief Returns the number of columns required to format the piece of line
00045   /// at \p LineIndex, from byte offset \p Offset with length \p Length.
00046   ///
00047   /// Note that previous breaks are not taken into account. \p Offset is always
00048   /// specified from the start of the (original) line.
00049   /// \p Length can be set to StringRef::npos, which means "to the end of line".
        ///
        /// The overrides declared in this header name the \p Offset parameter
        /// \c TailOffset.
00050   virtual unsigned
00051   getLineLengthAfterSplit(unsigned LineIndex, unsigned Offset,
00052                           StringRef::size_type Length) const = 0;
00053 
00054   /// \brief Returns a range (offset, length) at which to break the line at
00055   /// \p LineIndex, if previously broken at \p TailOffset. If possible, do not
00056   /// violate \p ColumnLimit.
        ///
        /// NOTE(review): the value returned when no suitable break position exists
        /// is not specified here; check the implementations.
00057   virtual Split getSplit(unsigned LineIndex, unsigned TailOffset,
00058                          unsigned ColumnLimit) const = 0;
00059 
00060   /// \brief Emits the previously retrieved \p Split via \p Whitespaces.
        ///
        /// \p LineIndex and \p TailOffset identify the position the split refers
        /// to, in the same way as for \c getSplit().
00061   virtual void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
00062                            WhitespaceManager &Whitespaces) = 0;
00063 
00064   /// \brief Replaces the whitespace range described by \p Split with a single
00065   /// space.
        ///
        /// The replacement is recorded through \p Whitespaces.
00066   virtual void replaceWhitespace(unsigned LineIndex, unsigned TailOffset,
00067                                  Split Split,
00068                                  WhitespaceManager &Whitespaces) = 0;
00069 
00070   /// \brief Replaces the whitespace between \p LineIndex-1 and \p LineIndex.
        ///
        /// The default implementation does nothing; subclasses override it when
        /// they need to adjust the whitespace in front of a line.
00071   virtual void replaceWhitespaceBefore(unsigned LineIndex,
00072                                        WhitespaceManager &Whitespaces) {}
00073 
00074 protected:
        /// \brief Stores the constructor arguments in the members below.
        ///
        /// \p Tok and \p Style are kept as references, so both objects must outlive
        /// the \c BreakableToken.
00075   BreakableToken(const FormatToken &Tok, unsigned IndentLevel,
00076                  bool InPPDirective, encoding::Encoding Encoding,
00077                  const FormatStyle &Style)
00078       : Tok(Tok), IndentLevel(IndentLevel), InPPDirective(InPPDirective),
00079         Encoding(Encoding), Style(Style) {}
00080 
        // The token this object was created for (held by reference).
00081   const FormatToken &Tok;
        // Indent level passed at construction.
        // NOTE(review): how it is applied is decided by the out-of-line code.
00082   const unsigned IndentLevel;
        // Presumably true if the token is inside a preprocessor directive - confirm
        // against the callers.
00083   const bool InPPDirective;
        // Encoding passed at construction; presumably used to measure column
        // widths - confirm in the implementation.
00084   const encoding::Encoding Encoding;
        // The formatting style in effect (held by reference).
00085   const FormatStyle &Style;
00086 };
00087 
00088 /// \brief Base class for single line tokens that can be broken.
00089 ///
00090 /// \c getSplit() needs to be implemented by child classes.
      ///
      /// \c insertBreak() and \c replaceWhitespace() are not overridden here either
      /// and therefore remain pure virtual. Only \c getLineCount() and
      /// \c getLineLengthAfterSplit() are declared as overrides; their definitions
      /// live outside this header.
00091 class BreakableSingleLineToken : public BreakableToken {
00092 public:
00093   unsigned getLineCount() const override;
        // Note: \p TailOffset corresponds to the parameter named \p Offset in the
        // BreakableToken declaration.
00094   unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
00095                                    StringRef::size_type Length) const override;
00096 
00097 protected:
        // Protected: only subclasses can construct a single line token.
        // NOTE(review): Line has no matching constructor parameter; presumably it
        // is derived from Tok, Prefix and Postfix in the out-of-line definition.
00098   BreakableSingleLineToken(const FormatToken &Tok, unsigned IndentLevel,
00099                            unsigned StartColumn, StringRef Prefix,
00100                            StringRef Postfix, bool InPPDirective,
00101                            encoding::Encoding Encoding,
00102                            const FormatStyle &Style);
00103 
00104   // The column in which the token starts.
00105   unsigned StartColumn;
00106   // The prefix a line needs after a break in the token.
00107   StringRef Prefix;
00108   // The postfix a line needs before introducing a break.
00109   StringRef Postfix;
00110   // The token text excluding the prefix and postfix.
00111   StringRef Line;
00112 };
00113 
00114 class BreakableStringLiteral : public BreakableSingleLineToken {
      // Breakable token for a single line string literal. Implements the
      // operations BreakableSingleLineToken leaves pure virtual.
00115 public:
00116   /// \brief Creates a breakable token for a single line string literal.
00117   ///
00118   /// \p StartColumn specifies the column in which the token will start
00119   /// after formatting.
        ///
        /// The remaining parameters have the same names and types as those of the
        /// \c BreakableSingleLineToken constructor.
00120   BreakableStringLiteral(const FormatToken &Tok, unsigned IndentLevel,
00121                          unsigned StartColumn, StringRef Prefix,
00122                          StringRef Postfix, bool InPPDirective,
00123                          encoding::Encoding Encoding, const FormatStyle &Style);
00124 
00125   Split getSplit(unsigned LineIndex, unsigned TailOffset,
00126                  unsigned ColumnLimit) const override;
00127   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
00128                    WhitespaceManager &Whitespaces) override;
        // Deliberately empty: this override emits no replacement for string
        // literals.
00129   void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
00130                          WhitespaceManager &Whitespaces) override {}
00131 };
00132 
00133 class BreakableLineComment : public BreakableSingleLineToken {
      // Breakable token for a line comment. Unlike BreakableStringLiteral it also
      // overrides replaceWhitespace() and replaceWhitespaceBefore() out of line.
00134 public:
00135   /// \brief Creates a breakable token for a line comment.
00136   ///
00137   /// \p StartColumn specifies the column in which the comment will start
00138   /// after formatting.
        ///
        /// No prefix or postfix is passed in.
        /// NOTE(review): presumably the constructor derives them from the comment
        /// text of \p Token - confirm in the implementation.
00139   BreakableLineComment(const FormatToken &Token, unsigned IndentLevel,
00140                        unsigned StartColumn, bool InPPDirective,
00141                        encoding::Encoding Encoding, const FormatStyle &Style);
00142 
00143   Split getSplit(unsigned LineIndex, unsigned TailOffset,
00144                  unsigned ColumnLimit) const override;
00145   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
00146                    WhitespaceManager &Whitespaces) override;
00147   void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
00148                          WhitespaceManager &Whitespaces) override;
00149   void replaceWhitespaceBefore(unsigned LineIndex,
00150                                WhitespaceManager &Whitespaces) override;
00151 
00152 private:
00153   // The prefix without an additional space if one was added.
        // Kept in addition to the inherited Prefix member.
00154   StringRef OriginalPrefix;
00155 };
00156 
00157 class BreakableBlockComment : public BreakableToken {
      // Breakable token for a block comment. It derives directly from
      // BreakableToken and keeps per-line state in the Lines, LeadingWhitespace
      // and StartOfLineColumn vectors below.
00158 public:
00159   /// \brief Creates a breakable token for a block comment.
00160   ///
00161   /// \p StartColumn specifies the column in which the comment will start
00162   /// after formatting, while \p OriginalStartColumn specifies in which
00163   /// column the comment started before formatting.
00164   /// If the comment starts a line after formatting, set \p FirstInLine to true.
00165   BreakableBlockComment(const FormatToken &Token, unsigned IndentLevel,
00166                         unsigned StartColumn, unsigned OriginalStartColumn,
00167                         bool FirstInLine, bool InPPDirective,
00168                         encoding::Encoding Encoding, const FormatStyle &Style);
00169 
        // Overrides of the BreakableToken interface; all are defined out of line.
00170   unsigned getLineCount() const override;
        // Note: \p TailOffset corresponds to the parameter named \p Offset in the
        // BreakableToken declaration.
00171   unsigned getLineLengthAfterSplit(unsigned LineIndex, unsigned TailOffset,
00172                                    StringRef::size_type Length) const override;
00173   Split getSplit(unsigned LineIndex, unsigned TailOffset,
00174                  unsigned ColumnLimit) const override;
00175   void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split,
00176                    WhitespaceManager &Whitespaces) override;
00177   void replaceWhitespace(unsigned LineIndex, unsigned TailOffset, Split Split,
00178                          WhitespaceManager &Whitespaces) override;
00179   void replaceWhitespaceBefore(unsigned LineIndex,
00180                                WhitespaceManager &Whitespaces) override;
00181 
00182 private:
00183   // Rearranges the whitespace between Lines[LineIndex-1] and Lines[LineIndex],
00184   // so that all whitespace between the lines is accounted to Lines[LineIndex]
00185   // as leading whitespace:
00186   // - Lines[LineIndex] points to the text after that whitespace
00187   // - Lines[LineIndex-1] shrinks by its trailing whitespace
00188   // - LeadingWhitespace[LineIndex] is updated with the complete whitespace
00189   //   between the end of the text of Lines[LineIndex-1] and Lines[LineIndex]
00190   //
00191   // Sets StartOfLineColumn to the intended column in which the text at
00192   // Lines[LineIndex] starts (note that the decoration, if present, is not
00193   // considered part of the text).
        //
        // NOTE(review): IndentDelta is signed; presumably it is the column shift
        // between the original and the formatted comment start - confirm in the
        // implementation.
00194   void adjustWhitespace(unsigned LineIndex, int IndentDelta);
00195 
00196   // Returns the column at which the text in line LineIndex starts, when broken
00197   // at TailOffset. Note that the decoration (if present) is not considered part
00198   // of the text.
00199   unsigned getContentStartColumn(unsigned LineIndex, unsigned TailOffset) const;
00200 
00201   // Contains the text of the lines of the block comment, excluding the leading
00202   // /* in the first line and trailing */ in the last line, and excluding all
00203   // trailing whitespace between the lines. Note that the decoration (if
00204   // present) is also not considered part of the text.
00205   SmallVector<StringRef, 16> Lines;
00206 
00207   // LeadingWhitespace[i] is the number of characters regarded as whitespace in
00208   // front of Lines[i]. Note that this can include "* " sequences, which we
00209   // regard as whitespace when all lines have a "*" prefix.
00210   SmallVector<unsigned, 16> LeadingWhitespace;
00211 
00212   // StartOfLineColumn[i] is the target column at which Lines[i] should be.
00213   // Note that this excludes a leading "* " or "*" in case all lines have
00214   // a "*" prefix.
        // The elements are signed (int), unlike the other column values here.
00215   SmallVector<int, 16> StartOfLineColumn;
00216 
00217   // The column at which the text of a broken line should start.
00218   // Note that an optional decoration would go before that column.
00219   // IndentAtLineBreak is a uniform position for all lines in a block comment,
00220   // regardless of their relative position.
00221   // FIXME: Revisit the decision to do this; the main reason was to support
00222   // patterns like
00223   // /**************//**
00224   //  * Comment
00225   // We could also support such patterns by special casing the first line
00226   // instead.
00227   unsigned IndentAtLineBreak;
00228 
00229   // This is to distinguish between the case when the last line was empty and
00230   // the case when it started with a decoration ("*" or "* ").
00231   bool LastLineNeedsDecoration;
00232 
00233   // Either "* " if all lines begin with a "*", or empty.
00234   StringRef Decoration;
00235 };
00236 
00237 } // namespace format
00238 } // namespace clang
00239 
00240 #endif