clang API Documentation
00001 //===--- BreakableToken.cpp - Format C++ code -----------------------------===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 /// 00010 /// \file 00011 /// \brief Contains implementation of BreakableToken class and classes derived 00012 /// from it. 00013 /// 00014 //===----------------------------------------------------------------------===// 00015 00016 #include "BreakableToken.h" 00017 #include "clang/Basic/CharInfo.h" 00018 #include "clang/Format/Format.h" 00019 #include "llvm/ADT/STLExtras.h" 00020 #include "llvm/Support/Debug.h" 00021 #include <algorithm> 00022 00023 #define DEBUG_TYPE "format-token-breaker" 00024 00025 namespace clang { 00026 namespace format { 00027 00028 static const char *const Blanks = " \t\v\f\r"; 00029 static bool IsBlank(char C) { 00030 switch (C) { 00031 case ' ': 00032 case '\t': 00033 case '\v': 00034 case '\f': 00035 case '\r': 00036 return true; 00037 default: 00038 return false; 00039 } 00040 } 00041 00042 static BreakableToken::Split getCommentSplit(StringRef Text, 00043 unsigned ContentStartColumn, 00044 unsigned ColumnLimit, 00045 unsigned TabWidth, 00046 encoding::Encoding Encoding) { 00047 if (ColumnLimit <= ContentStartColumn + 1) 00048 return BreakableToken::Split(StringRef::npos, 0); 00049 00050 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; 00051 unsigned MaxSplitBytes = 0; 00052 00053 for (unsigned NumChars = 0; 00054 NumChars < MaxSplit && MaxSplitBytes < Text.size();) { 00055 unsigned BytesInChar = 00056 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); 00057 NumChars += 00058 encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar), 00059 ContentStartColumn, TabWidth, Encoding); 00060 MaxSplitBytes += BytesInChar; 00061 } 00062 00063 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); 00064 if (SpaceOffset == StringRef::npos || 00065 // Don't break at leading whitespace. 00066 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { 00067 // Make sure that we don't break at leading whitespace that 00068 // reaches past MaxSplit. 00069 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks); 00070 if (FirstNonWhitespace == StringRef::npos) 00071 // If the comment is only whitespace, we cannot split. 00072 return BreakableToken::Split(StringRef::npos, 0); 00073 SpaceOffset = Text.find_first_of( 00074 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace)); 00075 } 00076 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { 00077 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks); 00078 StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks); 00079 return BreakableToken::Split(BeforeCut.size(), 00080 AfterCut.begin() - BeforeCut.end()); 00081 } 00082 return BreakableToken::Split(StringRef::npos, 0); 00083 } 00084 00085 static BreakableToken::Split 00086 getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, 00087 unsigned TabWidth, encoding::Encoding Encoding) { 00088 // FIXME: Reduce unit test case. 00089 if (Text.empty()) 00090 return BreakableToken::Split(StringRef::npos, 0); 00091 if (ColumnLimit <= UsedColumns) 00092 return BreakableToken::Split(StringRef::npos, 0); 00093 unsigned MaxSplit = ColumnLimit - UsedColumns; 00094 StringRef::size_type SpaceOffset = 0; 00095 StringRef::size_type SlashOffset = 0; 00096 StringRef::size_type WordStartOffset = 0; 00097 StringRef::size_type SplitPoint = 0; 00098 for (unsigned Chars = 0;;) { 00099 unsigned Advance; 00100 if (Text[0] == '\\') { 00101 Advance = encoding::getEscapeSequenceLength(Text); 00102 Chars += Advance; 00103 } else { 00104 Advance = encoding::getCodePointNumBytes(Text[0], Encoding); 00105 Chars += encoding::columnWidthWithTabs( 00106 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); 00107 } 00108 00109 if (Chars > MaxSplit || Text.size() == Advance) 00110 break; 00111 00112 if (IsBlank(Text[0])) 00113 SpaceOffset = SplitPoint; 00114 if (Text[0] == '/') 00115 SlashOffset = SplitPoint; 00116 if (Advance == 1 && !isAlphanumeric(Text[0])) 00117 WordStartOffset = SplitPoint; 00118 00119 SplitPoint += Advance; 00120 Text = Text.substr(Advance); 00121 } 00122 00123 if (SpaceOffset != 0) 00124 return BreakableToken::Split(SpaceOffset + 1, 0); 00125 if (SlashOffset != 0) 00126 return BreakableToken::Split(SlashOffset + 1, 0); 00127 if (WordStartOffset != 0) 00128 return BreakableToken::Split(WordStartOffset + 1, 0); 00129 if (SplitPoint != 0) 00130 return BreakableToken::Split(SplitPoint, 0); 00131 return BreakableToken::Split(StringRef::npos, 0); 00132 } 00133 00134 unsigned BreakableSingleLineToken::getLineCount() const { return 1; } 00135 00136 unsigned BreakableSingleLineToken::getLineLengthAfterSplit( 00137 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { 00138 return StartColumn + Prefix.size() + Postfix.size() + 00139 encoding::columnWidthWithTabs(Line.substr(Offset, Length), 00140 StartColumn + Prefix.size(), 00141 Style.TabWidth, Encoding); 00142 } 00143 00144 BreakableSingleLineToken::BreakableSingleLineToken( 00145 const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, 00146 StringRef Prefix, StringRef Postfix, bool InPPDirective, 00147 encoding::Encoding Encoding, const FormatStyle &Style) 00148 : BreakableToken(Tok, IndentLevel, InPPDirective, Encoding, Style), 00149 StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) { 00150 assert(Tok.TokenText.endswith(Postfix)); 00151 Line = Tok.TokenText.substr( 00152 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); 00153 } 00154 00155 BreakableStringLiteral::BreakableStringLiteral( 00156 const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, 00157 StringRef Prefix, StringRef Postfix, bool InPPDirective, 00158 encoding::Encoding Encoding, const FormatStyle &Style) 00159 : BreakableSingleLineToken(Tok, IndentLevel, StartColumn, Prefix, Postfix, 00160 InPPDirective, Encoding, Style) {} 00161 00162 BreakableToken::Split 00163 BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, 00164 unsigned ColumnLimit) const { 00165 return getStringSplit(Line.substr(TailOffset), 00166 StartColumn + Prefix.size() + Postfix.size(), 00167 ColumnLimit, Style.TabWidth, Encoding); 00168 } 00169 00170 void BreakableStringLiteral::insertBreak(unsigned LineIndex, 00171 unsigned TailOffset, Split Split, 00172 WhitespaceManager &Whitespaces) { 00173 unsigned LeadingSpaces = StartColumn; 00174 // The '@' of an ObjC string literal (@"Test") does not become part of the 00175 // string token. 00176 // FIXME: It might be a cleaner solution to merge the tokens as a 00177 // precomputation step. 00178 if (Prefix.startswith("@")) 00179 --LeadingSpaces; 00180 Whitespaces.replaceWhitespaceInToken( 00181 Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, 00182 Prefix, InPPDirective, 1, IndentLevel, LeadingSpaces); 00183 } 00184 00185 static StringRef getLineCommentIndentPrefix(StringRef Comment) { 00186 static const char *const KnownPrefixes[] = { "///", "//" }; 00187 StringRef LongestPrefix; 00188 for (StringRef KnownPrefix : KnownPrefixes) { 00189 if (Comment.startswith(KnownPrefix)) { 00190 size_t PrefixLength = KnownPrefix.size(); 00191 while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ') 00192 ++PrefixLength; 00193 if (PrefixLength > LongestPrefix.size()) 00194 LongestPrefix = Comment.substr(0, PrefixLength); 00195 } 00196 } 00197 return LongestPrefix; 00198 } 00199 00200 BreakableLineComment::BreakableLineComment( 00201 const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, 00202 bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) 00203 : BreakableSingleLineToken(Token, IndentLevel, StartColumn, 00204 getLineCommentIndentPrefix(Token.TokenText), "", 00205 InPPDirective, Encoding, Style) { 00206 OriginalPrefix = Prefix; 00207 if (Token.TokenText.size() > Prefix.size() && 00208 isAlphanumeric(Token.TokenText[Prefix.size()])) { 00209 if (Prefix == "//") 00210 Prefix = "// "; 00211 else if (Prefix == "///") 00212 Prefix = "/// "; 00213 } 00214 } 00215 00216 BreakableToken::Split 00217 BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, 00218 unsigned ColumnLimit) const { 00219 return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), 00220 ColumnLimit, Style.TabWidth, Encoding); 00221 } 00222 00223 void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 00224 Split Split, 00225 WhitespaceManager &Whitespaces) { 00226 Whitespaces.replaceWhitespaceInToken( 00227 Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, 00228 Postfix, Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, StartColumn); 00229 } 00230 00231 void BreakableLineComment::replaceWhitespace(unsigned LineIndex, 00232 unsigned TailOffset, Split Split, 00233 WhitespaceManager &Whitespaces) { 00234 Whitespaces.replaceWhitespaceInToken( 00235 Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, "", 00236 "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0, 00237 /*Spaces=*/1); 00238 } 00239 00240 void 00241 BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex, 00242 WhitespaceManager &Whitespaces) { 00243 if (OriginalPrefix != Prefix) { 00244 Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "", 00245 /*InPPDirective=*/false, 00246 /*Newlines=*/0, /*IndentLevel=*/0, 00247 /*Spaces=*/1); 00248 } 00249 } 00250 00251 BreakableBlockComment::BreakableBlockComment( 00252 const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, 00253 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, 00254 encoding::Encoding Encoding, const FormatStyle &Style) 00255 : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) { 00256 StringRef TokenText(Token.TokenText); 00257 assert(TokenText.startswith("/*") && TokenText.endswith("*/")); 00258 TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); 00259 00260 int IndentDelta = StartColumn - OriginalStartColumn; 00261 LeadingWhitespace.resize(Lines.size()); 00262 StartOfLineColumn.resize(Lines.size()); 00263 StartOfLineColumn[0] = StartColumn + 2; 00264 for (size_t i = 1; i < Lines.size(); ++i) 00265 adjustWhitespace(i, IndentDelta); 00266 00267 Decoration = "* "; 00268 if (Lines.size() == 1 && !FirstInLine) { 00269 // Comments for which FirstInLine is false can start on arbitrary column, 00270 // and available horizontal space can be too small to align consecutive 00271 // lines with the first one. 00272 // FIXME: We could, probably, align them to current indentation level, but 00273 // now we just wrap them without stars. 00274 Decoration = ""; 00275 } 00276 for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) { 00277 // If the last line is empty, the closing "*/" will have a star. 00278 if (i + 1 == e && Lines[i].empty()) 00279 break; 00280 while (!Lines[i].startswith(Decoration)) 00281 Decoration = Decoration.substr(0, Decoration.size() - 1); 00282 } 00283 00284 LastLineNeedsDecoration = true; 00285 IndentAtLineBreak = StartOfLineColumn[0] + 1; 00286 for (size_t i = 1; i < Lines.size(); ++i) { 00287 if (Lines[i].empty()) { 00288 if (i + 1 == Lines.size()) { 00289 // Empty last line means that we already have a star as a part of the 00290 // trailing */. We also need to preserve whitespace, so that */ is 00291 // correctly indented. 00292 LastLineNeedsDecoration = false; 00293 } else if (Decoration.empty()) { 00294 // For all other lines, set the start column to 0 if they're empty, so 00295 // we do not insert trailing whitespace anywhere. 00296 StartOfLineColumn[i] = 0; 00297 } 00298 continue; 00299 } 00300 // The first line already excludes the star. 00301 // For all other lines, adjust the line to exclude the star and 00302 // (optionally) the first whitespace. 00303 StartOfLineColumn[i] += Decoration.size(); 00304 Lines[i] = Lines[i].substr(Decoration.size()); 00305 LeadingWhitespace[i] += Decoration.size(); 00306 IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]); 00307 } 00308 IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); 00309 DEBUG({ 00310 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; 00311 for (size_t i = 0; i < Lines.size(); ++i) { 00312 llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i] 00313 << "\n"; 00314 } 00315 }); 00316 } 00317 00318 void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, 00319 int IndentDelta) { 00320 // When in a preprocessor directive, the trailing backslash in a block comment 00321 // is not needed, but can serve a purpose of uniformity with necessary escaped 00322 // newlines outside the comment. In this case we remove it here before 00323 // trimming the trailing whitespace. The backslash will be re-added later when 00324 // inserting a line break. 00325 size_t EndOfPreviousLine = Lines[LineIndex - 1].size(); 00326 if (InPPDirective && Lines[LineIndex - 1].endswith("\\")) 00327 --EndOfPreviousLine; 00328 00329 // Calculate the end of the non-whitespace text in the previous line. 00330 EndOfPreviousLine = 00331 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine); 00332 if (EndOfPreviousLine == StringRef::npos) 00333 EndOfPreviousLine = 0; 00334 else 00335 ++EndOfPreviousLine; 00336 // Calculate the start of the non-whitespace text in the current line. 00337 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks); 00338 if (StartOfLine == StringRef::npos) 00339 StartOfLine = Lines[LineIndex].size(); 00340 00341 StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); 00342 // Adjust Lines to only contain relevant text. 00343 Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); 00344 Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); 00345 // Adjust LeadingWhitespace to account all whitespace between the lines 00346 // to the current line. 00347 LeadingWhitespace[LineIndex] = 00348 Lines[LineIndex].begin() - Lines[LineIndex - 1].end(); 00349 00350 // Adjust the start column uniformly across all lines. 00351 StartOfLineColumn[LineIndex] = 00352 encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) + 00353 IndentDelta; 00354 } 00355 00356 unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } 00357 00358 unsigned BreakableBlockComment::getLineLengthAfterSplit( 00359 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { 00360 unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset); 00361 return ContentStartColumn + 00362 encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length), 00363 ContentStartColumn, Style.TabWidth, 00364 Encoding) + 00365 // The last line gets a "*/" postfix. 00366 (LineIndex + 1 == Lines.size() ? 2 : 0); 00367 } 00368 00369 BreakableToken::Split 00370 BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, 00371 unsigned ColumnLimit) const { 00372 return getCommentSplit(Lines[LineIndex].substr(TailOffset), 00373 getContentStartColumn(LineIndex, TailOffset), 00374 ColumnLimit, Style.TabWidth, Encoding); 00375 } 00376 00377 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 00378 Split Split, 00379 WhitespaceManager &Whitespaces) { 00380 StringRef Text = Lines[LineIndex].substr(TailOffset); 00381 StringRef Prefix = Decoration; 00382 if (LineIndex + 1 == Lines.size() && 00383 Text.size() == Split.first + Split.second) { 00384 // For the last line we need to break before "*/", but not to add "* ". 00385 Prefix = ""; 00386 } 00387 00388 unsigned BreakOffsetInToken = 00389 Text.data() - Tok.TokenText.data() + Split.first; 00390 unsigned CharsToRemove = Split.second; 00391 assert(IndentAtLineBreak >= Decoration.size()); 00392 Whitespaces.replaceWhitespaceInToken( 00393 Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1, 00394 IndentLevel, IndentAtLineBreak - Decoration.size()); 00395 } 00396 00397 void BreakableBlockComment::replaceWhitespace(unsigned LineIndex, 00398 unsigned TailOffset, Split Split, 00399 WhitespaceManager &Whitespaces) { 00400 StringRef Text = Lines[LineIndex].substr(TailOffset); 00401 unsigned BreakOffsetInToken = 00402 Text.data() - Tok.TokenText.data() + Split.first; 00403 unsigned CharsToRemove = Split.second; 00404 Whitespaces.replaceWhitespaceInToken( 00405 Tok, BreakOffsetInToken, CharsToRemove, "", "", /*InPPDirective=*/false, 00406 /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1); 00407 } 00408 00409 void 00410 BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex, 00411 WhitespaceManager &Whitespaces) { 00412 if (LineIndex == 0) 00413 return; 00414 StringRef Prefix = Decoration; 00415 if (Lines[LineIndex].empty()) { 00416 if (LineIndex + 1 == Lines.size()) { 00417 if (!LastLineNeedsDecoration) { 00418 // If the last line was empty, we don't need a prefix, as the */ will 00419 // line up with the decoration (if it exists). 00420 Prefix = ""; 00421 } 00422 } else if (!Decoration.empty()) { 00423 // For other empty lines, if we do have a decoration, adapt it to not 00424 // contain a trailing whitespace. 00425 Prefix = Prefix.substr(0, 1); 00426 } 00427 } else { 00428 if (StartOfLineColumn[LineIndex] == 1) { 00429 // This line starts immediately after the decorating *. 00430 Prefix = Prefix.substr(0, 1); 00431 } 00432 } 00433 00434 unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() - 00435 Tok.TokenText.data() - 00436 LeadingWhitespace[LineIndex]; 00437 Whitespaces.replaceWhitespaceInToken( 00438 Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix, 00439 InPPDirective, 1, IndentLevel, 00440 StartOfLineColumn[LineIndex] - Prefix.size()); 00441 } 00442 00443 unsigned 00444 BreakableBlockComment::getContentStartColumn(unsigned LineIndex, 00445 unsigned TailOffset) const { 00446 // If we break, we always break at the predefined indent. 00447 if (TailOffset != 0) 00448 return IndentAtLineBreak; 00449 return std::max(0, StartOfLineColumn[LineIndex]); 00450 } 00451 00452 } // namespace format 00453 } // namespace clang