/// Returns true if \p C is a blank character: space, horizontal tab,
/// vertical tab, form feed or carriage return. The newline character is not
/// part of this set.
static bool IsBlank(char C) {
  return C == ' ' || C == '\t' || C == '\v' || C == '\f' || C == '\r';
}
KnownPrefixes(KnownCStylePrefixes); 49 1.1 joerg if (Style.Language == FormatStyle::LK_TextProto) 50 1.1 joerg KnownPrefixes = KnownTextProtoPrefixes; 51 1.1 joerg 52 1.1.1.2 joerg assert(std::is_sorted(KnownPrefixes.begin(), KnownPrefixes.end(), 53 1.1.1.2 joerg [](StringRef Lhs, StringRef Rhs) noexcept { 54 1.1.1.2 joerg return Lhs.size() > Rhs.size(); 55 1.1.1.2 joerg })); 56 1.1.1.2 joerg 57 1.1 joerg for (StringRef KnownPrefix : KnownPrefixes) { 58 1.1 joerg if (Comment.startswith(KnownPrefix)) { 59 1.1.1.2 joerg const auto PrefixLength = 60 1.1.1.2 joerg Comment.find_first_not_of(' ', KnownPrefix.size()); 61 1.1.1.2 joerg return Comment.substr(0, PrefixLength); 62 1.1 joerg } 63 1.1 joerg } 64 1.1.1.2 joerg return {}; 65 1.1 joerg } 66 1.1 joerg 67 1.1 joerg static BreakableToken::Split 68 1.1 joerg getCommentSplit(StringRef Text, unsigned ContentStartColumn, 69 1.1 joerg unsigned ColumnLimit, unsigned TabWidth, 70 1.1 joerg encoding::Encoding Encoding, const FormatStyle &Style, 71 1.1 joerg bool DecorationEndsWithStar = false) { 72 1.1 joerg LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text 73 1.1 joerg << "\", Column limit: " << ColumnLimit 74 1.1 joerg << ", Content start: " << ContentStartColumn << "\n"); 75 1.1 joerg if (ColumnLimit <= ContentStartColumn + 1) 76 1.1 joerg return BreakableToken::Split(StringRef::npos, 0); 77 1.1 joerg 78 1.1 joerg unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; 79 1.1 joerg unsigned MaxSplitBytes = 0; 80 1.1 joerg 81 1.1 joerg for (unsigned NumChars = 0; 82 1.1 joerg NumChars < MaxSplit && MaxSplitBytes < Text.size();) { 83 1.1 joerg unsigned BytesInChar = 84 1.1 joerg encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); 85 1.1 joerg NumChars += 86 1.1 joerg encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar), 87 1.1 joerg ContentStartColumn, TabWidth, Encoding); 88 1.1 joerg MaxSplitBytes += BytesInChar; 89 1.1 joerg } 90 1.1 joerg 91 1.1.1.2 joerg // In JavaScript, some @tags 
can be followed by {, and machinery that parses 92 1.1.1.2 joerg // these comments will fail to understand the comment if followed by a line 93 1.1.1.2 joerg // break. So avoid ever breaking before a {. 94 1.1.1.2 joerg if (Style.Language == FormatStyle::LK_JavaScript) { 95 1.1.1.2 joerg StringRef::size_type SpaceOffset = 96 1.1.1.2 joerg Text.find_first_of(Blanks, MaxSplitBytes); 97 1.1.1.2 joerg if (SpaceOffset != StringRef::npos && SpaceOffset + 1 < Text.size() && 98 1.1.1.2 joerg Text[SpaceOffset + 1] == '{') { 99 1.1.1.2 joerg MaxSplitBytes = SpaceOffset + 1; 100 1.1.1.2 joerg } 101 1.1.1.2 joerg } 102 1.1.1.2 joerg 103 1.1 joerg StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); 104 1.1 joerg 105 1.1.1.2 joerg static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\."); 106 1.1.1.2 joerg // Some spaces are unacceptable to break on, rewind past them. 107 1.1 joerg while (SpaceOffset != StringRef::npos) { 108 1.1.1.2 joerg // If a line-comment ends with `\`, the next line continues the comment, 109 1.1.1.2 joerg // whether or not it starts with `//`. This is confusing and triggers 110 1.1.1.2 joerg // -Wcomment. 111 1.1.1.2 joerg // Avoid introducing multiline comments by not allowing a break right 112 1.1.1.2 joerg // after '\'. 113 1.1.1.2 joerg if (Style.isCpp()) { 114 1.1.1.2 joerg StringRef::size_type LastNonBlank = 115 1.1.1.2 joerg Text.find_last_not_of(Blanks, SpaceOffset); 116 1.1.1.2 joerg if (LastNonBlank != StringRef::npos && Text[LastNonBlank] == '\\') { 117 1.1.1.2 joerg SpaceOffset = Text.find_last_of(Blanks, LastNonBlank); 118 1.1.1.2 joerg continue; 119 1.1.1.2 joerg } 120 1.1.1.2 joerg } 121 1.1.1.2 joerg 122 1.1 joerg // Do not split before a number followed by a dot: this would be interpreted 123 1.1 joerg // as a numbered list, which would prevent re-flowing in subsequent passes. 
124 1.1.1.2 joerg if (kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) { 125 1.1 joerg SpaceOffset = Text.find_last_of(Blanks, SpaceOffset); 126 1.1.1.2 joerg continue; 127 1.1.1.2 joerg } 128 1.1.1.2 joerg 129 1.1.1.2 joerg // Avoid ever breaking before a @tag or a { in JavaScript. 130 1.1.1.2 joerg if (Style.Language == FormatStyle::LK_JavaScript && 131 1.1.1.2 joerg SpaceOffset + 1 < Text.size() && 132 1.1.1.2 joerg (Text[SpaceOffset + 1] == '{' || Text[SpaceOffset + 1] == '@')) { 133 1.1 joerg SpaceOffset = Text.find_last_of(Blanks, SpaceOffset); 134 1.1.1.2 joerg continue; 135 1.1.1.2 joerg } 136 1.1.1.2 joerg 137 1.1.1.2 joerg break; 138 1.1 joerg } 139 1.1 joerg 140 1.1 joerg if (SpaceOffset == StringRef::npos || 141 1.1 joerg // Don't break at leading whitespace. 142 1.1 joerg Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { 143 1.1 joerg // Make sure that we don't break at leading whitespace that 144 1.1 joerg // reaches past MaxSplit. 145 1.1 joerg StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks); 146 1.1 joerg if (FirstNonWhitespace == StringRef::npos) 147 1.1 joerg // If the comment is only whitespace, we cannot split. 148 1.1 joerg return BreakableToken::Split(StringRef::npos, 0); 149 1.1 joerg SpaceOffset = Text.find_first_of( 150 1.1 joerg Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace)); 151 1.1 joerg } 152 1.1 joerg if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { 153 1.1 joerg // adaptStartOfLine will break after lines starting with /** if the comment 154 1.1 joerg // is broken anywhere. Avoid emitting this break twice here. 155 1.1 joerg // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will 156 1.1 joerg // insert a break after /**, so this code must not insert the same break. 
157 1.1 joerg if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*') 158 1.1 joerg return BreakableToken::Split(StringRef::npos, 0); 159 1.1 joerg StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks); 160 1.1 joerg StringRef AfterCut = Text.substr(SpaceOffset); 161 1.1 joerg // Don't trim the leading blanks if it would create a */ after the break. 162 1.1 joerg if (!DecorationEndsWithStar || AfterCut.size() <= 1 || AfterCut[1] != '/') 163 1.1 joerg AfterCut = AfterCut.ltrim(Blanks); 164 1.1 joerg return BreakableToken::Split(BeforeCut.size(), 165 1.1 joerg AfterCut.begin() - BeforeCut.end()); 166 1.1 joerg } 167 1.1 joerg return BreakableToken::Split(StringRef::npos, 0); 168 1.1 joerg } 169 1.1 joerg 170 1.1 joerg static BreakableToken::Split 171 1.1 joerg getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, 172 1.1 joerg unsigned TabWidth, encoding::Encoding Encoding) { 173 1.1 joerg // FIXME: Reduce unit test case. 174 1.1 joerg if (Text.empty()) 175 1.1 joerg return BreakableToken::Split(StringRef::npos, 0); 176 1.1 joerg if (ColumnLimit <= UsedColumns) 177 1.1 joerg return BreakableToken::Split(StringRef::npos, 0); 178 1.1 joerg unsigned MaxSplit = ColumnLimit - UsedColumns; 179 1.1 joerg StringRef::size_type SpaceOffset = 0; 180 1.1 joerg StringRef::size_type SlashOffset = 0; 181 1.1 joerg StringRef::size_type WordStartOffset = 0; 182 1.1 joerg StringRef::size_type SplitPoint = 0; 183 1.1 joerg for (unsigned Chars = 0;;) { 184 1.1 joerg unsigned Advance; 185 1.1 joerg if (Text[0] == '\\') { 186 1.1 joerg Advance = encoding::getEscapeSequenceLength(Text); 187 1.1 joerg Chars += Advance; 188 1.1 joerg } else { 189 1.1 joerg Advance = encoding::getCodePointNumBytes(Text[0], Encoding); 190 1.1 joerg Chars += encoding::columnWidthWithTabs( 191 1.1 joerg Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); 192 1.1 joerg } 193 1.1 joerg 194 1.1 joerg if (Chars > MaxSplit || Text.size() <= Advance) 195 1.1 joerg break; 196 
1.1 joerg 197 1.1 joerg if (IsBlank(Text[0])) 198 1.1 joerg SpaceOffset = SplitPoint; 199 1.1 joerg if (Text[0] == '/') 200 1.1 joerg SlashOffset = SplitPoint; 201 1.1 joerg if (Advance == 1 && !isAlphanumeric(Text[0])) 202 1.1 joerg WordStartOffset = SplitPoint; 203 1.1 joerg 204 1.1 joerg SplitPoint += Advance; 205 1.1 joerg Text = Text.substr(Advance); 206 1.1 joerg } 207 1.1 joerg 208 1.1 joerg if (SpaceOffset != 0) 209 1.1 joerg return BreakableToken::Split(SpaceOffset + 1, 0); 210 1.1 joerg if (SlashOffset != 0) 211 1.1 joerg return BreakableToken::Split(SlashOffset + 1, 0); 212 1.1 joerg if (WordStartOffset != 0) 213 1.1 joerg return BreakableToken::Split(WordStartOffset + 1, 0); 214 1.1 joerg if (SplitPoint != 0) 215 1.1 joerg return BreakableToken::Split(SplitPoint, 0); 216 1.1 joerg return BreakableToken::Split(StringRef::npos, 0); 217 1.1 joerg } 218 1.1 joerg 219 1.1 joerg bool switchesFormatting(const FormatToken &Token) { 220 1.1 joerg assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) && 221 1.1 joerg "formatting regions are switched by comment tokens"); 222 1.1 joerg StringRef Content = Token.TokenText.substr(2).ltrim(); 223 1.1 joerg return Content.startswith("clang-format on") || 224 1.1 joerg Content.startswith("clang-format off"); 225 1.1 joerg } 226 1.1 joerg 227 1.1 joerg unsigned 228 1.1 joerg BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns, 229 1.1 joerg Split Split) const { 230 1.1 joerg // Example: consider the content 231 1.1 joerg // lala lala 232 1.1 joerg // - RemainingTokenColumns is the original number of columns, 10; 233 1.1 joerg // - Split is (4, 2), denoting the two spaces between the two words; 234 1.1 joerg // 235 1.1 joerg // We compute the number of columns when the split is compressed into a single 236 1.1 joerg // space, like: 237 1.1 joerg // lala lala 238 1.1 joerg // 239 1.1 joerg // FIXME: Correctly measure the length of whitespace in Split.second so it 240 1.1 joerg // works with 
tabs. 241 1.1 joerg return RemainingTokenColumns + 1 - Split.second; 242 1.1 joerg } 243 1.1 joerg 244 1.1 joerg unsigned BreakableStringLiteral::getLineCount() const { return 1; } 245 1.1 joerg 246 1.1 joerg unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex, 247 1.1 joerg unsigned Offset, 248 1.1 joerg StringRef::size_type Length, 249 1.1 joerg unsigned StartColumn) const { 250 1.1 joerg llvm_unreachable("Getting the length of a part of the string literal " 251 1.1 joerg "indicates that the code tries to reflow it."); 252 1.1 joerg } 253 1.1 joerg 254 1.1 joerg unsigned 255 1.1 joerg BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset, 256 1.1 joerg unsigned StartColumn) const { 257 1.1 joerg return UnbreakableTailLength + Postfix.size() + 258 1.1 joerg encoding::columnWidthWithTabs(Line.substr(Offset, StringRef::npos), 259 1.1 joerg StartColumn, Style.TabWidth, Encoding); 260 1.1 joerg } 261 1.1 joerg 262 1.1 joerg unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex, 263 1.1 joerg bool Break) const { 264 1.1 joerg return StartColumn + Prefix.size(); 265 1.1 joerg } 266 1.1 joerg 267 1.1 joerg BreakableStringLiteral::BreakableStringLiteral( 268 1.1 joerg const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, 269 1.1 joerg StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective, 270 1.1 joerg encoding::Encoding Encoding, const FormatStyle &Style) 271 1.1 joerg : BreakableToken(Tok, InPPDirective, Encoding, Style), 272 1.1 joerg StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix), 273 1.1 joerg UnbreakableTailLength(UnbreakableTailLength) { 274 1.1 joerg assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); 275 1.1 joerg Line = Tok.TokenText.substr( 276 1.1 joerg Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); 277 1.1 joerg } 278 1.1 joerg 279 1.1 joerg BreakableToken::Split BreakableStringLiteral::getSplit( 280 1.1 joerg 
unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, 281 1.1.1.2 joerg unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const { 282 1.1 joerg return getStringSplit(Line.substr(TailOffset), ContentStartColumn, 283 1.1 joerg ColumnLimit - Postfix.size(), Style.TabWidth, Encoding); 284 1.1 joerg } 285 1.1 joerg 286 1.1 joerg void BreakableStringLiteral::insertBreak(unsigned LineIndex, 287 1.1 joerg unsigned TailOffset, Split Split, 288 1.1 joerg unsigned ContentIndent, 289 1.1 joerg WhitespaceManager &Whitespaces) const { 290 1.1 joerg Whitespaces.replaceWhitespaceInToken( 291 1.1 joerg Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, 292 1.1 joerg Prefix, InPPDirective, 1, StartColumn); 293 1.1 joerg } 294 1.1 joerg 295 1.1 joerg BreakableComment::BreakableComment(const FormatToken &Token, 296 1.1 joerg unsigned StartColumn, bool InPPDirective, 297 1.1 joerg encoding::Encoding Encoding, 298 1.1 joerg const FormatStyle &Style) 299 1.1 joerg : BreakableToken(Token, InPPDirective, Encoding, Style), 300 1.1 joerg StartColumn(StartColumn) {} 301 1.1 joerg 302 1.1 joerg unsigned BreakableComment::getLineCount() const { return Lines.size(); } 303 1.1 joerg 304 1.1 joerg BreakableToken::Split 305 1.1 joerg BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset, 306 1.1 joerg unsigned ColumnLimit, unsigned ContentStartColumn, 307 1.1.1.2 joerg const llvm::Regex &CommentPragmasRegex) const { 308 1.1 joerg // Don't break lines matching the comment pragmas regex. 
309 1.1 joerg if (CommentPragmasRegex.match(Content[LineIndex])) 310 1.1 joerg return Split(StringRef::npos, 0); 311 1.1 joerg return getCommentSplit(Content[LineIndex].substr(TailOffset), 312 1.1 joerg ContentStartColumn, ColumnLimit, Style.TabWidth, 313 1.1 joerg Encoding, Style); 314 1.1 joerg } 315 1.1 joerg 316 1.1 joerg void BreakableComment::compressWhitespace( 317 1.1 joerg unsigned LineIndex, unsigned TailOffset, Split Split, 318 1.1 joerg WhitespaceManager &Whitespaces) const { 319 1.1 joerg StringRef Text = Content[LineIndex].substr(TailOffset); 320 1.1 joerg // Text is relative to the content line, but Whitespaces operates relative to 321 1.1 joerg // the start of the corresponding token, so compute the start of the Split 322 1.1 joerg // that needs to be compressed into a single space relative to the start of 323 1.1 joerg // its token. 324 1.1 joerg unsigned BreakOffsetInToken = 325 1.1 joerg Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; 326 1.1 joerg unsigned CharsToRemove = Split.second; 327 1.1 joerg Whitespaces.replaceWhitespaceInToken( 328 1.1 joerg tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "", 329 1.1 joerg /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1); 330 1.1 joerg } 331 1.1 joerg 332 1.1 joerg const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const { 333 1.1 joerg return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok; 334 1.1 joerg } 335 1.1 joerg 336 1.1 joerg static bool mayReflowContent(StringRef Content) { 337 1.1 joerg Content = Content.trim(Blanks); 338 1.1 joerg // Lines starting with '@' commonly have special meaning. 339 1.1 joerg // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists. 
340 1.1 joerg bool hasSpecialMeaningPrefix = false; 341 1.1 joerg for (StringRef Prefix : 342 1.1 joerg {"@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) { 343 1.1 joerg if (Content.startswith(Prefix)) { 344 1.1 joerg hasSpecialMeaningPrefix = true; 345 1.1 joerg break; 346 1.1 joerg } 347 1.1 joerg } 348 1.1 joerg 349 1.1 joerg // Numbered lists may also start with a number followed by '.' 350 1.1 joerg // To avoid issues if a line starts with a number which is actually the end 351 1.1 joerg // of a previous line, we only consider numbers with up to 2 digits. 352 1.1.1.2 joerg static const auto kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. "); 353 1.1 joerg hasSpecialMeaningPrefix = 354 1.1.1.2 joerg hasSpecialMeaningPrefix || kNumberedListRegexp.match(Content); 355 1.1 joerg 356 1.1 joerg // Simple heuristic for what to reflow: content should contain at least two 357 1.1 joerg // characters and either the first or second character must be 358 1.1 joerg // non-punctuation. 359 1.1 joerg return Content.size() >= 2 && !hasSpecialMeaningPrefix && 360 1.1 joerg !Content.endswith("\\") && 361 1.1 joerg // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is 362 1.1 joerg // true, then the first code point must be 1 byte long. 
363 1.1 joerg (!isPunctuation(Content[0]) || !isPunctuation(Content[1])); 364 1.1 joerg } 365 1.1 joerg 366 1.1 joerg BreakableBlockComment::BreakableBlockComment( 367 1.1 joerg const FormatToken &Token, unsigned StartColumn, 368 1.1 joerg unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, 369 1.1 joerg encoding::Encoding Encoding, const FormatStyle &Style, bool UseCRLF) 370 1.1 joerg : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style), 371 1.1 joerg DelimitersOnNewline(false), 372 1.1 joerg UnbreakableTailLength(Token.UnbreakableTailLength) { 373 1.1 joerg assert(Tok.is(TT_BlockComment) && 374 1.1 joerg "block comment section must start with a block comment"); 375 1.1 joerg 376 1.1 joerg StringRef TokenText(Tok.TokenText); 377 1.1 joerg assert(TokenText.startswith("/*") && TokenText.endswith("*/")); 378 1.1 joerg TokenText.substr(2, TokenText.size() - 4) 379 1.1 joerg .split(Lines, UseCRLF ? "\r\n" : "\n"); 380 1.1 joerg 381 1.1 joerg int IndentDelta = StartColumn - OriginalStartColumn; 382 1.1 joerg Content.resize(Lines.size()); 383 1.1 joerg Content[0] = Lines[0]; 384 1.1 joerg ContentColumn.resize(Lines.size()); 385 1.1 joerg // Account for the initial '/*'. 386 1.1 joerg ContentColumn[0] = StartColumn + 2; 387 1.1 joerg Tokens.resize(Lines.size()); 388 1.1 joerg for (size_t i = 1; i < Lines.size(); ++i) 389 1.1 joerg adjustWhitespace(i, IndentDelta); 390 1.1 joerg 391 1.1 joerg // Align decorations with the column of the star on the first line, 392 1.1 joerg // that is one column after the start "/*". 
393 1.1 joerg DecorationColumn = StartColumn + 1; 394 1.1 joerg 395 1.1 joerg // Account for comment decoration patterns like this: 396 1.1 joerg // 397 1.1 joerg // /* 398 1.1 joerg // ** blah blah blah 399 1.1 joerg // */ 400 1.1 joerg if (Lines.size() >= 2 && Content[1].startswith("**") && 401 1.1 joerg static_cast<unsigned>(ContentColumn[1]) == StartColumn) { 402 1.1 joerg DecorationColumn = StartColumn; 403 1.1 joerg } 404 1.1 joerg 405 1.1 joerg Decoration = "* "; 406 1.1 joerg if (Lines.size() == 1 && !FirstInLine) { 407 1.1 joerg // Comments for which FirstInLine is false can start on arbitrary column, 408 1.1 joerg // and available horizontal space can be too small to align consecutive 409 1.1 joerg // lines with the first one. 410 1.1 joerg // FIXME: We could, probably, align them to current indentation level, but 411 1.1 joerg // now we just wrap them without stars. 412 1.1 joerg Decoration = ""; 413 1.1 joerg } 414 1.1 joerg for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) { 415 1.1 joerg // If the last line is empty, the closing "*/" will have a star. 416 1.1 joerg if (i + 1 == e && Content[i].empty()) 417 1.1 joerg break; 418 1.1 joerg if (!Content[i].empty() && i + 1 != e && Decoration.startswith(Content[i])) 419 1.1 joerg continue; 420 1.1 joerg while (!Content[i].startswith(Decoration)) 421 1.1 joerg Decoration = Decoration.substr(0, Decoration.size() - 1); 422 1.1 joerg } 423 1.1 joerg 424 1.1 joerg LastLineNeedsDecoration = true; 425 1.1 joerg IndentAtLineBreak = ContentColumn[0] + 1; 426 1.1 joerg for (size_t i = 1, e = Lines.size(); i < e; ++i) { 427 1.1 joerg if (Content[i].empty()) { 428 1.1 joerg if (i + 1 == e) { 429 1.1 joerg // Empty last line means that we already have a star as a part of the 430 1.1 joerg // trailing */. We also need to preserve whitespace, so that */ is 431 1.1 joerg // correctly indented. 
432 1.1 joerg LastLineNeedsDecoration = false; 433 1.1 joerg // Align the star in the last '*/' with the stars on the previous lines. 434 1.1 joerg if (e >= 2 && !Decoration.empty()) { 435 1.1 joerg ContentColumn[i] = DecorationColumn; 436 1.1 joerg } 437 1.1 joerg } else if (Decoration.empty()) { 438 1.1 joerg // For all other lines, set the start column to 0 if they're empty, so 439 1.1 joerg // we do not insert trailing whitespace anywhere. 440 1.1 joerg ContentColumn[i] = 0; 441 1.1 joerg } 442 1.1 joerg continue; 443 1.1 joerg } 444 1.1 joerg 445 1.1 joerg // The first line already excludes the star. 446 1.1 joerg // The last line excludes the star if LastLineNeedsDecoration is false. 447 1.1 joerg // For all other lines, adjust the line to exclude the star and 448 1.1 joerg // (optionally) the first whitespace. 449 1.1 joerg unsigned DecorationSize = Decoration.startswith(Content[i]) 450 1.1 joerg ? Content[i].size() 451 1.1 joerg : Decoration.size(); 452 1.1 joerg if (DecorationSize) { 453 1.1 joerg ContentColumn[i] = DecorationColumn + DecorationSize; 454 1.1 joerg } 455 1.1 joerg Content[i] = Content[i].substr(DecorationSize); 456 1.1 joerg if (!Decoration.startswith(Content[i])) 457 1.1 joerg IndentAtLineBreak = 458 1.1 joerg std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i])); 459 1.1 joerg } 460 1.1 joerg IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); 461 1.1 joerg 462 1.1 joerg // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case. 463 1.1 joerg if (Style.Language == FormatStyle::LK_JavaScript || 464 1.1 joerg Style.Language == FormatStyle::LK_Java) { 465 1.1 joerg if ((Lines[0] == "*" || Lines[0].startswith("* ")) && Lines.size() > 1) { 466 1.1 joerg // This is a multiline jsdoc comment. 
467 1.1 joerg DelimitersOnNewline = true; 468 1.1 joerg } else if (Lines[0].startswith("* ") && Lines.size() == 1) { 469 1.1 joerg // Detect a long single-line comment, like: 470 1.1 joerg // /** long long long */ 471 1.1 joerg // Below, '2' is the width of '*/'. 472 1.1 joerg unsigned EndColumn = 473 1.1 joerg ContentColumn[0] + 474 1.1 joerg encoding::columnWidthWithTabs(Lines[0], ContentColumn[0], 475 1.1 joerg Style.TabWidth, Encoding) + 476 1.1 joerg 2; 477 1.1 joerg DelimitersOnNewline = EndColumn > Style.ColumnLimit; 478 1.1 joerg } 479 1.1 joerg } 480 1.1 joerg 481 1.1 joerg LLVM_DEBUG({ 482 1.1 joerg llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; 483 1.1 joerg llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n"; 484 1.1 joerg for (size_t i = 0; i < Lines.size(); ++i) { 485 1.1 joerg llvm::dbgs() << i << " |" << Content[i] << "| " 486 1.1 joerg << "CC=" << ContentColumn[i] << "| " 487 1.1 joerg << "IN=" << (Content[i].data() - Lines[i].data()) << "\n"; 488 1.1 joerg } 489 1.1 joerg }); 490 1.1 joerg } 491 1.1 joerg 492 1.1 joerg BreakableToken::Split BreakableBlockComment::getSplit( 493 1.1 joerg unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, 494 1.1.1.2 joerg unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const { 495 1.1 joerg // Don't break lines matching the comment pragmas regex. 
496 1.1 joerg if (CommentPragmasRegex.match(Content[LineIndex])) 497 1.1 joerg return Split(StringRef::npos, 0); 498 1.1 joerg return getCommentSplit(Content[LineIndex].substr(TailOffset), 499 1.1 joerg ContentStartColumn, ColumnLimit, Style.TabWidth, 500 1.1 joerg Encoding, Style, Decoration.endswith("*")); 501 1.1 joerg } 502 1.1 joerg 503 1.1 joerg void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, 504 1.1 joerg int IndentDelta) { 505 1.1 joerg // When in a preprocessor directive, the trailing backslash in a block comment 506 1.1 joerg // is not needed, but can serve a purpose of uniformity with necessary escaped 507 1.1 joerg // newlines outside the comment. In this case we remove it here before 508 1.1 joerg // trimming the trailing whitespace. The backslash will be re-added later when 509 1.1 joerg // inserting a line break. 510 1.1 joerg size_t EndOfPreviousLine = Lines[LineIndex - 1].size(); 511 1.1 joerg if (InPPDirective && Lines[LineIndex - 1].endswith("\\")) 512 1.1 joerg --EndOfPreviousLine; 513 1.1 joerg 514 1.1 joerg // Calculate the end of the non-whitespace text in the previous line. 515 1.1 joerg EndOfPreviousLine = 516 1.1 joerg Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine); 517 1.1 joerg if (EndOfPreviousLine == StringRef::npos) 518 1.1 joerg EndOfPreviousLine = 0; 519 1.1 joerg else 520 1.1 joerg ++EndOfPreviousLine; 521 1.1 joerg // Calculate the start of the non-whitespace text in the current line. 522 1.1 joerg size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks); 523 1.1 joerg if (StartOfLine == StringRef::npos) 524 1.1 joerg StartOfLine = Lines[LineIndex].size(); 525 1.1 joerg 526 1.1 joerg StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); 527 1.1 joerg // Adjust Lines to only contain relevant text. 
528 1.1 joerg size_t PreviousContentOffset = 529 1.1 joerg Content[LineIndex - 1].data() - Lines[LineIndex - 1].data(); 530 1.1 joerg Content[LineIndex - 1] = Lines[LineIndex - 1].substr( 531 1.1 joerg PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset); 532 1.1 joerg Content[LineIndex] = Lines[LineIndex].substr(StartOfLine); 533 1.1 joerg 534 1.1 joerg // Adjust the start column uniformly across all lines. 535 1.1 joerg ContentColumn[LineIndex] = 536 1.1 joerg encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) + 537 1.1 joerg IndentDelta; 538 1.1 joerg } 539 1.1 joerg 540 1.1 joerg unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex, 541 1.1 joerg unsigned Offset, 542 1.1 joerg StringRef::size_type Length, 543 1.1 joerg unsigned StartColumn) const { 544 1.1 joerg unsigned LineLength = 545 1.1 joerg encoding::columnWidthWithTabs(Content[LineIndex].substr(Offset, Length), 546 1.1 joerg StartColumn, Style.TabWidth, Encoding); 547 1.1 joerg // FIXME: This should go into getRemainingLength instead, but we currently 548 1.1 joerg // break tests when putting it there. Investigate how to fix those tests. 549 1.1 joerg // The last line gets a "*/" postfix. 550 1.1 joerg if (LineIndex + 1 == Lines.size()) { 551 1.1 joerg LineLength += 2; 552 1.1 joerg // We never need a decoration when breaking just the trailing "*/" postfix. 553 1.1 joerg // Note that checking that Length == 0 is not enough, since Length could 554 1.1 joerg // also be StringRef::npos. 
555 1.1 joerg if (Content[LineIndex].substr(Offset, StringRef::npos).empty()) { 556 1.1 joerg LineLength -= Decoration.size(); 557 1.1 joerg } 558 1.1 joerg } 559 1.1 joerg return LineLength; 560 1.1 joerg } 561 1.1 joerg 562 1.1 joerg unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex, 563 1.1 joerg unsigned Offset, 564 1.1 joerg unsigned StartColumn) const { 565 1.1 joerg return UnbreakableTailLength + 566 1.1 joerg getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn); 567 1.1 joerg } 568 1.1 joerg 569 1.1 joerg unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex, 570 1.1 joerg bool Break) const { 571 1.1 joerg if (Break) 572 1.1 joerg return IndentAtLineBreak; 573 1.1 joerg return std::max(0, ContentColumn[LineIndex]); 574 1.1 joerg } 575 1.1 joerg 576 1.1 joerg const llvm::StringSet<> 577 1.1 joerg BreakableBlockComment::ContentIndentingJavadocAnnotations = { 578 1.1 joerg "@param", "@return", "@returns", "@throws", "@type", "@template", 579 1.1 joerg "@see", "@deprecated", "@define", "@exports", "@mods", "@private", 580 1.1 joerg }; 581 1.1 joerg 582 1.1 joerg unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const { 583 1.1 joerg if (Style.Language != FormatStyle::LK_Java && 584 1.1 joerg Style.Language != FormatStyle::LK_JavaScript) 585 1.1 joerg return 0; 586 1.1 joerg // The content at LineIndex 0 of a comment like: 587 1.1 joerg // /** line 0 */ 588 1.1 joerg // is "* line 0", so we need to skip over the decoration in that case. 
589 1.1 joerg StringRef ContentWithNoDecoration = Content[LineIndex]; 590 1.1 joerg if (LineIndex == 0 && ContentWithNoDecoration.startswith("*")) { 591 1.1 joerg ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks); 592 1.1 joerg } 593 1.1 joerg StringRef FirstWord = ContentWithNoDecoration.substr( 594 1.1 joerg 0, ContentWithNoDecoration.find_first_of(Blanks)); 595 1.1 joerg if (ContentIndentingJavadocAnnotations.find(FirstWord) != 596 1.1 joerg ContentIndentingJavadocAnnotations.end()) 597 1.1 joerg return Style.ContinuationIndentWidth; 598 1.1 joerg return 0; 599 1.1 joerg } 600 1.1 joerg 601 1.1 joerg void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 602 1.1 joerg Split Split, unsigned ContentIndent, 603 1.1 joerg WhitespaceManager &Whitespaces) const { 604 1.1 joerg StringRef Text = Content[LineIndex].substr(TailOffset); 605 1.1 joerg StringRef Prefix = Decoration; 606 1.1 joerg // We need this to account for the case when we have a decoration "* " for all 607 1.1 joerg // the lines except for the last one, where the star in "*/" acts as a 608 1.1 joerg // decoration. 609 1.1 joerg unsigned LocalIndentAtLineBreak = IndentAtLineBreak; 610 1.1 joerg if (LineIndex + 1 == Lines.size() && 611 1.1 joerg Text.size() == Split.first + Split.second) { 612 1.1 joerg // For the last line we need to break before "*/", but not to add "* ". 613 1.1 joerg Prefix = ""; 614 1.1 joerg if (LocalIndentAtLineBreak >= 2) 615 1.1 joerg LocalIndentAtLineBreak -= 2; 616 1.1 joerg } 617 1.1 joerg // The split offset is from the beginning of the line. Convert it to an offset 618 1.1 joerg // from the beginning of the token text. 
619 1.1 joerg unsigned BreakOffsetInToken = 620 1.1 joerg Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; 621 1.1 joerg unsigned CharsToRemove = Split.second; 622 1.1 joerg assert(LocalIndentAtLineBreak >= Prefix.size()); 623 1.1.1.2 joerg std::string PrefixWithTrailingIndent = std::string(Prefix); 624 1.1.1.2 joerg PrefixWithTrailingIndent.append(ContentIndent, ' '); 625 1.1 joerg Whitespaces.replaceWhitespaceInToken( 626 1.1 joerg tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", 627 1.1 joerg PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1, 628 1.1 joerg /*Spaces=*/LocalIndentAtLineBreak + ContentIndent - 629 1.1 joerg PrefixWithTrailingIndent.size()); 630 1.1 joerg } 631 1.1 joerg 632 1.1.1.2 joerg BreakableToken::Split BreakableBlockComment::getReflowSplit( 633 1.1.1.2 joerg unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const { 634 1.1 joerg if (!mayReflow(LineIndex, CommentPragmasRegex)) 635 1.1 joerg return Split(StringRef::npos, 0); 636 1.1 joerg 637 1.1 joerg // If we're reflowing into a line with content indent, only reflow the next 638 1.1 joerg // line if its starting whitespace matches the content indent. 639 1.1 joerg size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks); 640 1.1 joerg if (LineIndex) { 641 1.1 joerg unsigned PreviousContentIndent = getContentIndent(LineIndex - 1); 642 1.1 joerg if (PreviousContentIndent && Trimmed != StringRef::npos && 643 1.1 joerg Trimmed != PreviousContentIndent) 644 1.1 joerg return Split(StringRef::npos, 0); 645 1.1 joerg } 646 1.1 joerg 647 1.1 joerg return Split(0, Trimmed != StringRef::npos ? Trimmed : 0); 648 1.1 joerg } 649 1.1 joerg 650 1.1 joerg bool BreakableBlockComment::introducesBreakBeforeToken() const { 651 1.1 joerg // A break is introduced when we want delimiters on newline. 
652 1.1 joerg return DelimitersOnNewline && 653 1.1 joerg Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos; 654 1.1 joerg } 655 1.1 joerg 656 1.1 joerg void BreakableBlockComment::reflow(unsigned LineIndex, 657 1.1 joerg WhitespaceManager &Whitespaces) const { 658 1.1 joerg StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); 659 1.1 joerg // Here we need to reflow. 660 1.1 joerg assert(Tokens[LineIndex - 1] == Tokens[LineIndex] && 661 1.1 joerg "Reflowing whitespace within a token"); 662 1.1 joerg // This is the offset of the end of the last line relative to the start of 663 1.1 joerg // the token text in the token. 664 1.1 joerg unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + 665 1.1 joerg Content[LineIndex - 1].size() - 666 1.1 joerg tokenAt(LineIndex).TokenText.data(); 667 1.1 joerg unsigned WhitespaceLength = TrimmedContent.data() - 668 1.1 joerg tokenAt(LineIndex).TokenText.data() - 669 1.1 joerg WhitespaceOffsetInToken; 670 1.1 joerg Whitespaces.replaceWhitespaceInToken( 671 1.1 joerg tokenAt(LineIndex), WhitespaceOffsetInToken, 672 1.1 joerg /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"", 673 1.1 joerg /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0, 674 1.1 joerg /*Spaces=*/0); 675 1.1 joerg } 676 1.1 joerg 677 1.1 joerg void BreakableBlockComment::adaptStartOfLine( 678 1.1 joerg unsigned LineIndex, WhitespaceManager &Whitespaces) const { 679 1.1 joerg if (LineIndex == 0) { 680 1.1 joerg if (DelimitersOnNewline) { 681 1.1 joerg // Since we're breaking at index 1 below, the break position and the 682 1.1 joerg // break length are the same. 683 1.1 joerg // Note: this works because getCommentSplit is careful never to split at 684 1.1 joerg // the beginning of a line. 
685 1.1 joerg size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks); 686 1.1 joerg if (BreakLength != StringRef::npos) 687 1.1 joerg insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0, 688 1.1 joerg Whitespaces); 689 1.1 joerg } 690 1.1 joerg return; 691 1.1 joerg } 692 1.1 joerg // Here no reflow with the previous line will happen. 693 1.1 joerg // Fix the decoration of the line at LineIndex. 694 1.1 joerg StringRef Prefix = Decoration; 695 1.1 joerg if (Content[LineIndex].empty()) { 696 1.1 joerg if (LineIndex + 1 == Lines.size()) { 697 1.1 joerg if (!LastLineNeedsDecoration) { 698 1.1 joerg // If the last line was empty, we don't need a prefix, as the */ will 699 1.1 joerg // line up with the decoration (if it exists). 700 1.1 joerg Prefix = ""; 701 1.1 joerg } 702 1.1 joerg } else if (!Decoration.empty()) { 703 1.1 joerg // For other empty lines, if we do have a decoration, adapt it to not 704 1.1 joerg // contain a trailing whitespace. 705 1.1 joerg Prefix = Prefix.substr(0, 1); 706 1.1 joerg } 707 1.1 joerg } else { 708 1.1 joerg if (ContentColumn[LineIndex] == 1) { 709 1.1 joerg // This line starts immediately after the decorating *. 710 1.1 joerg Prefix = Prefix.substr(0, 1); 711 1.1 joerg } 712 1.1 joerg } 713 1.1 joerg // This is the offset of the end of the last line relative to the start of the 714 1.1 joerg // token text in the token. 
715 1.1 joerg unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + 716 1.1 joerg Content[LineIndex - 1].size() - 717 1.1 joerg tokenAt(LineIndex).TokenText.data(); 718 1.1 joerg unsigned WhitespaceLength = Content[LineIndex].data() - 719 1.1 joerg tokenAt(LineIndex).TokenText.data() - 720 1.1 joerg WhitespaceOffsetInToken; 721 1.1 joerg Whitespaces.replaceWhitespaceInToken( 722 1.1 joerg tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix, 723 1.1 joerg InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size()); 724 1.1 joerg } 725 1.1 joerg 726 1.1 joerg BreakableToken::Split 727 1.1 joerg BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const { 728 1.1 joerg if (DelimitersOnNewline) { 729 1.1 joerg // Replace the trailing whitespace of the last line with a newline. 730 1.1 joerg // In case the last line is empty, the ending '*/' is already on its own 731 1.1 joerg // line. 732 1.1 joerg StringRef Line = Content.back().substr(TailOffset); 733 1.1 joerg StringRef TrimmedLine = Line.rtrim(Blanks); 734 1.1 joerg if (!TrimmedLine.empty()) 735 1.1 joerg return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size()); 736 1.1 joerg } 737 1.1 joerg return Split(StringRef::npos, 0); 738 1.1 joerg } 739 1.1 joerg 740 1.1.1.2 joerg bool BreakableBlockComment::mayReflow( 741 1.1.1.2 joerg unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const { 742 1.1 joerg // Content[LineIndex] may exclude the indent after the '*' decoration. In that 743 1.1 joerg // case, we compute the start of the comment pragma manually. 
744 1.1 joerg StringRef IndentContent = Content[LineIndex]; 745 1.1 joerg if (Lines[LineIndex].ltrim(Blanks).startswith("*")) { 746 1.1 joerg IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1); 747 1.1 joerg } 748 1.1 joerg return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) && 749 1.1 joerg mayReflowContent(Content[LineIndex]) && !Tok.Finalized && 750 1.1 joerg !switchesFormatting(tokenAt(LineIndex)); 751 1.1 joerg } 752 1.1 joerg 753 1.1 joerg BreakableLineCommentSection::BreakableLineCommentSection( 754 1.1.1.2 joerg const FormatToken &Token, unsigned StartColumn, bool InPPDirective, 755 1.1 joerg encoding::Encoding Encoding, const FormatStyle &Style) 756 1.1 joerg : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) { 757 1.1 joerg assert(Tok.is(TT_LineComment) && 758 1.1 joerg "line comment section must start with a line comment"); 759 1.1 joerg FormatToken *LineTok = nullptr; 760 1.1.1.2 joerg // How many spaces we changed in the first line of the section, this will be 761 1.1.1.2 joerg // applied in all following lines 762 1.1.1.2 joerg int FirstLineSpaceChange = 0; 763 1.1 joerg for (const FormatToken *CurrentTok = &Tok; 764 1.1 joerg CurrentTok && CurrentTok->is(TT_LineComment); 765 1.1 joerg CurrentTok = CurrentTok->Next) { 766 1.1 joerg LastLineTok = LineTok; 767 1.1 joerg StringRef TokenText(CurrentTok->TokenText); 768 1.1 joerg assert((TokenText.startswith("//") || TokenText.startswith("#")) && 769 1.1 joerg "unsupported line comment prefix, '//' and '#' are supported"); 770 1.1 joerg size_t FirstLineIndex = Lines.size(); 771 1.1 joerg TokenText.split(Lines, "\n"); 772 1.1 joerg Content.resize(Lines.size()); 773 1.1 joerg ContentColumn.resize(Lines.size()); 774 1.1.1.2 joerg PrefixSpaceChange.resize(Lines.size()); 775 1.1 joerg Tokens.resize(Lines.size()); 776 1.1 joerg Prefix.resize(Lines.size()); 777 1.1 joerg OriginalPrefix.resize(Lines.size()); 778 1.1 joerg for (size_t i = FirstLineIndex, e = Lines.size(); i < 
e; ++i) { 779 1.1 joerg Lines[i] = Lines[i].ltrim(Blanks); 780 1.1.1.2 joerg StringRef IndentPrefix = getLineCommentIndentPrefix(Lines[i], Style); 781 1.1.1.2 joerg OriginalPrefix[i] = IndentPrefix; 782 1.1.1.2 joerg const unsigned SpacesInPrefix = 783 1.1.1.2 joerg std::count(IndentPrefix.begin(), IndentPrefix.end(), ' '); 784 1.1.1.2 joerg 785 1.1.1.2 joerg // On the first line of the comment section we calculate how many spaces 786 1.1.1.2 joerg // are to be added or removed, all lines after that just get only the 787 1.1.1.2 joerg // change and we will not look at the maximum anymore. Additionally to the 788 1.1.1.2 joerg // actual first line, we calculate that when the non space Prefix changes, 789 1.1.1.2 joerg // e.g. from "///" to "//". 790 1.1.1.2 joerg if (i == 0 || OriginalPrefix[i].rtrim(Blanks) != 791 1.1.1.2 joerg OriginalPrefix[i - 1].rtrim(Blanks)) { 792 1.1.1.2 joerg if (SpacesInPrefix < Style.SpacesInLineCommentPrefix.Minimum && 793 1.1.1.2 joerg Lines[i].size() > IndentPrefix.size() && 794 1.1.1.2 joerg isAlphanumeric(Lines[i][IndentPrefix.size()])) { 795 1.1.1.2 joerg FirstLineSpaceChange = 796 1.1.1.2 joerg Style.SpacesInLineCommentPrefix.Minimum - SpacesInPrefix; 797 1.1.1.2 joerg } else if (SpacesInPrefix > Style.SpacesInLineCommentPrefix.Maximum) { 798 1.1.1.2 joerg FirstLineSpaceChange = 799 1.1.1.2 joerg Style.SpacesInLineCommentPrefix.Maximum - SpacesInPrefix; 800 1.1.1.2 joerg } else { 801 1.1.1.2 joerg FirstLineSpaceChange = 0; 802 1.1.1.2 joerg } 803 1.1.1.2 joerg } 804 1.1.1.2 joerg 805 1.1.1.2 joerg if (Lines[i].size() != IndentPrefix.size()) { 806 1.1.1.2 joerg PrefixSpaceChange[i] = FirstLineSpaceChange; 807 1.1.1.2 joerg 808 1.1.1.2 joerg if (SpacesInPrefix + PrefixSpaceChange[i] < 809 1.1.1.2 joerg Style.SpacesInLineCommentPrefix.Minimum) { 810 1.1.1.2 joerg PrefixSpaceChange[i] += Style.SpacesInLineCommentPrefix.Minimum - 811 1.1.1.2 joerg (SpacesInPrefix + PrefixSpaceChange[i]); 812 1.1.1.2 joerg } 813 1.1.1.2 joerg 814 1.1.1.2 
joerg assert(Lines[i].size() > IndentPrefix.size()); 815 1.1.1.2 joerg const auto FirstNonSpace = Lines[i][IndentPrefix.size()]; 816 1.1.1.2 joerg const auto AllowsSpaceChange = 817 1.1.1.2 joerg SpacesInPrefix != 0 || 818 1.1.1.2 joerg (isAlphanumeric(FirstNonSpace) || 819 1.1.1.2 joerg (FirstNonSpace == '}' && FirstLineSpaceChange != 0)); 820 1.1.1.2 joerg 821 1.1.1.2 joerg if (PrefixSpaceChange[i] > 0 && AllowsSpaceChange) { 822 1.1.1.2 joerg Prefix[i] = IndentPrefix.str(); 823 1.1.1.2 joerg Prefix[i].append(PrefixSpaceChange[i], ' '); 824 1.1.1.2 joerg } else if (PrefixSpaceChange[i] < 0 && AllowsSpaceChange) { 825 1.1.1.2 joerg Prefix[i] = IndentPrefix 826 1.1.1.2 joerg .drop_back(std::min<std::size_t>( 827 1.1.1.2 joerg -PrefixSpaceChange[i], SpacesInPrefix)) 828 1.1.1.2 joerg .str(); 829 1.1.1.2 joerg } else { 830 1.1.1.2 joerg Prefix[i] = IndentPrefix.str(); 831 1.1.1.2 joerg } 832 1.1.1.2 joerg } else { 833 1.1.1.2 joerg // If the IndentPrefix is the whole line, there is no content and we 834 1.1.1.2 joerg // drop just all space 835 1.1.1.2 joerg Prefix[i] = IndentPrefix.drop_back(SpacesInPrefix).str(); 836 1.1 joerg } 837 1.1 joerg 838 1.1 joerg Tokens[i] = LineTok; 839 1.1 joerg Content[i] = Lines[i].substr(IndentPrefix.size()); 840 1.1 joerg ContentColumn[i] = 841 1.1 joerg StartColumn + encoding::columnWidthWithTabs(Prefix[i], StartColumn, 842 1.1 joerg Style.TabWidth, Encoding); 843 1.1 joerg 844 1.1 joerg // Calculate the end of the non-whitespace text in this line. 
845 1.1 joerg size_t EndOfLine = Content[i].find_last_not_of(Blanks); 846 1.1 joerg if (EndOfLine == StringRef::npos) 847 1.1 joerg EndOfLine = Content[i].size(); 848 1.1 joerg else 849 1.1 joerg ++EndOfLine; 850 1.1 joerg Content[i] = Content[i].substr(0, EndOfLine); 851 1.1 joerg } 852 1.1 joerg LineTok = CurrentTok->Next; 853 1.1 joerg if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) { 854 1.1 joerg // A line comment section needs to broken by a line comment that is 855 1.1 joerg // preceded by at least two newlines. Note that we put this break here 856 1.1 joerg // instead of breaking at a previous stage during parsing, since that 857 1.1 joerg // would split the contents of the enum into two unwrapped lines in this 858 1.1 joerg // example, which is undesirable: 859 1.1 joerg // enum A { 860 1.1 joerg // a, // comment about a 861 1.1 joerg // 862 1.1 joerg // // comment about b 863 1.1 joerg // b 864 1.1 joerg // }; 865 1.1 joerg // 866 1.1 joerg // FIXME: Consider putting separate line comment sections as children to 867 1.1 joerg // the unwrapped line instead. 
868 1.1 joerg break; 869 1.1 joerg } 870 1.1 joerg } 871 1.1 joerg } 872 1.1 joerg 873 1.1 joerg unsigned 874 1.1 joerg BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset, 875 1.1 joerg StringRef::size_type Length, 876 1.1 joerg unsigned StartColumn) const { 877 1.1 joerg return encoding::columnWidthWithTabs( 878 1.1 joerg Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth, 879 1.1 joerg Encoding); 880 1.1 joerg } 881 1.1 joerg 882 1.1.1.2 joerg unsigned 883 1.1.1.2 joerg BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex, 884 1.1.1.2 joerg bool /*Break*/) const { 885 1.1 joerg return ContentColumn[LineIndex]; 886 1.1 joerg } 887 1.1 joerg 888 1.1 joerg void BreakableLineCommentSection::insertBreak( 889 1.1 joerg unsigned LineIndex, unsigned TailOffset, Split Split, 890 1.1 joerg unsigned ContentIndent, WhitespaceManager &Whitespaces) const { 891 1.1 joerg StringRef Text = Content[LineIndex].substr(TailOffset); 892 1.1 joerg // Compute the offset of the split relative to the beginning of the token 893 1.1 joerg // text. 
894 1.1 joerg unsigned BreakOffsetInToken = 895 1.1 joerg Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; 896 1.1 joerg unsigned CharsToRemove = Split.second; 897 1.1 joerg Whitespaces.replaceWhitespaceInToken( 898 1.1 joerg tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", 899 1.1 joerg Prefix[LineIndex], InPPDirective, /*Newlines=*/1, 900 1.1.1.2 joerg /*Spaces=*/ContentColumn[LineIndex] - Prefix[LineIndex].size()); 901 1.1 joerg } 902 1.1 joerg 903 1.1 joerg BreakableComment::Split BreakableLineCommentSection::getReflowSplit( 904 1.1.1.2 joerg unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const { 905 1.1 joerg if (!mayReflow(LineIndex, CommentPragmasRegex)) 906 1.1 joerg return Split(StringRef::npos, 0); 907 1.1 joerg 908 1.1 joerg size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks); 909 1.1 joerg 910 1.1 joerg // In a line comment section each line is a separate token; thus, after a 911 1.1 joerg // split we replace all whitespace before the current line comment token 912 1.1 joerg // (which does not need to be included in the split), plus the start of the 913 1.1 joerg // line up to where the content starts. 914 1.1 joerg return Split(0, Trimmed != StringRef::npos ? Trimmed : 0); 915 1.1 joerg } 916 1.1 joerg 917 1.1 joerg void BreakableLineCommentSection::reflow(unsigned LineIndex, 918 1.1 joerg WhitespaceManager &Whitespaces) const { 919 1.1 joerg if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) { 920 1.1 joerg // Reflow happens between tokens. Replace the whitespace between the 921 1.1 joerg // tokens by the empty string. 
922 1.1 joerg Whitespaces.replaceWhitespace( 923 1.1 joerg *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0, 924 1.1.1.2 joerg /*StartOfTokenColumn=*/StartColumn, /*IsAligned=*/true, 925 1.1.1.2 joerg /*InPPDirective=*/false); 926 1.1 joerg } else if (LineIndex > 0) { 927 1.1 joerg // In case we're reflowing after the '\' in: 928 1.1 joerg // 929 1.1 joerg // // line comment \ 930 1.1 joerg // // line 2 931 1.1 joerg // 932 1.1 joerg // the reflow happens inside the single comment token (it is a single line 933 1.1 joerg // comment with an unescaped newline). 934 1.1 joerg // Replace the whitespace between the '\' and '//' with the empty string. 935 1.1 joerg // 936 1.1 joerg // Offset points to after the '\' relative to start of the token. 937 1.1 joerg unsigned Offset = Lines[LineIndex - 1].data() + 938 1.1 joerg Lines[LineIndex - 1].size() - 939 1.1 joerg tokenAt(LineIndex - 1).TokenText.data(); 940 1.1 joerg // WhitespaceLength is the number of chars between the '\' and the '//' on 941 1.1 joerg // the next line. 942 1.1 joerg unsigned WhitespaceLength = 943 1.1 joerg Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset; 944 1.1 joerg Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset, 945 1.1 joerg /*ReplaceChars=*/WhitespaceLength, 946 1.1 joerg /*PreviousPostfix=*/"", 947 1.1 joerg /*CurrentPrefix=*/"", 948 1.1 joerg /*InPPDirective=*/false, 949 1.1 joerg /*Newlines=*/0, 950 1.1 joerg /*Spaces=*/0); 951 1.1 joerg } 952 1.1 joerg // Replace the indent and prefix of the token with the reflow prefix. 
953 1.1 joerg unsigned Offset = 954 1.1 joerg Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data(); 955 1.1 joerg unsigned WhitespaceLength = 956 1.1 joerg Content[LineIndex].data() - Lines[LineIndex].data(); 957 1.1 joerg Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset, 958 1.1 joerg /*ReplaceChars=*/WhitespaceLength, 959 1.1 joerg /*PreviousPostfix=*/"", 960 1.1 joerg /*CurrentPrefix=*/ReflowPrefix, 961 1.1 joerg /*InPPDirective=*/false, 962 1.1 joerg /*Newlines=*/0, 963 1.1 joerg /*Spaces=*/0); 964 1.1 joerg } 965 1.1 joerg 966 1.1 joerg void BreakableLineCommentSection::adaptStartOfLine( 967 1.1 joerg unsigned LineIndex, WhitespaceManager &Whitespaces) const { 968 1.1 joerg // If this is the first line of a token, we need to inform Whitespace Manager 969 1.1 joerg // about it: either adapt the whitespace range preceding it, or mark it as an 970 1.1 joerg // untouchable token. 971 1.1 joerg // This happens for instance here: 972 1.1 joerg // // line 1 \ 973 1.1 joerg // // line 2 974 1.1 joerg if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) { 975 1.1 joerg // This is the first line for the current token, but no reflow with the 976 1.1 joerg // previous token is necessary. However, we still may need to adjust the 977 1.1 joerg // start column. Note that ContentColumn[LineIndex] is the expected 978 1.1 joerg // content column after a possible update to the prefix, hence the prefix 979 1.1 joerg // length change is included. 980 1.1 joerg unsigned LineColumn = 981 1.1 joerg ContentColumn[LineIndex] - 982 1.1 joerg (Content[LineIndex].data() - Lines[LineIndex].data()) + 983 1.1 joerg (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size()); 984 1.1 joerg 985 1.1 joerg // We always want to create a replacement instead of adding an untouchable 986 1.1 joerg // token, even if LineColumn is the same as the original column of the 987 1.1 joerg // token. 
This is because WhitespaceManager doesn't align trailing 988 1.1 joerg // comments if they are untouchable. 989 1.1 joerg Whitespaces.replaceWhitespace(*Tokens[LineIndex], 990 1.1 joerg /*Newlines=*/1, 991 1.1 joerg /*Spaces=*/LineColumn, 992 1.1 joerg /*StartOfTokenColumn=*/LineColumn, 993 1.1.1.2 joerg /*IsAligned=*/true, 994 1.1 joerg /*InPPDirective=*/false); 995 1.1 joerg } 996 1.1 joerg if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) { 997 1.1 joerg // Adjust the prefix if necessary. 998 1.1.1.2 joerg const auto SpacesToRemove = -std::min(PrefixSpaceChange[LineIndex], 0); 999 1.1.1.2 joerg const auto SpacesToAdd = std::max(PrefixSpaceChange[LineIndex], 0); 1000 1.1 joerg Whitespaces.replaceWhitespaceInToken( 1001 1.1.1.2 joerg tokenAt(LineIndex), OriginalPrefix[LineIndex].size() - SpacesToRemove, 1002 1.1.1.2 joerg /*ReplaceChars=*/SpacesToRemove, "", "", /*InPPDirective=*/false, 1003 1.1.1.2 joerg /*Newlines=*/0, /*Spaces=*/SpacesToAdd); 1004 1.1 joerg } 1005 1.1 joerg } 1006 1.1 joerg 1007 1.1 joerg void BreakableLineCommentSection::updateNextToken(LineState &State) const { 1008 1.1 joerg if (LastLineTok) { 1009 1.1 joerg State.NextToken = LastLineTok->Next; 1010 1.1 joerg } 1011 1.1 joerg } 1012 1.1 joerg 1013 1.1 joerg bool BreakableLineCommentSection::mayReflow( 1014 1.1.1.2 joerg unsigned LineIndex, const llvm::Regex &CommentPragmasRegex) const { 1015 1.1 joerg // Line comments have the indent as part of the prefix, so we need to 1016 1.1 joerg // recompute the start of the line. 1017 1.1 joerg StringRef IndentContent = Content[LineIndex]; 1018 1.1 joerg if (Lines[LineIndex].startswith("//")) { 1019 1.1 joerg IndentContent = Lines[LineIndex].substr(2); 1020 1.1 joerg } 1021 1.1 joerg // FIXME: Decide whether we want to reflow non-regular indents: 1022 1.1 joerg // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the 1023 1.1 joerg // OriginalPrefix[LineIndex-1]. 
That means we don't reflow 1024 1.1 joerg // // text that protrudes 1025 1.1 joerg // // into text with different indent 1026 1.1 joerg // We do reflow in that case in block comments. 1027 1.1 joerg return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) && 1028 1.1 joerg mayReflowContent(Content[LineIndex]) && !Tok.Finalized && 1029 1.1 joerg !switchesFormatting(tokenAt(LineIndex)) && 1030 1.1 joerg OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1]; 1031 1.1 joerg } 1032 1.1 joerg 1033 1.1 joerg } // namespace format 1034 1.1 joerg } // namespace clang 1035