Home | History | Annotate | Line # | Download | only in Format
      1 //===--- WhitespaceManager.h - Format C++ code ------------------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 ///
      9 /// \file
     10 /// WhitespaceManager class manages whitespace around tokens and their
     11 /// replacements.
     12 ///
     13 //===----------------------------------------------------------------------===//
     14 
     15 #ifndef LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
     16 #define LLVM_CLANG_LIB_FORMAT_WHITESPACEMANAGER_H
     17 
     18 #include "TokenAnnotator.h"
     19 #include "clang/Basic/SourceManager.h"
     20 #include "clang/Format/Format.h"
     21 #include <string>
     22 #include <tuple>
     23 
     24 namespace clang {
     25 namespace format {
     26 
     27 /// Manages the whitespaces around tokens and their replacements.
     28 ///
     29 /// This includes special handling for certain constructs, e.g. the alignment of
     30 /// trailing line comments.
     31 ///
     32 /// To guarantee correctness of alignment operations, the \c WhitespaceManager
     33 /// must be informed about every token in the source file; for each token, there
     34 /// must be exactly one call to either \c replaceWhitespace or
     35 /// \c addUntouchableToken.
     36 ///
     37 /// There may be multiple calls to \c breakToken for a given token.
     38 class WhitespaceManager {
     39 public:
     40   WhitespaceManager(const SourceManager &SourceMgr, const FormatStyle &Style,
     41                     bool UseCRLF)
     42       : SourceMgr(SourceMgr), Style(Style), UseCRLF(UseCRLF) {}
     43 
     44   bool useCRLF() const { return UseCRLF; }
     45 
     46   /// Replaces the whitespace in front of \p Tok. Only call once for
     47   /// each \c AnnotatedToken.
     48   ///
     49   /// \p StartOfTokenColumn is the column at which the token will start after
     50   /// this replacement. It is needed for determining how \p Spaces is turned
     51   /// into tabs and spaces for some format styles.
     52   void replaceWhitespace(FormatToken &Tok, unsigned Newlines, unsigned Spaces,
     53                          unsigned StartOfTokenColumn, bool isAligned = false,
     54                          bool InPPDirective = false);
     55 
     56   /// Adds information about an unchangeable token's whitespace.
     57   ///
     58   /// Needs to be called for every token for which \c replaceWhitespace
     59   /// was not called.
     60   void addUntouchableToken(const FormatToken &Tok, bool InPPDirective);
     61 
     62   llvm::Error addReplacement(const tooling::Replacement &Replacement);
     63 
     64   /// Inserts or replaces whitespace in the middle of a token.
     65   ///
     66   /// Inserts \p PreviousPostfix, \p Newlines, \p Spaces and \p CurrentPrefix
     67   /// (in this order) at \p Offset inside \p Tok, replacing \p ReplaceChars
     68   /// characters.
     69   ///
     70   /// Note: \p Spaces can be negative to retain information about initial
     71   /// relative column offset between a line of a block comment and the start of
     72   /// the comment. This negative offset may be compensated by trailing comment
     73   /// alignment here. In all other cases negative \p Spaces will be truncated to
     74   /// 0.
     75   ///
     76   /// When \p InPPDirective is true, escaped newlines are inserted. \p Spaces is
     77   /// used to align backslashes correctly.
     78   void replaceWhitespaceInToken(const FormatToken &Tok, unsigned Offset,
     79                                 unsigned ReplaceChars,
     80                                 StringRef PreviousPostfix,
     81                                 StringRef CurrentPrefix, bool InPPDirective,
     82                                 unsigned Newlines, int Spaces);
     83 
     84   /// Returns all the \c Replacements created during formatting.
     85   const tooling::Replacements &generateReplacements();
     86 
     87   /// Represents a change before a token, a break inside a token,
     88   /// or the layout of an unchanged token (or whitespace within).
     89   struct Change {
     90     /// Functor to sort changes in original source order.
     91     class IsBeforeInFile {
     92     public:
     93       IsBeforeInFile(const SourceManager &SourceMgr) : SourceMgr(SourceMgr) {}
     94       bool operator()(const Change &C1, const Change &C2) const;
     95 
     96     private:
     97       const SourceManager &SourceMgr;
     98     };
     99 
    100     /// Creates a \c Change.
    101     ///
    102     /// The generated \c Change will replace the characters at
    103     /// \p OriginalWhitespaceRange with a concatenation of
    104     /// \p PreviousLinePostfix, \p NewlinesBefore line breaks, \p Spaces spaces
    105     /// and \p CurrentLinePrefix.
    106     ///
    107     /// \p StartOfTokenColumn and \p InPPDirective will be used to lay out
    108     /// trailing comments and escaped newlines.
    109     Change(const FormatToken &Tok, bool CreateReplacement,
    110            SourceRange OriginalWhitespaceRange, int Spaces,
    111            unsigned StartOfTokenColumn, unsigned NewlinesBefore,
    112            StringRef PreviousLinePostfix, StringRef CurrentLinePrefix,
    113            bool IsAligned, bool ContinuesPPDirective, bool IsInsideToken);
    114 
    115     // The kind of the token whose whitespace this change replaces, or in which
    116     // this change inserts whitespace.
    117     // FIXME: Currently this is not set correctly for breaks inside comments, as
    118     // the \c BreakableToken is still doing its own alignment.
    119     const FormatToken *Tok;
    120 
    121     bool CreateReplacement;
    122     // Changes might be in the middle of a token, so we cannot just keep the
    123     // FormatToken around to query its information.
    124     SourceRange OriginalWhitespaceRange;
    125     unsigned StartOfTokenColumn;
    126     unsigned NewlinesBefore;
    127     std::string PreviousLinePostfix;
    128     std::string CurrentLinePrefix;
    129     bool IsAligned;
    130     bool ContinuesPPDirective;
    131 
    132     // The number of spaces in front of the token or broken part of the token.
    133     // This will be adapted when aligning tokens.
    134     // Can be negative to retain information about the initial relative offset
    135     // of the lines in a block comment. This is used when aligning trailing
    136     // comments. Uncompensated negative offset is truncated to 0.
    137     int Spaces;
    138 
    139     // If this change is inside of a token but not at the start of the token or
    140     // directly after a newline.
    141     bool IsInsideToken;
    142 
    143     // \c IsTrailingComment, \c TokenLength, \c PreviousEndOfTokenColumn and
    144     // \c EscapedNewlineColumn will be calculated in
    145     // \c calculateLineBreakInformation.
    146     bool IsTrailingComment;
    147     unsigned TokenLength;
    148     unsigned PreviousEndOfTokenColumn;
    149     unsigned EscapedNewlineColumn;
    150 
    151     // These fields are used to retain correct relative line indentation in a
    152     // block comment when aligning trailing comments.
    153     //
    154     // If this Change represents a continuation of a block comment,
    155     // \c StartOfBlockComment is pointer to the first Change in the block
    156     // comment. \c IndentationOffset is a relative column offset to this
    157     // change, so that the correct column can be reconstructed at the end of
    158     // the alignment process.
    159     const Change *StartOfBlockComment;
    160     int IndentationOffset;
    161 
    162     // Depth of conditionals. Computed from tracking fake parenthesis, except
    163     // it does not increase the indent for "chained" conditionals.
    164     int ConditionalsLevel;
    165 
    166     // A combination of indent, nesting and conditionals levels, which are used
    167     // in tandem to compute lexical scope, for the purposes of deciding
    168     // when to stop consecutive alignment runs.
    169     std::tuple<unsigned, unsigned, unsigned> indentAndNestingLevel() const {
    170       return std::make_tuple(Tok->IndentLevel, Tok->NestingLevel,
    171                              ConditionalsLevel);
    172     }
    173   };
    174 
    175 private:
    176   /// Calculate \c IsTrailingComment, \c TokenLength for the last tokens
    177   /// or token parts in a line and \c PreviousEndOfTokenColumn and
    178   /// \c EscapedNewlineColumn for the first tokens or token parts in a line.
    179   void calculateLineBreakInformation();
    180 
    181   /// \brief Align consecutive C/C++ preprocessor macros over all \c Changes.
    182   void alignConsecutiveMacros();
    183 
    184   /// Align consecutive assignments over all \c Changes.
    185   void alignConsecutiveAssignments();
    186 
    187   /// Align consecutive bitfields over all \c Changes.
    188   void alignConsecutiveBitFields();
    189 
    190   /// Align consecutive declarations over all \c Changes.
    191   void alignConsecutiveDeclarations();
    192 
    193   /// Align consecutive declarations over all \c Changes.
    194   void alignChainedConditionals();
    195 
    196   /// Align trailing comments over all \c Changes.
    197   void alignTrailingComments();
    198 
    199   /// Align trailing comments from change \p Start to change \p End at
    200   /// the specified \p Column.
    201   void alignTrailingComments(unsigned Start, unsigned End, unsigned Column);
    202 
    203   /// Align escaped newlines over all \c Changes.
    204   void alignEscapedNewlines();
    205 
    206   /// Align escaped newlines from change \p Start to change \p End at
    207   /// the specified \p Column.
    208   void alignEscapedNewlines(unsigned Start, unsigned End, unsigned Column);
    209 
    210   /// Fill \c Replaces with the replacements for all effective changes.
    211   void generateChanges();
    212 
    213   /// Stores \p Text as the replacement for the whitespace in \p Range.
    214   void storeReplacement(SourceRange Range, StringRef Text);
    215   void appendNewlineText(std::string &Text, unsigned Newlines);
    216   void appendEscapedNewlineText(std::string &Text, unsigned Newlines,
    217                                 unsigned PreviousEndOfTokenColumn,
    218                                 unsigned EscapedNewlineColumn);
    219   void appendIndentText(std::string &Text, unsigned IndentLevel,
    220                         unsigned Spaces, unsigned WhitespaceStartColumn,
    221                         bool IsAligned);
    222   unsigned appendTabIndent(std::string &Text, unsigned Spaces,
    223                            unsigned Indentation);
    224 
    225   SmallVector<Change, 16> Changes;
    226   const SourceManager &SourceMgr;
    227   tooling::Replacements Replaces;
    228   const FormatStyle &Style;
    229   bool UseCRLF;
    230 };
    231 
    232 } // namespace format
    233 } // namespace clang
    234 
    235 #endif
    236