Home | History | Annotate | Line # | Download | only in ADT
      1 //===- Twine.h - Fast Temporary String Concatenation ------------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 
      9 #ifndef LLVM_ADT_TWINE_H
     10 #define LLVM_ADT_TWINE_H
     11 
     12 #include "llvm/ADT/SmallVector.h"
     13 #include "llvm/ADT/StringRef.h"
     14 #include "llvm/Support/ErrorHandling.h"
     15 #include <cassert>
     16 #include <cstdint>
     17 #include <string>
     18 
     19 namespace llvm {
     20 
     21   class formatv_object_base;
     22   class raw_ostream;
     23 
     24   /// Twine - A lightweight data structure for efficiently representing the
     25   /// concatenation of temporary values as strings.
     26   ///
     27   /// A Twine is a kind of rope, it represents a concatenated string using a
     28   /// binary-tree, where the string is the preorder of the nodes. Since the
     29   /// Twine can be efficiently rendered into a buffer when its result is used,
     30   /// it avoids the cost of generating temporary values for intermediate string
     31   /// results -- particularly in cases when the Twine result is never
     32   /// required. By explicitly tracking the type of leaf nodes, we can also avoid
     33   /// the creation of temporary strings for conversions operations (such as
     34   /// appending an integer to a string).
     35   ///
     36   /// A Twine is not intended for use directly and should not be stored, its
     37   /// implementation relies on the ability to store pointers to temporary stack
     38   /// objects which may be deallocated at the end of a statement. Twines should
     39   /// only be used accepted as const references in arguments, when an API wishes
     40   /// to accept possibly-concatenated strings.
     41   ///
     42   /// Twines support a special 'null' value, which always concatenates to form
     43   /// itself, and renders as an empty string. This can be returned from APIs to
     44   /// effectively nullify any concatenations performed on the result.
     45   ///
     46   /// \b Implementation
     47   ///
     48   /// Given the nature of a Twine, it is not possible for the Twine's
     49   /// concatenation method to construct interior nodes; the result must be
     50   /// represented inside the returned value. For this reason a Twine object
     51   /// actually holds two values, the left- and right-hand sides of a
     52   /// concatenation. We also have nullary Twine objects, which are effectively
     53   /// sentinel values that represent empty strings.
     54   ///
     55   /// Thus, a Twine can effectively have zero, one, or two children. The \see
     56   /// isNullary(), \see isUnary(), and \see isBinary() predicates exist for
     57   /// testing the number of children.
     58   ///
     59   /// We maintain a number of invariants on Twine objects (FIXME: Why):
     60   ///  - Nullary twines are always represented with their Kind on the left-hand
     61   ///    side, and the Empty kind on the right-hand side.
     62   ///  - Unary twines are always represented with the value on the left-hand
     63   ///    side, and the Empty kind on the right-hand side.
     64   ///  - If a Twine has another Twine as a child, that child should always be
     65   ///    binary (otherwise it could have been folded into the parent).
     66   ///
     67   /// These invariants are check by \see isValid().
     68   ///
     69   /// \b Efficiency Considerations
     70   ///
     71   /// The Twine is designed to yield efficient and small code for common
     72   /// situations. For this reason, the concat() method is inlined so that
     73   /// concatenations of leaf nodes can be optimized into stores directly into a
     74   /// single stack allocated object.
     75   ///
     76   /// In practice, not all compilers can be trusted to optimize concat() fully,
     77   /// so we provide two additional methods (and accompanying operator+
     78   /// overloads) to guarantee that particularly important cases (cstring plus
     79   /// StringRef) codegen as desired.
     80   class Twine {
     81     /// NodeKind - Represent the type of an argument.
     82     enum NodeKind : unsigned char {
     83       /// An empty string; the result of concatenating anything with it is also
     84       /// empty.
     85       NullKind,
     86 
     87       /// The empty string.
     88       EmptyKind,
     89 
     90       /// A pointer to a Twine instance.
     91       TwineKind,
     92 
     93       /// A pointer to a C string instance.
     94       CStringKind,
     95 
     96       /// A pointer to an std::string instance.
     97       StdStringKind,
     98 
     99       /// A pointer to a StringRef instance.
    100       StringRefKind,
    101 
    102       /// A pointer to a SmallString instance.
    103       SmallStringKind,
    104 
    105       /// A pointer to a formatv_object_base instance.
    106       FormatvObjectKind,
    107 
    108       /// A char value, to render as a character.
    109       CharKind,
    110 
    111       /// An unsigned int value, to render as an unsigned decimal integer.
    112       DecUIKind,
    113 
    114       /// An int value, to render as a signed decimal integer.
    115       DecIKind,
    116 
    117       /// A pointer to an unsigned long value, to render as an unsigned decimal
    118       /// integer.
    119       DecULKind,
    120 
    121       /// A pointer to a long value, to render as a signed decimal integer.
    122       DecLKind,
    123 
    124       /// A pointer to an unsigned long long value, to render as an unsigned
    125       /// decimal integer.
    126       DecULLKind,
    127 
    128       /// A pointer to a long long value, to render as a signed decimal integer.
    129       DecLLKind,
    130 
    131       /// A pointer to a uint64_t value, to render as an unsigned hexadecimal
    132       /// integer.
    133       UHexKind
    134     };
    135 
    136     union Child
    137     {
    138       const Twine *twine;
    139       const char *cString;
    140       const std::string *stdString;
    141       const StringRef *stringRef;
    142       const SmallVectorImpl<char> *smallString;
    143       const formatv_object_base *formatvObject;
    144       char character;
    145       unsigned int decUI;
    146       int decI;
    147       const unsigned long *decUL;
    148       const long *decL;
    149       const unsigned long long *decULL;
    150       const long long *decLL;
    151       const uint64_t *uHex;
    152     };
    153 
    154     /// LHS - The prefix in the concatenation, which may be uninitialized for
    155     /// Null or Empty kinds.
    156     Child LHS;
    157 
    158     /// RHS - The suffix in the concatenation, which may be uninitialized for
    159     /// Null or Empty kinds.
    160     Child RHS;
    161 
    162     /// LHSKind - The NodeKind of the left hand side, \see getLHSKind().
    163     NodeKind LHSKind = EmptyKind;
    164 
    165     /// RHSKind - The NodeKind of the right hand side, \see getRHSKind().
    166     NodeKind RHSKind = EmptyKind;
    167 
    168     /// Construct a nullary twine; the kind must be NullKind or EmptyKind.
    169     explicit Twine(NodeKind Kind) : LHSKind(Kind) {
    170       assert(isNullary() && "Invalid kind!");
    171     }
    172 
    173     /// Construct a binary twine.
    174     explicit Twine(const Twine &LHS, const Twine &RHS)
    175         : LHSKind(TwineKind), RHSKind(TwineKind) {
    176       this->LHS.twine = &LHS;
    177       this->RHS.twine = &RHS;
    178       assert(isValid() && "Invalid twine!");
    179     }
    180 
    181     /// Construct a twine from explicit values.
    182     explicit Twine(Child LHS, NodeKind LHSKind, Child RHS, NodeKind RHSKind)
    183         : LHS(LHS), RHS(RHS), LHSKind(LHSKind), RHSKind(RHSKind) {
    184       assert(isValid() && "Invalid twine!");
    185     }
    186 
    187     /// Check for the null twine.
    188     bool isNull() const {
    189       return getLHSKind() == NullKind;
    190     }
    191 
    192     /// Check for the empty twine.
    193     bool isEmpty() const {
    194       return getLHSKind() == EmptyKind;
    195     }
    196 
    197     /// Check if this is a nullary twine (null or empty).
    198     bool isNullary() const {
    199       return isNull() || isEmpty();
    200     }
    201 
    202     /// Check if this is a unary twine.
    203     bool isUnary() const {
    204       return getRHSKind() == EmptyKind && !isNullary();
    205     }
    206 
    207     /// Check if this is a binary twine.
    208     bool isBinary() const {
    209       return getLHSKind() != NullKind && getRHSKind() != EmptyKind;
    210     }
    211 
    212     /// Check if this is a valid twine (satisfying the invariants on
    213     /// order and number of arguments).
    214     bool isValid() const {
    215       // Nullary twines always have Empty on the RHS.
    216       if (isNullary() && getRHSKind() != EmptyKind)
    217         return false;
    218 
    219       // Null should never appear on the RHS.
    220       if (getRHSKind() == NullKind)
    221         return false;
    222 
    223       // The RHS cannot be non-empty if the LHS is empty.
    224       if (getRHSKind() != EmptyKind && getLHSKind() == EmptyKind)
    225         return false;
    226 
    227       // A twine child should always be binary.
    228       if (getLHSKind() == TwineKind &&
    229           !LHS.twine->isBinary())
    230         return false;
    231       if (getRHSKind() == TwineKind &&
    232           !RHS.twine->isBinary())
    233         return false;
    234 
    235       return true;
    236     }
    237 
    238     /// Get the NodeKind of the left-hand side.
    239     NodeKind getLHSKind() const { return LHSKind; }
    240 
    241     /// Get the NodeKind of the right-hand side.
    242     NodeKind getRHSKind() const { return RHSKind; }
    243 
    244     /// Print one child from a twine.
    245     void printOneChild(raw_ostream &OS, Child Ptr, NodeKind Kind) const;
    246 
    247     /// Print the representation of one child from a twine.
    248     void printOneChildRepr(raw_ostream &OS, Child Ptr,
    249                            NodeKind Kind) const;
    250 
    251   public:
    252     /// @name Constructors
    253     /// @{
    254 
    255     /// Construct from an empty string.
    256     /*implicit*/ Twine() {
    257       assert(isValid() && "Invalid twine!");
    258     }
    259 
    260     Twine(const Twine &) = default;
    261 
    262     /// Construct from a C string.
    263     ///
    264     /// We take care here to optimize "" into the empty twine -- this will be
    265     /// optimized out for string constants. This allows Twine arguments have
    266     /// default "" values, without introducing unnecessary string constants.
    267     /*implicit*/ Twine(const char *Str) {
    268       if (Str[0] != '\0') {
    269         LHS.cString = Str;
    270         LHSKind = CStringKind;
    271       } else
    272         LHSKind = EmptyKind;
    273 
    274       assert(isValid() && "Invalid twine!");
    275     }
    276     /// Delete the implicit conversion from nullptr as Twine(const char *)
    277     /// cannot take nullptr.
    278     /*implicit*/ Twine(std::nullptr_t) = delete;
    279 
    280     /// Construct from an std::string.
    281     /*implicit*/ Twine(const std::string &Str) : LHSKind(StdStringKind) {
    282       LHS.stdString = &Str;
    283       assert(isValid() && "Invalid twine!");
    284     }
    285 
    286     /// Construct from a StringRef.
    287     /*implicit*/ Twine(const StringRef &Str) : LHSKind(StringRefKind) {
    288       LHS.stringRef = &Str;
    289       assert(isValid() && "Invalid twine!");
    290     }
    291 
    292     /// Construct from a SmallString.
    293     /*implicit*/ Twine(const SmallVectorImpl<char> &Str)
    294         : LHSKind(SmallStringKind) {
    295       LHS.smallString = &Str;
    296       assert(isValid() && "Invalid twine!");
    297     }
    298 
    299     /// Construct from a formatv_object_base.
    300     /*implicit*/ Twine(const formatv_object_base &Fmt)
    301         : LHSKind(FormatvObjectKind) {
    302       LHS.formatvObject = &Fmt;
    303       assert(isValid() && "Invalid twine!");
    304     }
    305 
    306     /// Construct from a char.
    307     explicit Twine(char Val) : LHSKind(CharKind) {
    308       LHS.character = Val;
    309     }
    310 
    311     /// Construct from a signed char.
    312     explicit Twine(signed char Val) : LHSKind(CharKind) {
    313       LHS.character = static_cast<char>(Val);
    314     }
    315 
    316     /// Construct from an unsigned char.
    317     explicit Twine(unsigned char Val) : LHSKind(CharKind) {
    318       LHS.character = static_cast<char>(Val);
    319     }
    320 
    321     /// Construct a twine to print \p Val as an unsigned decimal integer.
    322     explicit Twine(unsigned Val) : LHSKind(DecUIKind) {
    323       LHS.decUI = Val;
    324     }
    325 
    326     /// Construct a twine to print \p Val as a signed decimal integer.
    327     explicit Twine(int Val) : LHSKind(DecIKind) {
    328       LHS.decI = Val;
    329     }
    330 
    331     /// Construct a twine to print \p Val as an unsigned decimal integer.
    332     explicit Twine(const unsigned long &Val) : LHSKind(DecULKind) {
    333       LHS.decUL = &Val;
    334     }
    335 
    336     /// Construct a twine to print \p Val as a signed decimal integer.
    337     explicit Twine(const long &Val) : LHSKind(DecLKind) {
    338       LHS.decL = &Val;
    339     }
    340 
    341     /// Construct a twine to print \p Val as an unsigned decimal integer.
    342     explicit Twine(const unsigned long long &Val) : LHSKind(DecULLKind) {
    343       LHS.decULL = &Val;
    344     }
    345 
    346     /// Construct a twine to print \p Val as a signed decimal integer.
    347     explicit Twine(const long long &Val) : LHSKind(DecLLKind) {
    348       LHS.decLL = &Val;
    349     }
    350 
    351     // FIXME: Unfortunately, to make sure this is as efficient as possible we
    352     // need extra binary constructors from particular types. We can't rely on
    353     // the compiler to be smart enough to fold operator+()/concat() down to the
    354     // right thing. Yet.
    355 
    356     /// Construct as the concatenation of a C string and a StringRef.
    357     /*implicit*/ Twine(const char *LHS, const StringRef &RHS)
    358         : LHSKind(CStringKind), RHSKind(StringRefKind) {
    359       this->LHS.cString = LHS;
    360       this->RHS.stringRef = &RHS;
    361       assert(isValid() && "Invalid twine!");
    362     }
    363 
    364     /// Construct as the concatenation of a StringRef and a C string.
    365     /*implicit*/ Twine(const StringRef &LHS, const char *RHS)
    366         : LHSKind(StringRefKind), RHSKind(CStringKind) {
    367       this->LHS.stringRef = &LHS;
    368       this->RHS.cString = RHS;
    369       assert(isValid() && "Invalid twine!");
    370     }
    371 
    372     /// Since the intended use of twines is as temporary objects, assignments
    373     /// when concatenating might cause undefined behavior or stack corruptions
    374     Twine &operator=(const Twine &) = delete;
    375 
    376     /// Create a 'null' string, which is an empty string that always
    377     /// concatenates to form another empty string.
    378     static Twine createNull() {
    379       return Twine(NullKind);
    380     }
    381 
    382     /// @}
    383     /// @name Numeric Conversions
    384     /// @{
    385 
    386     // Construct a twine to print \p Val as an unsigned hexadecimal integer.
    387     static Twine utohexstr(const uint64_t &Val) {
    388       Child LHS, RHS;
    389       LHS.uHex = &Val;
    390       RHS.twine = nullptr;
    391       return Twine(LHS, UHexKind, RHS, EmptyKind);
    392     }
    393 
    394     /// @}
    395     /// @name Predicate Operations
    396     /// @{
    397 
    398     /// Check if this twine is trivially empty; a false return value does not
    399     /// necessarily mean the twine is empty.
    400     bool isTriviallyEmpty() const {
    401       return isNullary();
    402     }
    403 
    404     /// Return true if this twine can be dynamically accessed as a single
    405     /// StringRef value with getSingleStringRef().
    406     bool isSingleStringRef() const {
    407       if (getRHSKind() != EmptyKind) return false;
    408 
    409       switch (getLHSKind()) {
    410       case EmptyKind:
    411       case CStringKind:
    412       case StdStringKind:
    413       case StringRefKind:
    414       case SmallStringKind:
    415         return true;
    416       default:
    417         return false;
    418       }
    419     }
    420 
    421     /// @}
    422     /// @name String Operations
    423     /// @{
    424 
    425     Twine concat(const Twine &Suffix) const;
    426 
    427     /// @}
    428     /// @name Output & Conversion.
    429     /// @{
    430 
    431     /// Return the twine contents as a std::string.
    432     std::string str() const;
    433 
    434     /// Append the concatenated string into the given SmallString or SmallVector.
    435     void toVector(SmallVectorImpl<char> &Out) const;
    436 
    437     /// This returns the twine as a single StringRef.  This method is only valid
    438     /// if isSingleStringRef() is true.
    439     StringRef getSingleStringRef() const {
    440       assert(isSingleStringRef() &&"This cannot be had as a single stringref!");
    441       switch (getLHSKind()) {
    442       default: llvm_unreachable("Out of sync with isSingleStringRef");
    443       case EmptyKind:      return StringRef();
    444       case CStringKind:    return StringRef(LHS.cString);
    445       case StdStringKind:  return StringRef(*LHS.stdString);
    446       case StringRefKind:  return *LHS.stringRef;
    447       case SmallStringKind:
    448         return StringRef(LHS.smallString->data(), LHS.smallString->size());
    449       }
    450     }
    451 
    452     /// This returns the twine as a single StringRef if it can be
    453     /// represented as such. Otherwise the twine is written into the given
    454     /// SmallVector and a StringRef to the SmallVector's data is returned.
    455     StringRef toStringRef(SmallVectorImpl<char> &Out) const {
    456       if (isSingleStringRef())
    457         return getSingleStringRef();
    458       toVector(Out);
    459       return StringRef(Out.data(), Out.size());
    460     }
    461 
    462     /// This returns the twine as a single null terminated StringRef if it
    463     /// can be represented as such. Otherwise the twine is written into the
    464     /// given SmallVector and a StringRef to the SmallVector's data is returned.
    465     ///
    466     /// The returned StringRef's size does not include the null terminator.
    467     StringRef toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const;
    468 
    469     /// Write the concatenated string represented by this twine to the
    470     /// stream \p OS.
    471     void print(raw_ostream &OS) const;
    472 
    473     /// Dump the concatenated string represented by this twine to stderr.
    474     void dump() const;
    475 
    476     /// Write the representation of this twine to the stream \p OS.
    477     void printRepr(raw_ostream &OS) const;
    478 
    479     /// Dump the representation of this twine to stderr.
    480     void dumpRepr() const;
    481 
    482     /// @}
    483   };
    484 
    485   /// @name Twine Inline Implementations
    486   /// @{
    487 
    488   inline Twine Twine::concat(const Twine &Suffix) const {
    489     // Concatenation with null is null.
    490     if (isNull() || Suffix.isNull())
    491       return Twine(NullKind);
    492 
    493     // Concatenation with empty yields the other side.
    494     if (isEmpty())
    495       return Suffix;
    496     if (Suffix.isEmpty())
    497       return *this;
    498 
    499     // Otherwise we need to create a new node, taking care to fold in unary
    500     // twines.
    501     Child NewLHS, NewRHS;
    502     NewLHS.twine = this;
    503     NewRHS.twine = &Suffix;
    504     NodeKind NewLHSKind = TwineKind, NewRHSKind = TwineKind;
    505     if (isUnary()) {
    506       NewLHS = LHS;
    507       NewLHSKind = getLHSKind();
    508     }
    509     if (Suffix.isUnary()) {
    510       NewRHS = Suffix.LHS;
    511       NewRHSKind = Suffix.getLHSKind();
    512     }
    513 
    514     return Twine(NewLHS, NewLHSKind, NewRHS, NewRHSKind);
    515   }
    516 
    517   inline Twine operator+(const Twine &LHS, const Twine &RHS) {
    518     return LHS.concat(RHS);
    519   }
    520 
    521   /// Additional overload to guarantee simplified codegen; this is equivalent to
    522   /// concat().
    523 
    524   inline Twine operator+(const char *LHS, const StringRef &RHS) {
    525     return Twine(LHS, RHS);
    526   }
    527 
    528   /// Additional overload to guarantee simplified codegen; this is equivalent to
    529   /// concat().
    530 
    531   inline Twine operator+(const StringRef &LHS, const char *RHS) {
    532     return Twine(LHS, RHS);
    533   }
    534 
    535   inline raw_ostream &operator<<(raw_ostream &OS, const Twine &RHS) {
    536     RHS.print(OS);
    537     return OS;
    538   }
    539 
    540   /// @}
    541 
    542 } // end namespace llvm
    543 
    544 #endif // LLVM_ADT_TWINE_H
    545