Home | History | Annotate | Line # | Download | only in BinaryFormat
      1 //===-- MsgPackDocument.h - MsgPack Document --------------------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 /// \file
      9 /// This file declares a class that exposes a simple in-memory representation
     10 /// of a document of MsgPack objects, that can be read from MsgPack, written to
     11 /// MsgPack, and inspected and modified in memory. This is intended to be a
     12 /// lighter-weight (in terms of memory allocations) replacement for
     13 /// MsgPackTypes.
     14 ///
     15 //===----------------------------------------------------------------------===//
     16 
     17 #ifndef LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
     18 #define LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
     19 
     20 #include "llvm/BinaryFormat/MsgPackReader.h"
     21 #include <map>
     22 
     23 namespace llvm {
     24 namespace msgpack {
     25 
     26 class ArrayDocNode;
     27 class Document;
     28 class MapDocNode;
     29 
     30 /// The kind of a DocNode and its owning Document.
     31 struct KindAndDocument {
     32   Document *Doc;
     33   Type Kind;
     34 };
     35 
     36 /// A node in a MsgPack Document. This is a simple copyable and
     37 /// passable-by-value type that does not own any memory.
     38 class DocNode {
     39   friend Document;
     40 
     41 public:
     42   typedef std::map<DocNode, DocNode> MapTy;
     43   typedef std::vector<DocNode> ArrayTy;
     44 
     45 private:
     46   // Using KindAndDocument allows us to squeeze Kind and a pointer to the
     47   // owning Document into the same word. Having a pointer to the owning
     48   // Document makes the API of DocNode more convenient, and allows its use in
     49   // YAMLIO.
     50   const KindAndDocument *KindAndDoc;
     51 
     52 protected:
     53   // The union of different values.
     54   union {
     55     int64_t Int;
     56     uint64_t UInt;
     57     bool Bool;
     58     double Float;
     59     StringRef Raw;
     60     ArrayTy *Array;
     61     MapTy *Map;
     62   };
     63 
     64 public:
     65   // Default constructor gives an empty node with no associated Document. All
     66   // you can do with it is "isEmpty()".
     67   DocNode() : KindAndDoc(nullptr) {}
     68 
     69   // Type methods
     70   bool isMap() const { return getKind() == Type::Map; }
     71   bool isArray() const { return getKind() == Type::Array; }
     72   bool isScalar() const { return !isMap() && !isArray(); }
     73   bool isString() const { return getKind() == Type::String; }
     74 
     75   // Accessors. isEmpty() returns true for both a default-constructed DocNode
     76   // that has no associated Document, and the result of getEmptyNode(), which
     77   // does have an associated document.
     78   bool isEmpty() const { return !KindAndDoc || getKind() == Type::Empty; }
     79   Type getKind() const { return KindAndDoc->Kind; }
     80   Document *getDocument() const { return KindAndDoc->Doc; }
     81 
     82   int64_t &getInt() {
     83     assert(getKind() == Type::Int);
     84     return Int;
     85   }
     86 
     87   uint64_t &getUInt() {
     88     assert(getKind() == Type::UInt);
     89     return UInt;
     90   }
     91 
     92   bool &getBool() {
     93     assert(getKind() == Type::Boolean);
     94     return Bool;
     95   }
     96 
     97   double &getFloat() {
     98     assert(getKind() == Type::Float);
     99     return Float;
    100   }
    101 
    102   int64_t getInt() const {
    103     assert(getKind() == Type::Int);
    104     return Int;
    105   }
    106 
    107   uint64_t getUInt() const {
    108     assert(getKind() == Type::UInt);
    109     return UInt;
    110   }
    111 
    112   bool getBool() const {
    113     assert(getKind() == Type::Boolean);
    114     return Bool;
    115   }
    116 
    117   double getFloat() const {
    118     assert(getKind() == Type::Float);
    119     return Float;
    120   }
    121 
    122   StringRef getString() const {
    123     assert(getKind() == Type::String);
    124     return Raw;
    125   }
    126 
    127   /// Get an ArrayDocNode for an array node. If Convert, convert the node to an
    128   /// array node if necessary.
    129   ArrayDocNode &getArray(bool Convert = false) {
    130     if (getKind() != Type::Array) {
    131       assert(Convert);
    132       convertToArray();
    133     }
    134     // This could be a static_cast, except ArrayDocNode is a forward reference.
    135     return *reinterpret_cast<ArrayDocNode *>(this);
    136   }
    137 
    138   /// Get a MapDocNode for a map node. If Convert, convert the node to a map
    139   /// node if necessary.
    140   MapDocNode &getMap(bool Convert = false) {
    141     if (getKind() != Type::Map) {
    142       assert(Convert);
    143       convertToMap();
    144     }
    145     // This could be a static_cast, except MapDocNode is a forward reference.
    146     return *reinterpret_cast<MapDocNode *>(this);
    147   }
    148 
    149   /// Comparison operator, used for map keys.
    150   friend bool operator<(const DocNode &Lhs, const DocNode &Rhs) {
    151     // This has to cope with one or both of the nodes being default-constructed,
    152     // such that KindAndDoc is not set.
    153     if (Rhs.isEmpty())
    154       return false;
    155     if (Lhs.KindAndDoc != Rhs.KindAndDoc) {
    156       if (Lhs.isEmpty())
    157         return true;
    158       return (unsigned)Lhs.getKind() < (unsigned)Rhs.getKind();
    159     }
    160     switch (Lhs.getKind()) {
    161     case Type::Int:
    162       return Lhs.Int < Rhs.Int;
    163     case Type::UInt:
    164       return Lhs.UInt < Rhs.UInt;
    165     case Type::Nil:
    166       return false;
    167     case Type::Boolean:
    168       return Lhs.Bool < Rhs.Bool;
    169     case Type::Float:
    170       return Lhs.Float < Rhs.Float;
    171     case Type::String:
    172     case Type::Binary:
    173       return Lhs.Raw < Rhs.Raw;
    174     default:
    175       llvm_unreachable("bad map key type");
    176     }
    177   }
    178 
    179   /// Equality operator
    180   friend bool operator==(const DocNode &Lhs, const DocNode &Rhs) {
    181     return !(Lhs < Rhs) && !(Rhs < Lhs);
    182   }
    183 
    184   /// Inequality operator
    185   friend bool operator!=(const DocNode &Lhs, const DocNode &Rhs) {
    186     return !(Lhs == Rhs);
    187   }
    188 
    189   /// Convert this node to a string, assuming it is scalar.
    190   std::string toString() const;
    191 
    192   /// Convert the StringRef and use it to set this DocNode (assuming scalar). If
    193   /// it is a string, copy the string into the Document's strings list so we do
    194   /// not rely on S having a lifetime beyond this call. Tag is "" or a YAML tag.
    195   StringRef fromString(StringRef S, StringRef Tag = "");
    196 
    197   /// Convenience assignment operators. This only works if the destination
    198   /// DocNode has an associated Document, i.e. it was not constructed using the
    199   /// default constructor. The string one does not copy, so the string must
    200   /// remain valid for the lifetime of the Document. Use fromString to avoid
    201   /// that restriction.
    202   DocNode &operator=(const char *Val) { return *this = StringRef(Val); }
    203   DocNode &operator=(StringRef Val);
    204   DocNode &operator=(bool Val);
    205   DocNode &operator=(int Val);
    206   DocNode &operator=(unsigned Val);
    207   DocNode &operator=(int64_t Val);
    208   DocNode &operator=(uint64_t Val);
    209 
    210 private:
    211   // Private constructor setting KindAndDoc, used by methods in Document.
    212   DocNode(const KindAndDocument *KindAndDoc) : KindAndDoc(KindAndDoc) {}
    213 
    214   void convertToArray();
    215   void convertToMap();
    216 };
    217 
    218 /// A DocNode that is a map.
    219 class MapDocNode : public DocNode {
    220 public:
    221   MapDocNode() {}
    222   MapDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Map); }
    223 
    224   // Map access methods.
    225   size_t size() const { return Map->size(); }
    226   bool empty() const { return !size(); }
    227   MapTy::iterator begin() { return Map->begin(); }
    228   MapTy::iterator end() { return Map->end(); }
    229   MapTy::iterator find(DocNode Key) { return Map->find(Key); }
    230   MapTy::iterator find(StringRef Key);
    231   MapTy::iterator erase(MapTy::const_iterator I) { return Map->erase(I); }
    232   size_t erase(DocNode Key) { return Map->erase(Key); }
    233   MapTy::iterator erase(MapTy::const_iterator First,
    234                         MapTy::const_iterator Second) {
    235     return Map->erase(First, Second);
    236   }
    237   /// Member access. The string data must remain valid for the lifetime of the
    238   /// Document.
    239   DocNode &operator[](StringRef S);
    240   /// Member access, with convenience versions for an integer key.
    241   DocNode &operator[](DocNode Key);
    242   DocNode &operator[](int Key);
    243   DocNode &operator[](unsigned Key);
    244   DocNode &operator[](int64_t Key);
    245   DocNode &operator[](uint64_t Key);
    246 };
    247 
    248 /// A DocNode that is an array.
    249 class ArrayDocNode : public DocNode {
    250 public:
    251   ArrayDocNode() {}
    252   ArrayDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Array); }
    253 
    254   // Array access methods.
    255   size_t size() const { return Array->size(); }
    256   bool empty() const { return !size(); }
    257   DocNode &back() const { return Array->back(); }
    258   ArrayTy::iterator begin() { return Array->begin(); }
    259   ArrayTy::iterator end() { return Array->end(); }
    260   void push_back(DocNode N) {
    261     assert(N.isEmpty() || N.getDocument() == getDocument());
    262     Array->push_back(N);
    263   }
    264 
    265   /// Element access. This extends the array if necessary, with empty nodes.
    266   DocNode &operator[](size_t Index);
    267 };
    268 
    269 /// Simple in-memory representation of a document of msgpack objects with
    270 /// ability to find and create array and map elements.  Does not currently cope
    271 /// with any extension types.
    272 class Document {
    273   // Maps, arrays and strings used by nodes in the document. No attempt is made
    274   // to free unused ones.
    275   std::vector<std::unique_ptr<DocNode::MapTy>> Maps;
    276   std::vector<std::unique_ptr<DocNode::ArrayTy>> Arrays;
    277   std::vector<std::unique_ptr<char[]>> Strings;
    278 
    279   // The root node of the document.
    280   DocNode Root;
    281 
    282   // The KindAndDocument structs pointed to by nodes in the document.
    283   KindAndDocument KindAndDocs[size_t(Type::Empty) + 1];
    284 
    285   // Whether YAML output uses hex for UInt.
    286   bool HexMode = false;
    287 
    288 public:
    289   Document() {
    290     clear();
    291     for (unsigned T = 0; T != unsigned(Type::Empty) + 1; ++T)
    292       KindAndDocs[T] = {this, Type(T)};
    293   }
    294 
    295   /// Get ref to the document's root element.
    296   DocNode &getRoot() { return Root; }
    297 
    298   /// Restore the Document to an empty state.
    299   void clear() { getRoot() = getEmptyNode(); }
    300 
    301   /// Create an empty node associated with this Document.
    302   DocNode getEmptyNode() {
    303     auto N = DocNode(&KindAndDocs[size_t(Type::Empty)]);
    304     return N;
    305   }
    306 
    307   /// Create a nil node associated with this Document.
    308   DocNode getNode() {
    309     auto N = DocNode(&KindAndDocs[size_t(Type::Nil)]);
    310     return N;
    311   }
    312 
    313   /// Create an Int node associated with this Document.
    314   DocNode getNode(int64_t V) {
    315     auto N = DocNode(&KindAndDocs[size_t(Type::Int)]);
    316     N.Int = V;
    317     return N;
    318   }
    319 
    320   /// Create an Int node associated with this Document.
    321   DocNode getNode(int V) {
    322     auto N = DocNode(&KindAndDocs[size_t(Type::Int)]);
    323     N.Int = V;
    324     return N;
    325   }
    326 
    327   /// Create a UInt node associated with this Document.
    328   DocNode getNode(uint64_t V) {
    329     auto N = DocNode(&KindAndDocs[size_t(Type::UInt)]);
    330     N.UInt = V;
    331     return N;
    332   }
    333 
    334   /// Create a UInt node associated with this Document.
    335   DocNode getNode(unsigned V) {
    336     auto N = DocNode(&KindAndDocs[size_t(Type::UInt)]);
    337     N.UInt = V;
    338     return N;
    339   }
    340 
    341   /// Create a Boolean node associated with this Document.
    342   DocNode getNode(bool V) {
    343     auto N = DocNode(&KindAndDocs[size_t(Type::Boolean)]);
    344     N.Bool = V;
    345     return N;
    346   }
    347 
    348   /// Create a Float node associated with this Document.
    349   DocNode getNode(double V) {
    350     auto N = DocNode(&KindAndDocs[size_t(Type::Float)]);
    351     N.Float = V;
    352     return N;
    353   }
    354 
    355   /// Create a String node associated with this Document. If !Copy, the passed
    356   /// string must remain valid for the lifetime of the Document.
    357   DocNode getNode(StringRef V, bool Copy = false) {
    358     if (Copy)
    359       V = addString(V);
    360     auto N = DocNode(&KindAndDocs[size_t(Type::String)]);
    361     N.Raw = V;
    362     return N;
    363   }
    364 
    365   /// Create a String node associated with this Document. If !Copy, the passed
    366   /// string must remain valid for the lifetime of the Document.
    367   DocNode getNode(const char *V, bool Copy = false) {
    368     return getNode(StringRef(V), Copy);
    369   }
    370 
    371   /// Create an empty Map node associated with this Document.
    372   MapDocNode getMapNode() {
    373     auto N = DocNode(&KindAndDocs[size_t(Type::Map)]);
    374     Maps.push_back(std::unique_ptr<DocNode::MapTy>(new DocNode::MapTy));
    375     N.Map = Maps.back().get();
    376     return N.getMap();
    377   }
    378 
    379   /// Create an empty Array node associated with this Document.
    380   ArrayDocNode getArrayNode() {
    381     auto N = DocNode(&KindAndDocs[size_t(Type::Array)]);
    382     Arrays.push_back(std::unique_ptr<DocNode::ArrayTy>(new DocNode::ArrayTy));
    383     N.Array = Arrays.back().get();
    384     return N.getArray();
    385   }
    386 
    387   /// Read a document from a binary msgpack blob, merging into anything already
    388   /// in the Document. The blob data must remain valid for the lifetime of this
    389   /// Document (because a string object in the document contains a StringRef
    390   /// into the original blob). If Multi, then this sets root to an array and
    391   /// adds top-level objects to it. If !Multi, then it only reads a single
    392   /// top-level object, even if there are more, and sets root to that. Returns
    393   /// false if failed due to illegal format or merge error.
    394   ///
    395   /// The Merger arg is a callback function that is called when the merge has a
    396   /// conflict, that is, it is trying to set an item that is already set. If the
    397   /// conflict cannot be resolved, the callback function returns -1. If the
    398   /// conflict can be resolved, the callback returns a non-negative number and
    399   /// sets *DestNode to the resolved node. The returned non-negative number is
    400   /// significant only for an array node; it is then the array index to start
    401   /// populating at. That allows Merger to choose whether to merge array
    402   /// elements (returns 0) or append new elements (returns existing size).
    403   ///
    404   /// If SrcNode is an array or map, the resolution must be that *DestNode is an
    405   /// array or map respectively, although it could be the array or map
    406   /// (respectively) that was already there. MapKey is the key if *DestNode is a
    407   /// map entry, a nil node otherwise.
    408   ///
    409   /// The default for Merger is to disallow any conflict.
    410   bool readFromBlob(
    411       StringRef Blob, bool Multi,
    412       function_ref<int(DocNode *DestNode, DocNode SrcNode, DocNode MapKey)>
    413           Merger = [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) {
    414             return -1;
    415           });
    416 
    417   /// Write a MsgPack document to a binary MsgPack blob.
    418   void writeToBlob(std::string &Blob);
    419 
    420   /// Copy a string into the Document's strings list, and return the copy that
    421   /// is owned by the Document.
    422   StringRef addString(StringRef S) {
    423     Strings.push_back(std::unique_ptr<char[]>(new char[S.size()]));
    424     memcpy(&Strings.back()[0], S.data(), S.size());
    425     return StringRef(&Strings.back()[0], S.size());
    426   }
    427 
    428   /// Set whether YAML output uses hex for UInt. Default off.
    429   void setHexMode(bool Val = true) { HexMode = Val; }
    430 
    431   /// Get Hexmode flag.
    432   bool getHexMode() const { return HexMode; }
    433 
    434   /// Convert MsgPack Document to YAML text.
    435   void toYAML(raw_ostream &OS);
    436 
    437   /// Read YAML text into the MsgPack document. Returns false on failure.
    438   bool fromYAML(StringRef S);
    439 };
    440 
    441 } // namespace msgpack
    442 } // namespace llvm
    443 
    444 #endif // LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H
    445