1 //===-- MsgPackDocument.h - MsgPack Document --------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file declares a class that exposes a simple in-memory representation 10 /// of a document of MsgPack objects, that can be read from MsgPack, written to 11 /// MsgPack, and inspected and modified in memory. This is intended to be a 12 /// lighter-weight (in terms of memory allocations) replacement for 13 /// MsgPackTypes. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #ifndef LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H 18 #define LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H 19 20 #include "llvm/BinaryFormat/MsgPackReader.h" 21 #include <map> 22 23 namespace llvm { 24 namespace msgpack { 25 26 class ArrayDocNode; 27 class Document; 28 class MapDocNode; 29 30 /// The kind of a DocNode and its owning Document. 31 struct KindAndDocument { 32 Document *Doc; 33 Type Kind; 34 }; 35 36 /// A node in a MsgPack Document. This is a simple copyable and 37 /// passable-by-value type that does not own any memory. 38 class DocNode { 39 friend Document; 40 41 public: 42 typedef std::map<DocNode, DocNode> MapTy; 43 typedef std::vector<DocNode> ArrayTy; 44 45 private: 46 // Using KindAndDocument allows us to squeeze Kind and a pointer to the 47 // owning Document into the same word. Having a pointer to the owning 48 // Document makes the API of DocNode more convenient, and allows its use in 49 // YAMLIO. 50 const KindAndDocument *KindAndDoc; 51 52 protected: 53 // The union of different values. 54 union { 55 int64_t Int; 56 uint64_t UInt; 57 bool Bool; 58 double Float; 59 StringRef Raw; 60 ArrayTy *Array; 61 MapTy *Map; 62 }; 63 64 public: 65 // Default constructor gives an empty node with no associated Document. All 66 // you can do with it is "isEmpty()". DocNode()67 DocNode() : KindAndDoc(nullptr) {} 68 69 // Type methods isMap()70 bool isMap() const { return getKind() == Type::Map; } isArray()71 bool isArray() const { return getKind() == Type::Array; } isScalar()72 bool isScalar() const { return !isMap() && !isArray(); } isString()73 bool isString() const { return getKind() == Type::String; } 74 75 // Accessors. isEmpty() returns true for both a default-constructed DocNode 76 // that has no associated Document, and the result of getEmptyNode(), which 77 // does have an associated document. isEmpty()78 bool isEmpty() const { return !KindAndDoc || getKind() == Type::Empty; } getKind()79 Type getKind() const { return KindAndDoc->Kind; } getDocument()80 Document *getDocument() const { return KindAndDoc->Doc; } 81 getInt()82 int64_t &getInt() { 83 assert(getKind() == Type::Int); 84 return Int; 85 } 86 getUInt()87 uint64_t &getUInt() { 88 assert(getKind() == Type::UInt); 89 return UInt; 90 } 91 getBool()92 bool &getBool() { 93 assert(getKind() == Type::Boolean); 94 return Bool; 95 } 96 getFloat()97 double &getFloat() { 98 assert(getKind() == Type::Float); 99 return Float; 100 } 101 getInt()102 int64_t getInt() const { 103 assert(getKind() == Type::Int); 104 return Int; 105 } 106 getUInt()107 uint64_t getUInt() const { 108 assert(getKind() == Type::UInt); 109 return UInt; 110 } 111 getBool()112 bool getBool() const { 113 assert(getKind() == Type::Boolean); 114 return Bool; 115 } 116 getFloat()117 double getFloat() const { 118 assert(getKind() == Type::Float); 119 return Float; 120 } 121 getString()122 StringRef getString() const { 123 assert(getKind() == Type::String); 124 return Raw; 125 } 126 getBinary()127 MemoryBufferRef getBinary() const { 128 assert(getKind() == Type::Binary); 129 return MemoryBufferRef(Raw, ""); 130 } 131 132 /// Get an ArrayDocNode for an array node. If Convert, convert the node to an 133 /// array node if necessary. 134 ArrayDocNode &getArray(bool Convert = false) { 135 if (getKind() != Type::Array) { 136 assert(Convert); 137 convertToArray(); 138 } 139 // This could be a static_cast, except ArrayDocNode is a forward reference. 140 return *reinterpret_cast<ArrayDocNode *>(this); 141 } 142 143 /// Get a MapDocNode for a map node. If Convert, convert the node to a map 144 /// node if necessary. 145 MapDocNode &getMap(bool Convert = false) { 146 if (getKind() != Type::Map) { 147 assert(Convert); 148 convertToMap(); 149 } 150 // This could be a static_cast, except MapDocNode is a forward reference. 151 return *reinterpret_cast<MapDocNode *>(this); 152 } 153 154 /// Comparison operator, used for map keys. 155 friend bool operator<(const DocNode &Lhs, const DocNode &Rhs) { 156 // This has to cope with one or both of the nodes being default-constructed, 157 // such that KindAndDoc is not set. 158 if (Rhs.isEmpty()) 159 return false; 160 if (Lhs.KindAndDoc != Rhs.KindAndDoc) { 161 if (Lhs.isEmpty()) 162 return true; 163 return (unsigned)Lhs.getKind() < (unsigned)Rhs.getKind(); 164 } 165 switch (Lhs.getKind()) { 166 case Type::Int: 167 return Lhs.Int < Rhs.Int; 168 case Type::UInt: 169 return Lhs.UInt < Rhs.UInt; 170 case Type::Nil: 171 return false; 172 case Type::Boolean: 173 return Lhs.Bool < Rhs.Bool; 174 case Type::Float: 175 return Lhs.Float < Rhs.Float; 176 case Type::String: 177 case Type::Binary: 178 return Lhs.Raw < Rhs.Raw; 179 default: 180 llvm_unreachable("bad map key type"); 181 } 182 } 183 184 /// Equality operator 185 friend bool operator==(const DocNode &Lhs, const DocNode &Rhs) { 186 return !(Lhs < Rhs) && !(Rhs < Lhs); 187 } 188 189 /// Inequality operator 190 friend bool operator!=(const DocNode &Lhs, const DocNode &Rhs) { 191 return !(Lhs == Rhs); 192 } 193 194 /// Convert this node to a string, assuming it is scalar. 195 std::string toString() const; 196 197 /// Convert the StringRef and use it to set this DocNode (assuming scalar). If 198 /// it is a string, copy the string into the Document's strings list so we do 199 /// not rely on S having a lifetime beyond this call. Tag is "" or a YAML tag. 200 StringRef fromString(StringRef S, StringRef Tag = ""); 201 202 /// Convenience assignment operators. This only works if the destination 203 /// DocNode has an associated Document, i.e. it was not constructed using the 204 /// default constructor. The string one does not copy, so the string must 205 /// remain valid for the lifetime of the Document. Use fromString to avoid 206 /// that restriction. 207 DocNode &operator=(const char *Val) { return *this = StringRef(Val); } 208 DocNode &operator=(StringRef Val); 209 DocNode &operator=(MemoryBufferRef Val); 210 DocNode &operator=(bool Val); 211 DocNode &operator=(int Val); 212 DocNode &operator=(unsigned Val); 213 DocNode &operator=(int64_t Val); 214 DocNode &operator=(uint64_t Val); 215 216 private: 217 // Private constructor setting KindAndDoc, used by methods in Document. DocNode(const KindAndDocument * KindAndDoc)218 DocNode(const KindAndDocument *KindAndDoc) : KindAndDoc(KindAndDoc) {} 219 220 void convertToArray(); 221 void convertToMap(); 222 }; 223 224 /// A DocNode that is a map. 225 class MapDocNode : public DocNode { 226 public: 227 MapDocNode() = default; MapDocNode(DocNode & N)228 MapDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Map); } 229 230 // Map access methods. size()231 size_t size() const { return Map->size(); } empty()232 bool empty() const { return !size(); } begin()233 MapTy::iterator begin() { return Map->begin(); } end()234 MapTy::iterator end() { return Map->end(); } find(DocNode Key)235 MapTy::iterator find(DocNode Key) { return Map->find(Key); } 236 MapTy::iterator find(StringRef Key); erase(MapTy::const_iterator I)237 MapTy::iterator erase(MapTy::const_iterator I) { return Map->erase(I); } erase(DocNode Key)238 size_t erase(DocNode Key) { return Map->erase(Key); } erase(MapTy::const_iterator First,MapTy::const_iterator Second)239 MapTy::iterator erase(MapTy::const_iterator First, 240 MapTy::const_iterator Second) { 241 return Map->erase(First, Second); 242 } 243 /// Member access. The string data must remain valid for the lifetime of the 244 /// Document. 245 DocNode &operator[](StringRef S); 246 /// Member access, with convenience versions for an integer key. 247 DocNode &operator[](DocNode Key); 248 DocNode &operator[](int Key); 249 DocNode &operator[](unsigned Key); 250 DocNode &operator[](int64_t Key); 251 DocNode &operator[](uint64_t Key); 252 }; 253 254 /// A DocNode that is an array. 255 class ArrayDocNode : public DocNode { 256 public: 257 ArrayDocNode() = default; ArrayDocNode(DocNode & N)258 ArrayDocNode(DocNode &N) : DocNode(N) { assert(getKind() == Type::Array); } 259 260 // Array access methods. size()261 size_t size() const { return Array->size(); } empty()262 bool empty() const { return !size(); } back()263 DocNode &back() const { return Array->back(); } begin()264 ArrayTy::iterator begin() { return Array->begin(); } end()265 ArrayTy::iterator end() { return Array->end(); } push_back(DocNode N)266 void push_back(DocNode N) { 267 assert(N.isEmpty() || N.getDocument() == getDocument()); 268 Array->push_back(N); 269 } 270 271 /// Element access. This extends the array if necessary, with empty nodes. 272 DocNode &operator[](size_t Index); 273 }; 274 275 /// Simple in-memory representation of a document of msgpack objects with 276 /// ability to find and create array and map elements. Does not currently cope 277 /// with any extension types. 278 class Document { 279 // Maps, arrays and strings used by nodes in the document. No attempt is made 280 // to free unused ones. 281 std::vector<std::unique_ptr<DocNode::MapTy>> Maps; 282 std::vector<std::unique_ptr<DocNode::ArrayTy>> Arrays; 283 std::vector<std::unique_ptr<char[]>> Strings; 284 285 // The root node of the document. 286 DocNode Root; 287 288 // The KindAndDocument structs pointed to by nodes in the document. 289 KindAndDocument KindAndDocs[size_t(Type::Empty) + 1]; 290 291 // Whether YAML output uses hex for UInt. 292 bool HexMode = false; 293 294 public: Document()295 Document() { 296 clear(); 297 for (unsigned T = 0; T != unsigned(Type::Empty) + 1; ++T) 298 KindAndDocs[T] = {this, Type(T)}; 299 } 300 301 /// Get ref to the document's root element. getRoot()302 DocNode &getRoot() { return Root; } 303 304 /// Restore the Document to an empty state. clear()305 void clear() { getRoot() = getEmptyNode(); } 306 307 /// Create an empty node associated with this Document. getEmptyNode()308 DocNode getEmptyNode() { 309 auto N = DocNode(&KindAndDocs[size_t(Type::Empty)]); 310 return N; 311 } 312 313 /// Create a nil node associated with this Document. getNode()314 DocNode getNode() { 315 auto N = DocNode(&KindAndDocs[size_t(Type::Nil)]); 316 return N; 317 } 318 319 /// Create an Int node associated with this Document. getNode(int64_t V)320 DocNode getNode(int64_t V) { 321 auto N = DocNode(&KindAndDocs[size_t(Type::Int)]); 322 N.Int = V; 323 return N; 324 } 325 326 /// Create an Int node associated with this Document. getNode(int V)327 DocNode getNode(int V) { 328 auto N = DocNode(&KindAndDocs[size_t(Type::Int)]); 329 N.Int = V; 330 return N; 331 } 332 333 /// Create a UInt node associated with this Document. getNode(uint64_t V)334 DocNode getNode(uint64_t V) { 335 auto N = DocNode(&KindAndDocs[size_t(Type::UInt)]); 336 N.UInt = V; 337 return N; 338 } 339 340 /// Create a UInt node associated with this Document. getNode(unsigned V)341 DocNode getNode(unsigned V) { 342 auto N = DocNode(&KindAndDocs[size_t(Type::UInt)]); 343 N.UInt = V; 344 return N; 345 } 346 347 /// Create a Boolean node associated with this Document. getNode(bool V)348 DocNode getNode(bool V) { 349 auto N = DocNode(&KindAndDocs[size_t(Type::Boolean)]); 350 N.Bool = V; 351 return N; 352 } 353 354 /// Create a Float node associated with this Document. getNode(double V)355 DocNode getNode(double V) { 356 auto N = DocNode(&KindAndDocs[size_t(Type::Float)]); 357 N.Float = V; 358 return N; 359 } 360 361 /// Create a String node associated with this Document. If !Copy, the passed 362 /// string must remain valid for the lifetime of the Document. 363 DocNode getNode(StringRef V, bool Copy = false) { 364 if (Copy) 365 V = addString(V); 366 auto N = DocNode(&KindAndDocs[size_t(Type::String)]); 367 N.Raw = V; 368 return N; 369 } 370 371 /// Create a String node associated with this Document. If !Copy, the passed 372 /// string must remain valid for the lifetime of the Document. 373 DocNode getNode(const char *V, bool Copy = false) { 374 return getNode(StringRef(V), Copy); 375 } 376 377 /// Create a Binary node associated with this Document. If !Copy, the passed 378 /// buffer must remain valid for the lifetime of the Document. 379 DocNode getNode(MemoryBufferRef V, bool Copy = false) { 380 auto Raw = V.getBuffer(); 381 if (Copy) 382 Raw = addString(Raw); 383 auto N = DocNode(&KindAndDocs[size_t(Type::Binary)]); 384 N.Raw = Raw; 385 return N; 386 } 387 388 /// Create an empty Map node associated with this Document. getMapNode()389 MapDocNode getMapNode() { 390 auto N = DocNode(&KindAndDocs[size_t(Type::Map)]); 391 Maps.push_back(std::make_unique<DocNode::MapTy>()); 392 N.Map = Maps.back().get(); 393 return N.getMap(); 394 } 395 396 /// Create an empty Array node associated with this Document. getArrayNode()397 ArrayDocNode getArrayNode() { 398 auto N = DocNode(&KindAndDocs[size_t(Type::Array)]); 399 Arrays.push_back(std::make_unique<DocNode::ArrayTy>()); 400 N.Array = Arrays.back().get(); 401 return N.getArray(); 402 } 403 404 /// Read a document from a binary msgpack blob, merging into anything already 405 /// in the Document. The blob data must remain valid for the lifetime of this 406 /// Document (because a string object in the document contains a StringRef 407 /// into the original blob). If Multi, then this sets root to an array and 408 /// adds top-level objects to it. If !Multi, then it only reads a single 409 /// top-level object, even if there are more, and sets root to that. Returns 410 /// false if failed due to illegal format or merge error. 411 /// 412 /// The Merger arg is a callback function that is called when the merge has a 413 /// conflict, that is, it is trying to set an item that is already set. If the 414 /// conflict cannot be resolved, the callback function returns -1. If the 415 /// conflict can be resolved, the callback returns a non-negative number and 416 /// sets *DestNode to the resolved node. The returned non-negative number is 417 /// significant only for an array node; it is then the array index to start 418 /// populating at. That allows Merger to choose whether to merge array 419 /// elements (returns 0) or append new elements (returns existing size). 420 /// 421 /// If SrcNode is an array or map, the resolution must be that *DestNode is an 422 /// array or map respectively, although it could be the array or map 423 /// (respectively) that was already there. MapKey is the key if *DestNode is a 424 /// map entry, a nil node otherwise. 425 /// 426 /// The default for Merger is to disallow any conflict. 427 bool readFromBlob( 428 StringRef Blob, bool Multi, 429 function_ref<int(DocNode *DestNode, DocNode SrcNode, DocNode MapKey)> 430 Merger = [](DocNode *DestNode, DocNode SrcNode, DocNode MapKey) { 431 return -1; 432 }); 433 434 /// Write a MsgPack document to a binary MsgPack blob. 435 void writeToBlob(std::string &Blob); 436 437 /// Copy a string into the Document's strings list, and return the copy that 438 /// is owned by the Document. addString(StringRef S)439 StringRef addString(StringRef S) { 440 Strings.push_back(std::unique_ptr<char[]>(new char[S.size()])); 441 memcpy(&Strings.back()[0], S.data(), S.size()); 442 return StringRef(&Strings.back()[0], S.size()); 443 } 444 445 /// Set whether YAML output uses hex for UInt. Default off. 446 void setHexMode(bool Val = true) { HexMode = Val; } 447 448 /// Get Hexmode flag. getHexMode()449 bool getHexMode() const { return HexMode; } 450 451 /// Convert MsgPack Document to YAML text. 452 void toYAML(raw_ostream &OS); 453 454 /// Read YAML text into the MsgPack document. Returns false on failure. 455 bool fromYAML(StringRef S); 456 }; 457 458 } // namespace msgpack 459 } // namespace llvm 460 461 #endif // LLVM_BINARYFORMAT_MSGPACKDOCUMENT_H 462