1 //===- MsgPackReader.h - Simple MsgPack reader ------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This is a MessagePack reader. 11 /// 12 /// See https://github.com/msgpack/msgpack/blob/master/spec.md for the full 13 /// standard. 14 /// 15 /// Typical usage: 16 /// \code 17 /// StringRef input = GetInput(); 18 /// msgpack::Reader MPReader(input); 19 /// msgpack::Object Obj; 20 /// 21 /// while (true) { 22 /// Expected<bool> ReadObj = MPReader.read(&Obj); 23 /// if (!ReadObj) 24 /// // Handle error... 25 /// if (!ReadObj.get()) 26 /// break; // Reached end of input 27 /// switch (Obj.Kind) { 28 /// case msgpack::Type::Int: 29 // // Use Obj.Int 30 /// break; 31 /// // ... 32 /// } 33 /// } 34 /// \endcode 35 /// 36 //===----------------------------------------------------------------------===// 37 38 #ifndef LLVM_BINARYFORMAT_MSGPACKREADER_H 39 #define LLVM_BINARYFORMAT_MSGPACKREADER_H 40 41 #include "llvm/Support/Error.h" 42 #include "llvm/Support/MemoryBufferRef.h" 43 #include <cstdint> 44 45 namespace llvm { 46 namespace msgpack { 47 48 /// MessagePack types as defined in the standard, with the exception of Integer 49 /// being divided into a signed Int and unsigned UInt variant in order to map 50 /// directly to C++ types. 51 /// 52 /// The types map onto corresponding union members of the \c Object struct. 53 enum class Type : uint8_t { 54 Int, 55 UInt, 56 Nil, 57 Boolean, 58 Float, 59 String, 60 Binary, 61 Array, 62 Map, 63 Extension, 64 Empty, // Used by MsgPackDocument to represent an empty node 65 }; 66 67 /// Extension types are composed of a user-defined type ID and an uninterpreted 68 /// sequence of bytes. 69 struct ExtensionType { 70 /// User-defined extension type. 71 int8_t Type; 72 /// Raw bytes of the extension object. 73 StringRef Bytes; 74 }; 75 76 /// MessagePack object, represented as a tagged union of C++ types. 77 /// 78 /// All types except \c Type::Nil (which has only one value, and so is 79 /// completely represented by the \c Kind itself) map to a exactly one union 80 /// member. 81 struct Object { 82 Type Kind; 83 union { 84 /// Value for \c Type::Int. 85 int64_t Int; 86 /// Value for \c Type::Uint. 87 uint64_t UInt; 88 /// Value for \c Type::Boolean. 89 bool Bool; 90 /// Value for \c Type::Float. 91 double Float; 92 /// Value for \c Type::String and \c Type::Binary. 93 StringRef Raw; 94 /// Value for \c Type::Array and \c Type::Map. 95 size_t Length; 96 /// Value for \c Type::Extension. 97 ExtensionType Extension; 98 }; 99 100 Object() : Kind(Type::Int), Int(0) {} 101 }; 102 103 /// Reads MessagePack objects from memory, one at a time. 104 class Reader { 105 public: 106 /// Construct a reader, keeping a reference to the \p InputBuffer. 107 Reader(MemoryBufferRef InputBuffer); 108 /// Construct a reader, keeping a reference to the \p Input. 109 Reader(StringRef Input); 110 111 Reader(const Reader &) = delete; 112 Reader &operator=(const Reader &) = delete; 113 114 /// Read one object from the input buffer, advancing past it. 115 /// 116 /// The \p Obj is updated with the kind of the object read, and the 117 /// corresponding union member is updated. 118 /// 119 /// For the collection objects (Array and Map), only the length is read, and 120 /// the caller must make and additional \c N calls (in the case of Array) or 121 /// \c N*2 calls (in the case of Map) to \c Read to retrieve the collection 122 /// elements. 123 /// 124 /// \param [out] Obj filled with next object on success. 125 /// 126 /// \returns true when object successfully read, false when at end of 127 /// input (and so \p Obj was not updated), otherwise an error. 128 Expected<bool> read(Object &Obj); 129 130 private: 131 MemoryBufferRef InputBuffer; 132 StringRef::iterator Current; 133 StringRef::iterator End; 134 135 size_t remainingSpace() { 136 // The rest of the code maintains the invariant that End >= Current, so 137 // that this cast is always defined behavior. 138 return static_cast<size_t>(End - Current); 139 } 140 141 template <class T> Expected<bool> readRaw(Object &Obj); 142 template <class T> Expected<bool> readInt(Object &Obj); 143 template <class T> Expected<bool> readUInt(Object &Obj); 144 template <class T> Expected<bool> readLength(Object &Obj); 145 template <class T> Expected<bool> readExt(Object &Obj); 146 Expected<bool> createRaw(Object &Obj, uint32_t Size); 147 Expected<bool> createExt(Object &Obj, uint32_t Size); 148 }; 149 150 } // end namespace msgpack 151 } // end namespace llvm 152 153 #endif // LLVM_BINARYFORMAT_MSGPACKREADER_H 154