1 //===-- DataExtractor.h -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_SUPPORT_DATAEXTRACTOR_H 10 #define LLVM_SUPPORT_DATAEXTRACTOR_H 11 12 #include "llvm/ADT/StringRef.h" 13 #include "llvm/Support/DataTypes.h" 14 15 namespace llvm { 16 17 /// An auxiliary type to facilitate extraction of 3-byte entities. 18 struct Uint24 { 19 uint8_t Bytes[3]; 20 Uint24(uint8_t U) { 21 Bytes[0] = Bytes[1] = Bytes[2] = U; 22 } 23 Uint24(uint8_t U0, uint8_t U1, uint8_t U2) { 24 Bytes[0] = U0; Bytes[1] = U1; Bytes[2] = U2; 25 } 26 uint32_t getAsUint32(bool IsLittleEndian) const { 27 int LoIx = IsLittleEndian ? 0 : 2; 28 return Bytes[LoIx] + (Bytes[1] << 8) + (Bytes[2-LoIx] << 16); 29 } 30 }; 31 32 using uint24_t = Uint24; 33 static_assert(sizeof(uint24_t) == 3, "sizeof(uint24_t) != 3"); 34 35 /// Needed by swapByteOrder(). 36 inline uint24_t getSwappedBytes(uint24_t C) { 37 return uint24_t(C.Bytes[2], C.Bytes[1], C.Bytes[0]); 38 } 39 40 class DataExtractor { 41 StringRef Data; 42 uint8_t IsLittleEndian; 43 uint8_t AddressSize; 44 public: 45 /// Construct with a buffer that is owned by the caller. 46 /// 47 /// This constructor allows us to use data that is owned by the 48 /// caller. The data must stay around as long as this object is 49 /// valid. 50 DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize) 51 : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {} 52 53 /// Get the data pointed to by this extractor. 54 StringRef getData() const { return Data; } 55 /// Get the endianness for this extractor. 56 bool isLittleEndian() const { return IsLittleEndian; } 57 /// Get the address size for this extractor. 58 uint8_t getAddressSize() const { return AddressSize; } 59 /// Set the address size for this extractor. 60 void setAddressSize(uint8_t Size) { AddressSize = Size; } 61 62 /// Extract a C string from \a *offset_ptr. 63 /// 64 /// Returns a pointer to a C String from the data at the offset 65 /// pointed to by \a offset_ptr. A variable length NULL terminated C 66 /// string will be extracted and the \a offset_ptr will be 67 /// updated with the offset of the byte that follows the NULL 68 /// terminator byte. 69 /// 70 /// @param[in,out] offset_ptr 71 /// A pointer to an offset within the data that will be advanced 72 /// by the appropriate number of bytes if the value is extracted 73 /// correctly. If the offset is out of bounds or there are not 74 /// enough bytes to extract this value, the offset will be left 75 /// unmodified. 76 /// 77 /// @return 78 /// A pointer to the C string value in the data. If the offset 79 /// pointed to by \a offset_ptr is out of bounds, or if the 80 /// offset plus the length of the C string is out of bounds, 81 /// NULL will be returned. 82 const char *getCStr(uint32_t *offset_ptr) const; 83 84 /// Extract a C string from \a *OffsetPtr. 85 /// 86 /// Returns a StringRef for the C String from the data at the offset 87 /// pointed to by \a OffsetPtr. A variable length NULL terminated C 88 /// string will be extracted and the \a OffsetPtr will be 89 /// updated with the offset of the byte that follows the NULL 90 /// terminator byte. 91 /// 92 /// \param[in,out] OffsetPtr 93 /// A pointer to an offset within the data that will be advanced 94 /// by the appropriate number of bytes if the value is extracted 95 /// correctly. If the offset is out of bounds or there are not 96 /// enough bytes to extract this value, the offset will be left 97 /// unmodified. 98 /// 99 /// \return 100 /// A StringRef for the C string value in the data. If the offset 101 /// pointed to by \a OffsetPtr is out of bounds, or if the 102 /// offset plus the length of the C string is out of bounds, 103 /// a default-initialized StringRef will be returned. 104 StringRef getCStrRef(uint32_t *OffsetPtr) const; 105 106 /// Extract an unsigned integer of size \a byte_size from \a 107 /// *offset_ptr. 108 /// 109 /// Extract a single unsigned integer value and update the offset 110 /// pointed to by \a offset_ptr. The size of the extracted integer 111 /// is specified by the \a byte_size argument. \a byte_size should 112 /// have a value greater than or equal to one and less than or equal 113 /// to eight since the return value is 64 bits wide. Any 114 /// \a byte_size values less than 1 or greater than 8 will result in 115 /// nothing being extracted, and zero being returned. 116 /// 117 /// @param[in,out] offset_ptr 118 /// A pointer to an offset within the data that will be advanced 119 /// by the appropriate number of bytes if the value is extracted 120 /// correctly. If the offset is out of bounds or there are not 121 /// enough bytes to extract this value, the offset will be left 122 /// unmodified. 123 /// 124 /// @param[in] byte_size 125 /// The size in byte of the integer to extract. 126 /// 127 /// @return 128 /// The unsigned integer value that was extracted, or zero on 129 /// failure. 130 uint64_t getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const; 131 132 /// Extract an signed integer of size \a byte_size from \a *offset_ptr. 133 /// 134 /// Extract a single signed integer value (sign extending if required) 135 /// and update the offset pointed to by \a offset_ptr. The size of 136 /// the extracted integer is specified by the \a byte_size argument. 137 /// \a byte_size should have a value greater than or equal to one 138 /// and less than or equal to eight since the return value is 64 139 /// bits wide. Any \a byte_size values less than 1 or greater than 140 /// 8 will result in nothing being extracted, and zero being returned. 141 /// 142 /// @param[in,out] offset_ptr 143 /// A pointer to an offset within the data that will be advanced 144 /// by the appropriate number of bytes if the value is extracted 145 /// correctly. If the offset is out of bounds or there are not 146 /// enough bytes to extract this value, the offset will be left 147 /// unmodified. 148 /// 149 /// @param[in] size 150 /// The size in bytes of the integer to extract. 151 /// 152 /// @return 153 /// The sign extended signed integer value that was extracted, 154 /// or zero on failure. 155 int64_t getSigned(uint32_t *offset_ptr, uint32_t size) const; 156 157 //------------------------------------------------------------------ 158 /// Extract an pointer from \a *offset_ptr. 159 /// 160 /// Extract a single pointer from the data and update the offset 161 /// pointed to by \a offset_ptr. The size of the extracted pointer 162 /// is \a getAddressSize(), so the address size has to be 163 /// set correctly prior to extracting any pointer values. 164 /// 165 /// @param[in,out] offset_ptr 166 /// A pointer to an offset within the data that will be advanced 167 /// by the appropriate number of bytes if the value is extracted 168 /// correctly. If the offset is out of bounds or there are not 169 /// enough bytes to extract this value, the offset will be left 170 /// unmodified. 171 /// 172 /// @return 173 /// The extracted pointer value as a 64 integer. 174 uint64_t getAddress(uint32_t *offset_ptr) const { 175 return getUnsigned(offset_ptr, AddressSize); 176 } 177 178 /// Extract a uint8_t value from \a *offset_ptr. 179 /// 180 /// Extract a single uint8_t from the binary data at the offset 181 /// pointed to by \a offset_ptr, and advance the offset on success. 182 /// 183 /// @param[in,out] offset_ptr 184 /// A pointer to an offset within the data that will be advanced 185 /// by the appropriate number of bytes if the value is extracted 186 /// correctly. If the offset is out of bounds or there are not 187 /// enough bytes to extract this value, the offset will be left 188 /// unmodified. 189 /// 190 /// @return 191 /// The extracted uint8_t value. 192 uint8_t getU8(uint32_t *offset_ptr) const; 193 194 /// Extract \a count uint8_t values from \a *offset_ptr. 195 /// 196 /// Extract \a count uint8_t values from the binary data at the 197 /// offset pointed to by \a offset_ptr, and advance the offset on 198 /// success. The extracted values are copied into \a dst. 199 /// 200 /// @param[in,out] offset_ptr 201 /// A pointer to an offset within the data that will be advanced 202 /// by the appropriate number of bytes if the value is extracted 203 /// correctly. If the offset is out of bounds or there are not 204 /// enough bytes to extract this value, the offset will be left 205 /// unmodified. 206 /// 207 /// @param[out] dst 208 /// A buffer to copy \a count uint8_t values into. \a dst must 209 /// be large enough to hold all requested data. 210 /// 211 /// @param[in] count 212 /// The number of uint8_t values to extract. 213 /// 214 /// @return 215 /// \a dst if all values were properly extracted and copied, 216 /// NULL otherise. 217 uint8_t *getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const; 218 219 //------------------------------------------------------------------ 220 /// Extract a uint16_t value from \a *offset_ptr. 221 /// 222 /// Extract a single uint16_t from the binary data at the offset 223 /// pointed to by \a offset_ptr, and update the offset on success. 224 /// 225 /// @param[in,out] offset_ptr 226 /// A pointer to an offset within the data that will be advanced 227 /// by the appropriate number of bytes if the value is extracted 228 /// correctly. If the offset is out of bounds or there are not 229 /// enough bytes to extract this value, the offset will be left 230 /// unmodified. 231 /// 232 /// @return 233 /// The extracted uint16_t value. 234 //------------------------------------------------------------------ 235 uint16_t getU16(uint32_t *offset_ptr) const; 236 237 /// Extract \a count uint16_t values from \a *offset_ptr. 238 /// 239 /// Extract \a count uint16_t values from the binary data at the 240 /// offset pointed to by \a offset_ptr, and advance the offset on 241 /// success. The extracted values are copied into \a dst. 242 /// 243 /// @param[in,out] offset_ptr 244 /// A pointer to an offset within the data that will be advanced 245 /// by the appropriate number of bytes if the value is extracted 246 /// correctly. If the offset is out of bounds or there are not 247 /// enough bytes to extract this value, the offset will be left 248 /// unmodified. 249 /// 250 /// @param[out] dst 251 /// A buffer to copy \a count uint16_t values into. \a dst must 252 /// be large enough to hold all requested data. 253 /// 254 /// @param[in] count 255 /// The number of uint16_t values to extract. 256 /// 257 /// @return 258 /// \a dst if all values were properly extracted and copied, 259 /// NULL otherise. 260 uint16_t *getU16(uint32_t *offset_ptr, uint16_t *dst, uint32_t count) const; 261 262 /// Extract a 24-bit unsigned value from \a *offset_ptr and return it 263 /// in a uint32_t. 264 /// 265 /// Extract 3 bytes from the binary data at the offset pointed to by 266 /// \a offset_ptr, construct a uint32_t from them and update the offset 267 /// on success. 268 /// 269 /// @param[in,out] offset_ptr 270 /// A pointer to an offset within the data that will be advanced 271 /// by the 3 bytes if the value is extracted correctly. If the offset 272 /// is out of bounds or there are not enough bytes to extract this value, 273 /// the offset will be left unmodified. 274 /// 275 /// @return 276 /// The extracted 24-bit value represented in a uint32_t. 277 uint32_t getU24(uint32_t *offset_ptr) const; 278 279 /// Extract a uint32_t value from \a *offset_ptr. 280 /// 281 /// Extract a single uint32_t from the binary data at the offset 282 /// pointed to by \a offset_ptr, and update the offset on success. 283 /// 284 /// @param[in,out] offset_ptr 285 /// A pointer to an offset within the data that will be advanced 286 /// by the appropriate number of bytes if the value is extracted 287 /// correctly. If the offset is out of bounds or there are not 288 /// enough bytes to extract this value, the offset will be left 289 /// unmodified. 290 /// 291 /// @return 292 /// The extracted uint32_t value. 293 uint32_t getU32(uint32_t *offset_ptr) const; 294 295 /// Extract \a count uint32_t values from \a *offset_ptr. 296 /// 297 /// Extract \a count uint32_t values from the binary data at the 298 /// offset pointed to by \a offset_ptr, and advance the offset on 299 /// success. The extracted values are copied into \a dst. 300 /// 301 /// @param[in,out] offset_ptr 302 /// A pointer to an offset within the data that will be advanced 303 /// by the appropriate number of bytes if the value is extracted 304 /// correctly. If the offset is out of bounds or there are not 305 /// enough bytes to extract this value, the offset will be left 306 /// unmodified. 307 /// 308 /// @param[out] dst 309 /// A buffer to copy \a count uint32_t values into. \a dst must 310 /// be large enough to hold all requested data. 311 /// 312 /// @param[in] count 313 /// The number of uint32_t values to extract. 314 /// 315 /// @return 316 /// \a dst if all values were properly extracted and copied, 317 /// NULL otherise. 318 uint32_t *getU32(uint32_t *offset_ptr, uint32_t *dst, uint32_t count) const; 319 320 /// Extract a uint64_t value from \a *offset_ptr. 321 /// 322 /// Extract a single uint64_t from the binary data at the offset 323 /// pointed to by \a offset_ptr, and update the offset on success. 324 /// 325 /// @param[in,out] offset_ptr 326 /// A pointer to an offset within the data that will be advanced 327 /// by the appropriate number of bytes if the value is extracted 328 /// correctly. If the offset is out of bounds or there are not 329 /// enough bytes to extract this value, the offset will be left 330 /// unmodified. 331 /// 332 /// @return 333 /// The extracted uint64_t value. 334 uint64_t getU64(uint32_t *offset_ptr) const; 335 336 /// Extract \a count uint64_t values from \a *offset_ptr. 337 /// 338 /// Extract \a count uint64_t values from the binary data at the 339 /// offset pointed to by \a offset_ptr, and advance the offset on 340 /// success. The extracted values are copied into \a dst. 341 /// 342 /// @param[in,out] offset_ptr 343 /// A pointer to an offset within the data that will be advanced 344 /// by the appropriate number of bytes if the value is extracted 345 /// correctly. If the offset is out of bounds or there are not 346 /// enough bytes to extract this value, the offset will be left 347 /// unmodified. 348 /// 349 /// @param[out] dst 350 /// A buffer to copy \a count uint64_t values into. \a dst must 351 /// be large enough to hold all requested data. 352 /// 353 /// @param[in] count 354 /// The number of uint64_t values to extract. 355 /// 356 /// @return 357 /// \a dst if all values were properly extracted and copied, 358 /// NULL otherise. 359 uint64_t *getU64(uint32_t *offset_ptr, uint64_t *dst, uint32_t count) const; 360 361 /// Extract a signed LEB128 value from \a *offset_ptr. 362 /// 363 /// Extracts an signed LEB128 number from this object's data 364 /// starting at the offset pointed to by \a offset_ptr. The offset 365 /// pointed to by \a offset_ptr will be updated with the offset of 366 /// the byte following the last extracted byte. 367 /// 368 /// @param[in,out] offset_ptr 369 /// A pointer to an offset within the data that will be advanced 370 /// by the appropriate number of bytes if the value is extracted 371 /// correctly. If the offset is out of bounds or there are not 372 /// enough bytes to extract this value, the offset will be left 373 /// unmodified. 374 /// 375 /// @return 376 /// The extracted signed integer value. 377 int64_t getSLEB128(uint32_t *offset_ptr) const; 378 379 /// Extract a unsigned LEB128 value from \a *offset_ptr. 380 /// 381 /// Extracts an unsigned LEB128 number from this object's data 382 /// starting at the offset pointed to by \a offset_ptr. The offset 383 /// pointed to by \a offset_ptr will be updated with the offset of 384 /// the byte following the last extracted byte. 385 /// 386 /// @param[in,out] offset_ptr 387 /// A pointer to an offset within the data that will be advanced 388 /// by the appropriate number of bytes if the value is extracted 389 /// correctly. If the offset is out of bounds or there are not 390 /// enough bytes to extract this value, the offset will be left 391 /// unmodified. 392 /// 393 /// @return 394 /// The extracted unsigned integer value. 395 uint64_t getULEB128(uint32_t *offset_ptr) const; 396 397 /// Test the validity of \a offset. 398 /// 399 /// @return 400 /// \b true if \a offset is a valid offset into the data in this 401 /// object, \b false otherwise. 402 bool isValidOffset(uint32_t offset) const { return Data.size() > offset; } 403 404 /// Test the availability of \a length bytes of data from \a offset. 405 /// 406 /// @return 407 /// \b true if \a offset is a valid offset and there are \a 408 /// length bytes available at that offset, \b false otherwise. 409 bool isValidOffsetForDataOfSize(uint32_t offset, uint32_t length) const { 410 return offset + length >= offset && isValidOffset(offset + length - 1); 411 } 412 413 /// Test the availability of enough bytes of data for a pointer from 414 /// \a offset. The size of a pointer is \a getAddressSize(). 415 /// 416 /// @return 417 /// \b true if \a offset is a valid offset and there are enough 418 /// bytes for a pointer available at that offset, \b false 419 /// otherwise. 420 bool isValidOffsetForAddress(uint32_t offset) const { 421 return isValidOffsetForDataOfSize(offset, AddressSize); 422 } 423 }; 424 425 } // namespace llvm 426 427 #endif 428