1 //===-- DataExtractor.h -----------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #ifndef LLVM_SUPPORT_DATAEXTRACTOR_H 10 #define LLVM_SUPPORT_DATAEXTRACTOR_H 11 12 #include "llvm/ADT/StringRef.h" 13 #include "llvm/Support/DataTypes.h" 14 #include "llvm/Support/Error.h" 15 16 namespace llvm { 17 18 /// An auxiliary type to facilitate extraction of 3-byte entities. 19 struct Uint24 { 20 uint8_t Bytes[3]; 21 Uint24(uint8_t U) { 22 Bytes[0] = Bytes[1] = Bytes[2] = U; 23 } 24 Uint24(uint8_t U0, uint8_t U1, uint8_t U2) { 25 Bytes[0] = U0; Bytes[1] = U1; Bytes[2] = U2; 26 } 27 uint32_t getAsUint32(bool IsLittleEndian) const { 28 int LoIx = IsLittleEndian ? 0 : 2; 29 return Bytes[LoIx] + (Bytes[1] << 8) + (Bytes[2-LoIx] << 16); 30 } 31 }; 32 33 using uint24_t = Uint24; 34 static_assert(sizeof(uint24_t) == 3, "sizeof(uint24_t) != 3"); 35 36 /// Needed by swapByteOrder(). 37 inline uint24_t getSwappedBytes(uint24_t C) { 38 return uint24_t(C.Bytes[2], C.Bytes[1], C.Bytes[0]); 39 } 40 41 class DataExtractor { 42 StringRef Data; 43 uint8_t IsLittleEndian; 44 uint8_t AddressSize; 45 public: 46 /// A class representing a position in a DataExtractor, as well as any error 47 /// encountered during extraction. It enables one to extract a sequence of 48 /// values without error-checking and then checking for errors in bulk at the 49 /// end. The class holds an Error object, so failing to check the result of 50 /// the parse will result in a runtime error. The error flag is sticky and 51 /// will cause all subsequent extraction functions to fail without even 52 /// attempting to parse and without updating the Cursor offset. After clearing 53 /// the error flag, one can again use the Cursor object for parsing. 54 class Cursor { 55 uint64_t Offset; 56 Error Err; 57 58 friend class DataExtractor; 59 60 public: 61 /// Construct a cursor for extraction from the given offset. 62 explicit Cursor(uint64_t Offset) : Offset(Offset), Err(Error::success()) {} 63 64 /// Checks whether the cursor is valid (i.e. no errors were encountered). In 65 /// case of errors, this does not clear the error flag -- one must call 66 /// takeError() instead. 67 explicit operator bool() { return !Err; } 68 69 /// Return the current position of this Cursor. In the error state this is 70 /// the position of the Cursor before the first error was encountered. 71 uint64_t tell() const { return Offset; } 72 73 /// Return error contained inside this Cursor, if any. Clears the internal 74 /// Cursor state. 75 Error takeError() { return std::move(Err); } 76 }; 77 78 /// Construct with a buffer that is owned by the caller. 79 /// 80 /// This constructor allows us to use data that is owned by the 81 /// caller. The data must stay around as long as this object is 82 /// valid. 83 DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize) 84 : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {} 85 DataExtractor(ArrayRef<uint8_t> Data, bool IsLittleEndian, 86 uint8_t AddressSize) 87 : Data(StringRef(reinterpret_cast<const char *>(Data.data()), 88 Data.size())), 89 IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {} 90 91 /// Get the data pointed to by this extractor. 92 StringRef getData() const { return Data; } 93 /// Get the endianness for this extractor. 94 bool isLittleEndian() const { return IsLittleEndian; } 95 /// Get the address size for this extractor. 96 uint8_t getAddressSize() const { return AddressSize; } 97 /// Set the address size for this extractor. 98 void setAddressSize(uint8_t Size) { AddressSize = Size; } 99 100 /// Extract a C string from \a *offset_ptr. 101 /// 102 /// Returns a pointer to a C String from the data at the offset 103 /// pointed to by \a offset_ptr. A variable length NULL terminated C 104 /// string will be extracted and the \a offset_ptr will be 105 /// updated with the offset of the byte that follows the NULL 106 /// terminator byte. 107 /// 108 /// @param[in,out] offset_ptr 109 /// A pointer to an offset within the data that will be advanced 110 /// by the appropriate number of bytes if the value is extracted 111 /// correctly. If the offset is out of bounds or there are not 112 /// enough bytes to extract this value, the offset will be left 113 /// unmodified. 114 /// 115 /// @return 116 /// A pointer to the C string value in the data. If the offset 117 /// pointed to by \a offset_ptr is out of bounds, or if the 118 /// offset plus the length of the C string is out of bounds, 119 /// NULL will be returned. 120 const char *getCStr(uint64_t *offset_ptr) const; 121 122 /// Extract a C string from \a *offset_ptr. 123 /// 124 /// Returns a StringRef for the C String from the data at the offset 125 /// pointed to by \a offset_ptr. A variable length NULL terminated C 126 /// string will be extracted and the \a offset_ptr will be 127 /// updated with the offset of the byte that follows the NULL 128 /// terminator byte. 129 /// 130 /// \param[in,out] offset_ptr 131 /// A pointer to an offset within the data that will be advanced 132 /// by the appropriate number of bytes if the value is extracted 133 /// correctly. If the offset is out of bounds or there are not 134 /// enough bytes to extract this value, the offset will be left 135 /// unmodified. 136 /// 137 /// \return 138 /// A StringRef for the C string value in the data. If the offset 139 /// pointed to by \a offset_ptr is out of bounds, or if the 140 /// offset plus the length of the C string is out of bounds, 141 /// a default-initialized StringRef will be returned. 142 StringRef getCStrRef(uint64_t *offset_ptr) const; 143 144 /// Extract an unsigned integer of size \a byte_size from \a 145 /// *offset_ptr. 146 /// 147 /// Extract a single unsigned integer value and update the offset 148 /// pointed to by \a offset_ptr. The size of the extracted integer 149 /// is specified by the \a byte_size argument. \a byte_size should 150 /// have a value greater than or equal to one and less than or equal 151 /// to eight since the return value is 64 bits wide. Any 152 /// \a byte_size values less than 1 or greater than 8 will result in 153 /// nothing being extracted, and zero being returned. 154 /// 155 /// @param[in,out] offset_ptr 156 /// A pointer to an offset within the data that will be advanced 157 /// by the appropriate number of bytes if the value is extracted 158 /// correctly. If the offset is out of bounds or there are not 159 /// enough bytes to extract this value, the offset will be left 160 /// unmodified. 161 /// 162 /// @param[in] byte_size 163 /// The size in byte of the integer to extract. 164 /// 165 /// @param[in,out] Err 166 /// A pointer to an Error object. Upon return the Error object is set to 167 /// indicate the result (success/failure) of the function. If the Error 168 /// object is already set when calling this function, no extraction is 169 /// performed. 170 /// 171 /// @return 172 /// The unsigned integer value that was extracted, or zero on 173 /// failure. 174 uint64_t getUnsigned(uint64_t *offset_ptr, uint32_t byte_size, 175 Error *Err = nullptr) const; 176 177 /// Extract an unsigned integer of the given size from the location given by 178 /// the cursor. In case of an extraction error, or if the cursor is already in 179 /// an error state, zero is returned. 180 uint64_t getUnsigned(Cursor &C, uint32_t Size) const { 181 return getUnsigned(&C.Offset, Size, &C.Err); 182 } 183 184 /// Extract an signed integer of size \a byte_size from \a *offset_ptr. 185 /// 186 /// Extract a single signed integer value (sign extending if required) 187 /// and update the offset pointed to by \a offset_ptr. The size of 188 /// the extracted integer is specified by the \a byte_size argument. 189 /// \a byte_size should have a value greater than or equal to one 190 /// and less than or equal to eight since the return value is 64 191 /// bits wide. Any \a byte_size values less than 1 or greater than 192 /// 8 will result in nothing being extracted, and zero being returned. 193 /// 194 /// @param[in,out] offset_ptr 195 /// A pointer to an offset within the data that will be advanced 196 /// by the appropriate number of bytes if the value is extracted 197 /// correctly. If the offset is out of bounds or there are not 198 /// enough bytes to extract this value, the offset will be left 199 /// unmodified. 200 /// 201 /// @param[in] size 202 /// The size in bytes of the integer to extract. 203 /// 204 /// @return 205 /// The sign extended signed integer value that was extracted, 206 /// or zero on failure. 207 int64_t getSigned(uint64_t *offset_ptr, uint32_t size) const; 208 209 //------------------------------------------------------------------ 210 /// Extract an pointer from \a *offset_ptr. 211 /// 212 /// Extract a single pointer from the data and update the offset 213 /// pointed to by \a offset_ptr. The size of the extracted pointer 214 /// is \a getAddressSize(), so the address size has to be 215 /// set correctly prior to extracting any pointer values. 216 /// 217 /// @param[in,out] offset_ptr 218 /// A pointer to an offset within the data that will be advanced 219 /// by the appropriate number of bytes if the value is extracted 220 /// correctly. If the offset is out of bounds or there are not 221 /// enough bytes to extract this value, the offset will be left 222 /// unmodified. 223 /// 224 /// @return 225 /// The extracted pointer value as a 64 integer. 226 uint64_t getAddress(uint64_t *offset_ptr) const { 227 return getUnsigned(offset_ptr, AddressSize); 228 } 229 230 /// Extract a pointer-sized unsigned integer from the location given by the 231 /// cursor. In case of an extraction error, or if the cursor is already in 232 /// an error state, zero is returned. 233 uint64_t getAddress(Cursor &C) const { return getUnsigned(C, AddressSize); } 234 235 /// Extract a uint8_t value from \a *offset_ptr. 236 /// 237 /// Extract a single uint8_t from the binary data at the offset 238 /// pointed to by \a offset_ptr, and advance the offset on success. 239 /// 240 /// @param[in,out] offset_ptr 241 /// A pointer to an offset within the data that will be advanced 242 /// by the appropriate number of bytes if the value is extracted 243 /// correctly. If the offset is out of bounds or there are not 244 /// enough bytes to extract this value, the offset will be left 245 /// unmodified. 246 /// 247 /// @param[in,out] Err 248 /// A pointer to an Error object. Upon return the Error object is set to 249 /// indicate the result (success/failure) of the function. If the Error 250 /// object is already set when calling this function, no extraction is 251 /// performed. 252 /// 253 /// @return 254 /// The extracted uint8_t value. 255 uint8_t getU8(uint64_t *offset_ptr, Error *Err = nullptr) const; 256 257 /// Extract a single uint8_t value from the location given by the cursor. In 258 /// case of an extraction error, or if the cursor is already in an error 259 /// state, zero is returned. 260 uint8_t getU8(Cursor &C) const { return getU8(&C.Offset, &C.Err); } 261 262 /// Extract \a count uint8_t values from \a *offset_ptr. 263 /// 264 /// Extract \a count uint8_t values from the binary data at the 265 /// offset pointed to by \a offset_ptr, and advance the offset on 266 /// success. The extracted values are copied into \a dst. 267 /// 268 /// @param[in,out] offset_ptr 269 /// A pointer to an offset within the data that will be advanced 270 /// by the appropriate number of bytes if the value is extracted 271 /// correctly. If the offset is out of bounds or there are not 272 /// enough bytes to extract this value, the offset will be left 273 /// unmodified. 274 /// 275 /// @param[out] dst 276 /// A buffer to copy \a count uint8_t values into. \a dst must 277 /// be large enough to hold all requested data. 278 /// 279 /// @param[in] count 280 /// The number of uint8_t values to extract. 281 /// 282 /// @return 283 /// \a dst if all values were properly extracted and copied, 284 /// NULL otherise. 285 uint8_t *getU8(uint64_t *offset_ptr, uint8_t *dst, uint32_t count) const; 286 287 /// Extract \a Count uint8_t values from the location given by the cursor and 288 /// store them into the destination buffer. In case of an extraction error, or 289 /// if the cursor is already in an error state, a nullptr is returned and the 290 /// destination buffer is left unchanged. 291 uint8_t *getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const; 292 293 /// Extract \a Count uint8_t values from the location given by the cursor and 294 /// store them into the destination vector. The vector is resized to fit the 295 /// extracted data. In case of an extraction error, or if the cursor is 296 /// already in an error state, the destination vector is left unchanged and 297 /// cursor is placed into an error state. 298 void getU8(Cursor &C, SmallVectorImpl<uint8_t> &Dst, uint32_t Count) const { 299 if (isValidOffsetForDataOfSize(C.Offset, Count)) 300 Dst.resize(Count); 301 302 // This relies on the fact that getU8 will not attempt to write to the 303 // buffer if isValidOffsetForDataOfSize(C.Offset, Count) is false. 304 getU8(C, Dst.data(), Count); 305 } 306 307 //------------------------------------------------------------------ 308 /// Extract a uint16_t value from \a *offset_ptr. 309 /// 310 /// Extract a single uint16_t from the binary data at the offset 311 /// pointed to by \a offset_ptr, and update the offset on success. 312 /// 313 /// @param[in,out] offset_ptr 314 /// A pointer to an offset within the data that will be advanced 315 /// by the appropriate number of bytes if the value is extracted 316 /// correctly. If the offset is out of bounds or there are not 317 /// enough bytes to extract this value, the offset will be left 318 /// unmodified. 319 /// 320 /// @param[in,out] Err 321 /// A pointer to an Error object. Upon return the Error object is set to 322 /// indicate the result (success/failure) of the function. If the Error 323 /// object is already set when calling this function, no extraction is 324 /// performed. 325 /// 326 /// @return 327 /// The extracted uint16_t value. 328 //------------------------------------------------------------------ 329 uint16_t getU16(uint64_t *offset_ptr, Error *Err = nullptr) const; 330 331 /// Extract a single uint16_t value from the location given by the cursor. In 332 /// case of an extraction error, or if the cursor is already in an error 333 /// state, zero is returned. 334 uint16_t getU16(Cursor &C) const { return getU16(&C.Offset, &C.Err); } 335 336 /// Extract \a count uint16_t values from \a *offset_ptr. 337 /// 338 /// Extract \a count uint16_t values from the binary data at the 339 /// offset pointed to by \a offset_ptr, and advance the offset on 340 /// success. The extracted values are copied into \a dst. 341 /// 342 /// @param[in,out] offset_ptr 343 /// A pointer to an offset within the data that will be advanced 344 /// by the appropriate number of bytes if the value is extracted 345 /// correctly. If the offset is out of bounds or there are not 346 /// enough bytes to extract this value, the offset will be left 347 /// unmodified. 348 /// 349 /// @param[out] dst 350 /// A buffer to copy \a count uint16_t values into. \a dst must 351 /// be large enough to hold all requested data. 352 /// 353 /// @param[in] count 354 /// The number of uint16_t values to extract. 355 /// 356 /// @return 357 /// \a dst if all values were properly extracted and copied, 358 /// NULL otherise. 359 uint16_t *getU16(uint64_t *offset_ptr, uint16_t *dst, uint32_t count) const; 360 361 /// Extract a 24-bit unsigned value from \a *offset_ptr and return it 362 /// in a uint32_t. 363 /// 364 /// Extract 3 bytes from the binary data at the offset pointed to by 365 /// \a offset_ptr, construct a uint32_t from them and update the offset 366 /// on success. 367 /// 368 /// @param[in,out] offset_ptr 369 /// A pointer to an offset within the data that will be advanced 370 /// by the 3 bytes if the value is extracted correctly. If the offset 371 /// is out of bounds or there are not enough bytes to extract this value, 372 /// the offset will be left unmodified. 373 /// 374 /// @return 375 /// The extracted 24-bit value represented in a uint32_t. 376 uint32_t getU24(uint64_t *offset_ptr) const; 377 378 /// Extract a uint32_t value from \a *offset_ptr. 379 /// 380 /// Extract a single uint32_t from the binary data at the offset 381 /// pointed to by \a offset_ptr, and update the offset on success. 382 /// 383 /// @param[in,out] offset_ptr 384 /// A pointer to an offset within the data that will be advanced 385 /// by the appropriate number of bytes if the value is extracted 386 /// correctly. If the offset is out of bounds or there are not 387 /// enough bytes to extract this value, the offset will be left 388 /// unmodified. 389 /// 390 /// @param[in,out] Err 391 /// A pointer to an Error object. Upon return the Error object is set to 392 /// indicate the result (success/failure) of the function. If the Error 393 /// object is already set when calling this function, no extraction is 394 /// performed. 395 /// 396 /// @return 397 /// The extracted uint32_t value. 398 uint32_t getU32(uint64_t *offset_ptr, Error *Err = nullptr) const; 399 400 /// Extract a single uint32_t value from the location given by the cursor. In 401 /// case of an extraction error, or if the cursor is already in an error 402 /// state, zero is returned. 403 uint32_t getU32(Cursor &C) const { return getU32(&C.Offset, &C.Err); } 404 405 /// Extract \a count uint32_t values from \a *offset_ptr. 406 /// 407 /// Extract \a count uint32_t values from the binary data at the 408 /// offset pointed to by \a offset_ptr, and advance the offset on 409 /// success. The extracted values are copied into \a dst. 410 /// 411 /// @param[in,out] offset_ptr 412 /// A pointer to an offset within the data that will be advanced 413 /// by the appropriate number of bytes if the value is extracted 414 /// correctly. If the offset is out of bounds or there are not 415 /// enough bytes to extract this value, the offset will be left 416 /// unmodified. 417 /// 418 /// @param[out] dst 419 /// A buffer to copy \a count uint32_t values into. \a dst must 420 /// be large enough to hold all requested data. 421 /// 422 /// @param[in] count 423 /// The number of uint32_t values to extract. 424 /// 425 /// @return 426 /// \a dst if all values were properly extracted and copied, 427 /// NULL otherise. 428 uint32_t *getU32(uint64_t *offset_ptr, uint32_t *dst, uint32_t count) const; 429 430 /// Extract a uint64_t value from \a *offset_ptr. 431 /// 432 /// Extract a single uint64_t from the binary data at the offset 433 /// pointed to by \a offset_ptr, and update the offset on success. 434 /// 435 /// @param[in,out] offset_ptr 436 /// A pointer to an offset within the data that will be advanced 437 /// by the appropriate number of bytes if the value is extracted 438 /// correctly. If the offset is out of bounds or there are not 439 /// enough bytes to extract this value, the offset will be left 440 /// unmodified. 441 /// 442 /// @param[in,out] Err 443 /// A pointer to an Error object. Upon return the Error object is set to 444 /// indicate the result (success/failure) of the function. If the Error 445 /// object is already set when calling this function, no extraction is 446 /// performed. 447 /// 448 /// @return 449 /// The extracted uint64_t value. 450 uint64_t getU64(uint64_t *offset_ptr, Error *Err = nullptr) const; 451 452 /// Extract a single uint64_t value from the location given by the cursor. In 453 /// case of an extraction error, or if the cursor is already in an error 454 /// state, zero is returned. 455 uint64_t getU64(Cursor &C) const { return getU64(&C.Offset, &C.Err); } 456 457 /// Extract \a count uint64_t values from \a *offset_ptr. 458 /// 459 /// Extract \a count uint64_t values from the binary data at the 460 /// offset pointed to by \a offset_ptr, and advance the offset on 461 /// success. The extracted values are copied into \a dst. 462 /// 463 /// @param[in,out] offset_ptr 464 /// A pointer to an offset within the data that will be advanced 465 /// by the appropriate number of bytes if the value is extracted 466 /// correctly. If the offset is out of bounds or there are not 467 /// enough bytes to extract this value, the offset will be left 468 /// unmodified. 469 /// 470 /// @param[out] dst 471 /// A buffer to copy \a count uint64_t values into. \a dst must 472 /// be large enough to hold all requested data. 473 /// 474 /// @param[in] count 475 /// The number of uint64_t values to extract. 476 /// 477 /// @return 478 /// \a dst if all values were properly extracted and copied, 479 /// NULL otherise. 480 uint64_t *getU64(uint64_t *offset_ptr, uint64_t *dst, uint32_t count) const; 481 482 /// Extract a signed LEB128 value from \a *offset_ptr. 483 /// 484 /// Extracts an signed LEB128 number from this object's data 485 /// starting at the offset pointed to by \a offset_ptr. The offset 486 /// pointed to by \a offset_ptr will be updated with the offset of 487 /// the byte following the last extracted byte. 488 /// 489 /// @param[in,out] offset_ptr 490 /// A pointer to an offset within the data that will be advanced 491 /// by the appropriate number of bytes if the value is extracted 492 /// correctly. If the offset is out of bounds or there are not 493 /// enough bytes to extract this value, the offset will be left 494 /// unmodified. 495 /// 496 /// @return 497 /// The extracted signed integer value. 498 int64_t getSLEB128(uint64_t *offset_ptr) const; 499 500 /// Extract a unsigned LEB128 value from \a *offset_ptr. 501 /// 502 /// Extracts an unsigned LEB128 number from this object's data 503 /// starting at the offset pointed to by \a offset_ptr. The offset 504 /// pointed to by \a offset_ptr will be updated with the offset of 505 /// the byte following the last extracted byte. 506 /// 507 /// @param[in,out] offset_ptr 508 /// A pointer to an offset within the data that will be advanced 509 /// by the appropriate number of bytes if the value is extracted 510 /// correctly. If the offset is out of bounds or there are not 511 /// enough bytes to extract this value, the offset will be left 512 /// unmodified. 513 /// 514 /// @param[in,out] Err 515 /// A pointer to an Error object. Upon return the Error object is set to 516 /// indicate the result (success/failure) of the function. If the Error 517 /// object is already set when calling this function, no extraction is 518 /// performed. 519 /// 520 /// @return 521 /// The extracted unsigned integer value. 522 uint64_t getULEB128(uint64_t *offset_ptr, llvm::Error *Err = nullptr) const; 523 524 /// Extract an unsigned ULEB128 value from the location given by the cursor. 525 /// In case of an extraction error, or if the cursor is already in an error 526 /// state, zero is returned. 527 uint64_t getULEB128(Cursor &C) const { return getULEB128(&C.Offset, &C.Err); } 528 529 /// Advance the Cursor position by the given number of bytes. No-op if the 530 /// cursor is in an error state. 531 void skip(Cursor &C, uint64_t Length) const; 532 533 /// Return true iff the cursor is at the end of the buffer, regardless of the 534 /// error state of the cursor. The only way both eof and error states can be 535 /// true is if one attempts a read while the cursor is at the very end of the 536 /// data buffer. 537 bool eof(const Cursor &C) const { return size() == C.Offset; } 538 539 /// Test the validity of \a offset. 540 /// 541 /// @return 542 /// \b true if \a offset is a valid offset into the data in this 543 /// object, \b false otherwise. 544 bool isValidOffset(uint64_t offset) const { return size() > offset; } 545 546 /// Test the availability of \a length bytes of data from \a offset. 547 /// 548 /// @return 549 /// \b true if \a offset is a valid offset and there are \a 550 /// length bytes available at that offset, \b false otherwise. 551 bool isValidOffsetForDataOfSize(uint64_t offset, uint64_t length) const { 552 return offset + length >= offset && isValidOffset(offset + length - 1); 553 } 554 555 /// Test the availability of enough bytes of data for a pointer from 556 /// \a offset. The size of a pointer is \a getAddressSize(). 557 /// 558 /// @return 559 /// \b true if \a offset is a valid offset and there are enough 560 /// bytes for a pointer available at that offset, \b false 561 /// otherwise. 562 bool isValidOffsetForAddress(uint64_t offset) const { 563 return isValidOffsetForDataOfSize(offset, AddressSize); 564 } 565 566 /// Return the number of bytes in the underlying buffer. 567 size_t size() const { return Data.size(); } 568 569 protected: 570 // Make it possible for subclasses to access these fields without making them 571 // public. 572 static uint64_t &getOffset(Cursor &C) { return C.Offset; } 573 static Error &getError(Cursor &C) { return C.Err; } 574 }; 575 576 } // namespace llvm 577 578 #endif 579