1 //===- Archive.cpp - ar File Format implementation ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the ArchiveObjectFile class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Object/Archive.h" 14 #include "llvm/ADT/Optional.h" 15 #include "llvm/ADT/SmallString.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/Object/Binary.h" 19 #include "llvm/Object/Error.h" 20 #include "llvm/Support/Chrono.h" 21 #include "llvm/Support/Endian.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/ErrorOr.h" 24 #include "llvm/Support/FileSystem.h" 25 #include "llvm/Support/MathExtras.h" 26 #include "llvm/Support/MemoryBuffer.h" 27 #include "llvm/Support/Path.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <algorithm> 30 #include <cassert> 31 #include <cstddef> 32 #include <cstdint> 33 #include <cstring> 34 #include <memory> 35 #include <string> 36 #include <system_error> 37 38 using namespace llvm; 39 using namespace object; 40 using namespace llvm::support::endian; 41 42 void Archive::anchor() {} 43 44 static Error malformedError(Twine Msg) { 45 std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")"; 46 return make_error<GenericBinaryError>(std::move(StringMsg), 47 object_error::parse_failed); 48 } 49 50 static Error 51 createMemberHeaderParseError(const AbstractArchiveMemberHeader *ArMemHeader, 52 const char *RawHeaderPtr, uint64_t Size) { 53 StringRef Msg("remaining size of archive too small for next archive " 54 "member header "); 55 56 Expected<StringRef> NameOrErr = ArMemHeader->getName(Size); 57 if (NameOrErr) 58 return malformedError(Msg + "for " + *NameOrErr); 59 60 consumeError(NameOrErr.takeError()); 61 uint64_t Offset = RawHeaderPtr - ArMemHeader->Parent->getData().data(); 62 return malformedError(Msg + "at offset " + Twine(Offset)); 63 } 64 65 template <class T, std::size_t N> 66 StringRef getFieldRawString(const T (&Field)[N]) { 67 return StringRef(Field, N).rtrim(" "); 68 } 69 70 template <class T> 71 StringRef CommonArchiveMemberHeader<T>::getRawAccessMode() const { 72 return getFieldRawString(ArMemHdr->AccessMode); 73 } 74 75 template <class T> 76 StringRef CommonArchiveMemberHeader<T>::getRawLastModified() const { 77 return getFieldRawString(ArMemHdr->LastModified); 78 } 79 80 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawUID() const { 81 return getFieldRawString(ArMemHdr->UID); 82 } 83 84 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawGID() const { 85 return getFieldRawString(ArMemHdr->GID); 86 } 87 88 template <class T> uint64_t CommonArchiveMemberHeader<T>::getOffset() const { 89 return reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 90 } 91 92 template class object::CommonArchiveMemberHeader<UnixArMemHdrType>; 93 template class object::CommonArchiveMemberHeader<BigArMemHdrType>; 94 95 ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent, 96 const char *RawHeaderPtr, 97 uint64_t Size, Error *Err) 98 : CommonArchiveMemberHeader<UnixArMemHdrType>( 99 Parent, reinterpret_cast<const UnixArMemHdrType *>(RawHeaderPtr)) { 100 if (RawHeaderPtr == nullptr) 101 return; 102 ErrorAsOutParameter ErrAsOutParam(Err); 103 104 if (Size < getSizeOf()) { 105 *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size); 106 return; 107 } 108 if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') { 109 if (Err) { 110 std::string Buf; 111 raw_string_ostream OS(Buf); 112 OS.write_escaped( 113 StringRef(ArMemHdr->Terminator, sizeof(ArMemHdr->Terminator))); 114 OS.flush(); 115 std::string Msg("terminator characters in archive member \"" + Buf + 116 "\" not the correct \"`\\n\" values for the archive " 117 "member header "); 118 Expected<StringRef> NameOrErr = getName(Size); 119 if (!NameOrErr) { 120 consumeError(NameOrErr.takeError()); 121 uint64_t Offset = RawHeaderPtr - Parent->getData().data(); 122 *Err = malformedError(Msg + "at offset " + Twine(Offset)); 123 } else 124 *Err = malformedError(Msg + "for " + NameOrErr.get()); 125 } 126 return; 127 } 128 } 129 130 BigArchiveMemberHeader::BigArchiveMemberHeader(const Archive *Parent, 131 const char *RawHeaderPtr, 132 uint64_t Size, Error *Err) 133 : CommonArchiveMemberHeader<BigArMemHdrType>( 134 Parent, reinterpret_cast<const BigArMemHdrType *>(RawHeaderPtr)) { 135 if (RawHeaderPtr == nullptr) 136 return; 137 ErrorAsOutParameter ErrAsOutParam(Err); 138 139 if (Size < getSizeOf()) 140 *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size); 141 } 142 143 // This gets the raw name from the ArMemHdr->Name field and checks that it is 144 // valid for the kind of archive. If it is not valid it returns an Error. 145 Expected<StringRef> ArchiveMemberHeader::getRawName() const { 146 char EndCond; 147 auto Kind = Parent->kind(); 148 if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) { 149 if (ArMemHdr->Name[0] == ' ') { 150 uint64_t Offset = 151 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 152 return malformedError("name contains a leading space for archive member " 153 "header at offset " + 154 Twine(Offset)); 155 } 156 EndCond = ' '; 157 } else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#') 158 EndCond = ' '; 159 else 160 EndCond = '/'; 161 StringRef::size_type end = 162 StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond); 163 if (end == StringRef::npos) 164 end = sizeof(ArMemHdr->Name); 165 assert(end <= sizeof(ArMemHdr->Name) && end > 0); 166 // Don't include the EndCond if there is one. 167 return StringRef(ArMemHdr->Name, end); 168 } 169 170 Expected<uint64_t> 171 getArchiveMemberDecField(Twine FieldName, const StringRef RawField, 172 const Archive *Parent, 173 const AbstractArchiveMemberHeader *MemHeader) { 174 uint64_t Value; 175 if (RawField.getAsInteger(10, Value)) { 176 uint64_t Offset = MemHeader->getOffset(); 177 return malformedError("characters in " + FieldName + 178 " field in archive member header are not " 179 "all decimal numbers: '" + 180 RawField + 181 "' for the archive " 182 "member header at offset " + 183 Twine(Offset)); 184 } 185 return Value; 186 } 187 188 Expected<uint64_t> 189 getArchiveMemberOctField(Twine FieldName, const StringRef RawField, 190 const Archive *Parent, 191 const AbstractArchiveMemberHeader *MemHeader) { 192 uint64_t Value; 193 if (RawField.getAsInteger(8, Value)) { 194 uint64_t Offset = MemHeader->getOffset(); 195 return malformedError("characters in " + FieldName + 196 " field in archive member header are not " 197 "all octal numbers: '" + 198 RawField + 199 "' for the archive " 200 "member header at offset " + 201 Twine(Offset)); 202 } 203 return Value; 204 } 205 206 Expected<StringRef> BigArchiveMemberHeader::getRawName() const { 207 Expected<uint64_t> NameLenOrErr = getArchiveMemberDecField( 208 "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this); 209 if (!NameLenOrErr) 210 // TODO: Out-of-line. 211 return NameLenOrErr.takeError(); 212 uint64_t NameLen = NameLenOrErr.get(); 213 214 // If the name length is odd, pad with '\0' to get an even length. After 215 // padding, there is the name terminator "`\n". 216 uint64_t NameLenWithPadding = alignTo(NameLen, 2); 217 StringRef NameTerminator = "`\n"; 218 StringRef NameStringWithNameTerminator = 219 StringRef(ArMemHdr->Name, NameLenWithPadding + NameTerminator.size()); 220 if (!NameStringWithNameTerminator.endswith(NameTerminator)) { 221 uint64_t Offset = 222 reinterpret_cast<const char *>(ArMemHdr->Name + NameLenWithPadding) - 223 Parent->getData().data(); 224 // TODO: Out-of-line. 225 return malformedError( 226 "name does not have name terminator \"`\\n\" for archive member" 227 "header at offset " + 228 Twine(Offset)); 229 } 230 return StringRef(ArMemHdr->Name, NameLen); 231 } 232 233 // member including the header, so the size of any name following the header 234 // is checked to make sure it does not overflow. 235 Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const { 236 237 // This can be called from the ArchiveMemberHeader constructor when the 238 // archive header is truncated to produce an error message with the name. 239 // Make sure the name field is not truncated. 240 if (Size < offsetof(UnixArMemHdrType, Name) + sizeof(ArMemHdr->Name)) { 241 uint64_t ArchiveOffset = 242 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 243 return malformedError("archive header truncated before the name field " 244 "for archive member header at offset " + 245 Twine(ArchiveOffset)); 246 } 247 248 // The raw name itself can be invalid. 249 Expected<StringRef> NameOrErr = getRawName(); 250 if (!NameOrErr) 251 return NameOrErr.takeError(); 252 StringRef Name = NameOrErr.get(); 253 254 // Check if it's a special name. 255 if (Name[0] == '/') { 256 if (Name.size() == 1) // Linker member. 257 return Name; 258 if (Name.size() == 2 && Name[1] == '/') // String table. 259 return Name; 260 // It's a long name. 261 // Get the string table offset. 262 std::size_t StringOffset; 263 if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) { 264 std::string Buf; 265 raw_string_ostream OS(Buf); 266 OS.write_escaped(Name.substr(1).rtrim(' ')); 267 OS.flush(); 268 uint64_t ArchiveOffset = 269 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 270 return malformedError("long name offset characters after the '/' are " 271 "not all decimal numbers: '" + 272 Buf + "' for archive member header at offset " + 273 Twine(ArchiveOffset)); 274 } 275 276 // Verify it. 277 if (StringOffset >= Parent->getStringTable().size()) { 278 uint64_t ArchiveOffset = 279 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 280 return malformedError("long name offset " + Twine(StringOffset) + 281 " past the end of the string table for archive " 282 "member header at offset " + 283 Twine(ArchiveOffset)); 284 } 285 286 // GNU long file names end with a "/\n". 287 if (Parent->kind() == Archive::K_GNU || 288 Parent->kind() == Archive::K_GNU64) { 289 size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset); 290 if (End == StringRef::npos || End < 1 || 291 Parent->getStringTable()[End - 1] != '/') { 292 return malformedError("string table at long name offset " + 293 Twine(StringOffset) + "not terminated"); 294 } 295 return Parent->getStringTable().slice(StringOffset, End - 1); 296 } 297 return Parent->getStringTable().begin() + StringOffset; 298 } 299 300 if (Name.startswith("#1/")) { 301 uint64_t NameLength; 302 if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) { 303 std::string Buf; 304 raw_string_ostream OS(Buf); 305 OS.write_escaped(Name.substr(3).rtrim(' ')); 306 OS.flush(); 307 uint64_t ArchiveOffset = 308 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 309 return malformedError("long name length characters after the #1/ are " 310 "not all decimal numbers: '" + 311 Buf + "' for archive member header at offset " + 312 Twine(ArchiveOffset)); 313 } 314 if (getSizeOf() + NameLength > Size) { 315 uint64_t ArchiveOffset = 316 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 317 return malformedError("long name length: " + Twine(NameLength) + 318 " extends past the end of the member or archive " 319 "for archive member header at offset " + 320 Twine(ArchiveOffset)); 321 } 322 return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(), 323 NameLength) 324 .rtrim('\0'); 325 } 326 327 // It is not a long name so trim the blanks at the end of the name. 328 if (Name[Name.size() - 1] != '/') 329 return Name.rtrim(' '); 330 331 // It's a simple name. 332 return Name.drop_back(1); 333 } 334 335 Expected<StringRef> BigArchiveMemberHeader::getName(uint64_t Size) const { 336 return getRawName(); 337 } 338 339 Expected<uint64_t> ArchiveMemberHeader::getSize() const { 340 return getArchiveMemberDecField("size", getFieldRawString(ArMemHdr->Size), 341 Parent, this); 342 } 343 344 Expected<uint64_t> BigArchiveMemberHeader::getSize() const { 345 Expected<uint64_t> SizeOrErr = getArchiveMemberDecField( 346 "size", getFieldRawString(ArMemHdr->Size), Parent, this); 347 if (!SizeOrErr) 348 return SizeOrErr.takeError(); 349 350 Expected<uint64_t> NameLenOrErr = getRawNameSize(); 351 if (!NameLenOrErr) 352 return NameLenOrErr.takeError(); 353 354 return *SizeOrErr + alignTo(*NameLenOrErr, 2); 355 } 356 357 Expected<uint64_t> BigArchiveMemberHeader::getRawNameSize() const { 358 return getArchiveMemberDecField( 359 "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this); 360 } 361 362 Expected<uint64_t> BigArchiveMemberHeader::getNextOffset() const { 363 return getArchiveMemberDecField( 364 "NextOffset", getFieldRawString(ArMemHdr->NextOffset), Parent, this); 365 } 366 367 Expected<sys::fs::perms> AbstractArchiveMemberHeader::getAccessMode() const { 368 Expected<uint64_t> AccessModeOrErr = 369 getArchiveMemberOctField("AccessMode", getRawAccessMode(), Parent, this); 370 if (!AccessModeOrErr) 371 return AccessModeOrErr.takeError(); 372 return static_cast<sys::fs::perms>(*AccessModeOrErr); 373 } 374 375 Expected<sys::TimePoint<std::chrono::seconds>> 376 AbstractArchiveMemberHeader::getLastModified() const { 377 Expected<uint64_t> SecondsOrErr = getArchiveMemberDecField( 378 "LastModified", getRawLastModified(), Parent, this); 379 380 if (!SecondsOrErr) 381 return SecondsOrErr.takeError(); 382 383 return sys::toTimePoint(*SecondsOrErr); 384 } 385 386 Expected<unsigned> AbstractArchiveMemberHeader::getUID() const { 387 StringRef User = getRawUID(); 388 if (User.empty()) 389 return 0; 390 return getArchiveMemberDecField("UID", User, Parent, this); 391 } 392 393 Expected<unsigned> AbstractArchiveMemberHeader::getGID() const { 394 StringRef Group = getRawGID(); 395 if (Group.empty()) 396 return 0; 397 return getArchiveMemberDecField("GID", Group, Parent, this); 398 } 399 400 Expected<bool> ArchiveMemberHeader::isThin() const { 401 Expected<StringRef> NameOrErr = getRawName(); 402 if (!NameOrErr) 403 return NameOrErr.takeError(); 404 StringRef Name = NameOrErr.get(); 405 return Parent->isThin() && Name != "/" && Name != "//" && Name != "/SYM64/"; 406 } 407 408 Expected<const char *> ArchiveMemberHeader::getNextChildLoc() const { 409 uint64_t Size = getSizeOf(); 410 Expected<bool> isThinOrErr = isThin(); 411 if (!isThinOrErr) 412 return isThinOrErr.takeError(); 413 414 bool isThin = isThinOrErr.get(); 415 if (!isThin) { 416 Expected<uint64_t> MemberSize = getSize(); 417 if (!MemberSize) 418 return MemberSize.takeError(); 419 420 Size += MemberSize.get(); 421 } 422 423 // If Size is odd, add 1 to make it even. 424 const char *NextLoc = 425 reinterpret_cast<const char *>(ArMemHdr) + alignTo(Size, 2); 426 427 if (NextLoc == Parent->getMemoryBufferRef().getBufferEnd()) 428 return nullptr; 429 430 return NextLoc; 431 } 432 433 Expected<const char *> BigArchiveMemberHeader::getNextChildLoc() const { 434 if (getOffset() == 435 static_cast<const BigArchive *>(Parent)->getLastChildOffset()) 436 return nullptr; 437 438 Expected<uint64_t> NextOffsetOrErr = getNextOffset(); 439 if (!NextOffsetOrErr) 440 return NextOffsetOrErr.takeError(); 441 return Parent->getData().data() + NextOffsetOrErr.get(); 442 } 443 444 Archive::Child::Child(const Archive *Parent, StringRef Data, 445 uint16_t StartOfFile) 446 : Parent(Parent), Data(Data), StartOfFile(StartOfFile) { 447 Header = Parent->createArchiveMemberHeader(Data.data(), Data.size(), nullptr); 448 } 449 450 Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err) 451 : Parent(Parent) { 452 if (!Start) { 453 Header = nullptr; 454 return; 455 } 456 457 Header = Parent->createArchiveMemberHeader( 458 Start, 459 Parent ? Parent->getData().size() - (Start - Parent->getData().data()) 460 : 0, 461 Err); 462 463 // If we are pointed to real data, Start is not a nullptr, then there must be 464 // a non-null Err pointer available to report malformed data on. Only in 465 // the case sentinel value is being constructed is Err is permitted to be a 466 // nullptr. 467 assert(Err && "Err can't be nullptr if Start is not a nullptr"); 468 469 ErrorAsOutParameter ErrAsOutParam(Err); 470 471 // If there was an error in the construction of the Header 472 // then just return with the error now set. 473 if (*Err) 474 return; 475 476 uint64_t Size = Header->getSizeOf(); 477 Data = StringRef(Start, Size); 478 Expected<bool> isThinOrErr = isThinMember(); 479 if (!isThinOrErr) { 480 *Err = isThinOrErr.takeError(); 481 return; 482 } 483 bool isThin = isThinOrErr.get(); 484 if (!isThin) { 485 Expected<uint64_t> MemberSize = getRawSize(); 486 if (!MemberSize) { 487 *Err = MemberSize.takeError(); 488 return; 489 } 490 Size += MemberSize.get(); 491 Data = StringRef(Start, Size); 492 } 493 494 // Setup StartOfFile and PaddingBytes. 495 StartOfFile = Header->getSizeOf(); 496 // Don't include attached name. 497 Expected<StringRef> NameOrErr = getRawName(); 498 if (!NameOrErr) { 499 *Err = NameOrErr.takeError(); 500 return; 501 } 502 StringRef Name = NameOrErr.get(); 503 504 if (Parent->kind() == Archive::K_AIXBIG) { 505 // The actual start of the file is after the name and any necessary 506 // even-alignment padding. 507 StartOfFile += ((Name.size() + 1) >> 1) << 1; 508 } else if (Name.startswith("#1/")) { 509 uint64_t NameSize; 510 StringRef RawNameSize = Name.substr(3).rtrim(' '); 511 if (RawNameSize.getAsInteger(10, NameSize)) { 512 uint64_t Offset = Start - Parent->getData().data(); 513 *Err = malformedError("long name length characters after the #1/ are " 514 "not all decimal numbers: '" + 515 RawNameSize + 516 "' for archive member header at offset " + 517 Twine(Offset)); 518 return; 519 } 520 StartOfFile += NameSize; 521 } 522 } 523 524 Expected<uint64_t> Archive::Child::getSize() const { 525 if (Parent->IsThin) 526 return Header->getSize(); 527 return Data.size() - StartOfFile; 528 } 529 530 Expected<uint64_t> Archive::Child::getRawSize() const { 531 return Header->getSize(); 532 } 533 534 Expected<bool> Archive::Child::isThinMember() const { return Header->isThin(); } 535 536 Expected<std::string> Archive::Child::getFullName() const { 537 Expected<bool> isThin = isThinMember(); 538 if (!isThin) 539 return isThin.takeError(); 540 assert(isThin.get()); 541 Expected<StringRef> NameOrErr = getName(); 542 if (!NameOrErr) 543 return NameOrErr.takeError(); 544 StringRef Name = *NameOrErr; 545 if (sys::path::is_absolute(Name)) 546 return std::string(Name); 547 548 SmallString<128> FullName = sys::path::parent_path( 549 Parent->getMemoryBufferRef().getBufferIdentifier()); 550 sys::path::append(FullName, Name); 551 return std::string(FullName.str()); 552 } 553 554 Expected<StringRef> Archive::Child::getBuffer() const { 555 Expected<bool> isThinOrErr = isThinMember(); 556 if (!isThinOrErr) 557 return isThinOrErr.takeError(); 558 bool isThin = isThinOrErr.get(); 559 if (!isThin) { 560 Expected<uint64_t> Size = getSize(); 561 if (!Size) 562 return Size.takeError(); 563 return StringRef(Data.data() + StartOfFile, Size.get()); 564 } 565 Expected<std::string> FullNameOrErr = getFullName(); 566 if (!FullNameOrErr) 567 return FullNameOrErr.takeError(); 568 const std::string &FullName = *FullNameOrErr; 569 ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName); 570 if (std::error_code EC = Buf.getError()) 571 return errorCodeToError(EC); 572 Parent->ThinBuffers.push_back(std::move(*Buf)); 573 return Parent->ThinBuffers.back()->getBuffer(); 574 } 575 576 Expected<Archive::Child> Archive::Child::getNext() const { 577 Expected<const char *> NextLocOrErr = Header->getNextChildLoc(); 578 if (!NextLocOrErr) 579 return NextLocOrErr.takeError(); 580 581 const char *NextLoc = *NextLocOrErr; 582 583 // Check to see if this is at the end of the archive. 584 if (NextLoc == nullptr) 585 return Child(nullptr, nullptr, nullptr); 586 587 // Check to see if this is past the end of the archive. 588 if (NextLoc > Parent->Data.getBufferEnd()) { 589 std::string Msg("offset to next archive member past the end of the archive " 590 "after member "); 591 Expected<StringRef> NameOrErr = getName(); 592 if (!NameOrErr) { 593 consumeError(NameOrErr.takeError()); 594 uint64_t Offset = Data.data() - Parent->getData().data(); 595 return malformedError(Msg + "at offset " + Twine(Offset)); 596 } else 597 return malformedError(Msg + NameOrErr.get()); 598 } 599 600 Error Err = Error::success(); 601 Child Ret(Parent, NextLoc, &Err); 602 if (Err) 603 return std::move(Err); 604 return Ret; 605 } 606 607 uint64_t Archive::Child::getChildOffset() const { 608 const char *a = Parent->Data.getBuffer().data(); 609 const char *c = Data.data(); 610 uint64_t offset = c - a; 611 return offset; 612 } 613 614 Expected<StringRef> Archive::Child::getName() const { 615 Expected<uint64_t> RawSizeOrErr = getRawSize(); 616 if (!RawSizeOrErr) 617 return RawSizeOrErr.takeError(); 618 uint64_t RawSize = RawSizeOrErr.get(); 619 Expected<StringRef> NameOrErr = 620 Header->getName(Header->getSizeOf() + RawSize); 621 if (!NameOrErr) 622 return NameOrErr.takeError(); 623 StringRef Name = NameOrErr.get(); 624 return Name; 625 } 626 627 Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const { 628 Expected<StringRef> NameOrErr = getName(); 629 if (!NameOrErr) 630 return NameOrErr.takeError(); 631 StringRef Name = NameOrErr.get(); 632 Expected<StringRef> Buf = getBuffer(); 633 if (!Buf) 634 return createFileError(Name, Buf.takeError()); 635 return MemoryBufferRef(*Buf, Name); 636 } 637 638 Expected<std::unique_ptr<Binary>> 639 Archive::Child::getAsBinary(LLVMContext *Context) const { 640 Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef(); 641 if (!BuffOrErr) 642 return BuffOrErr.takeError(); 643 644 auto BinaryOrErr = createBinary(BuffOrErr.get(), Context); 645 if (BinaryOrErr) 646 return std::move(*BinaryOrErr); 647 return BinaryOrErr.takeError(); 648 } 649 650 Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) { 651 Error Err = Error::success(); 652 std::unique_ptr<Archive> Ret; 653 StringRef Buffer = Source.getBuffer(); 654 655 if (Buffer.startswith(BigArchiveMagic)) 656 Ret = std::make_unique<BigArchive>(Source, Err); 657 else 658 Ret = std::make_unique<Archive>(Source, Err); 659 660 if (Err) 661 return std::move(Err); 662 return std::move(Ret); 663 } 664 665 std::unique_ptr<AbstractArchiveMemberHeader> 666 Archive::createArchiveMemberHeader(const char *RawHeaderPtr, uint64_t Size, 667 Error *Err) const { 668 ErrorAsOutParameter ErrAsOutParam(Err); 669 if (kind() != K_AIXBIG) 670 return std::make_unique<ArchiveMemberHeader>(this, RawHeaderPtr, Size, Err); 671 return std::make_unique<BigArchiveMemberHeader>(this, RawHeaderPtr, Size, 672 Err); 673 } 674 675 uint64_t Archive::getArchiveMagicLen() const { 676 if (isThin()) 677 return sizeof(ThinArchiveMagic) - 1; 678 679 if (Kind() == K_AIXBIG) 680 return sizeof(BigArchiveMagic) - 1; 681 682 return sizeof(ArchiveMagic) - 1; 683 } 684 685 void Archive::setFirstRegular(const Child &C) { 686 FirstRegularData = C.Data; 687 FirstRegularStartOfFile = C.StartOfFile; 688 } 689 690 Archive::Archive(MemoryBufferRef Source, Error &Err) 691 : Binary(Binary::ID_Archive, Source) { 692 ErrorAsOutParameter ErrAsOutParam(&Err); 693 StringRef Buffer = Data.getBuffer(); 694 // Check for sufficient magic. 695 if (Buffer.startswith(ThinArchiveMagic)) { 696 IsThin = true; 697 } else if (Buffer.startswith(ArchiveMagic)) { 698 IsThin = false; 699 } else if (Buffer.startswith(BigArchiveMagic)) { 700 Format = K_AIXBIG; 701 IsThin = false; 702 return; 703 } else { 704 Err = make_error<GenericBinaryError>("file too small to be an archive", 705 object_error::invalid_file_type); 706 return; 707 } 708 709 // Make sure Format is initialized before any call to 710 // ArchiveMemberHeader::getName() is made. This could be a valid empty 711 // archive which is the same in all formats. So claiming it to be gnu to is 712 // fine if not totally correct before we look for a string table or table of 713 // contents. 714 Format = K_GNU; 715 716 // Get the special members. 717 child_iterator I = child_begin(Err, false); 718 if (Err) 719 return; 720 child_iterator E = child_end(); 721 722 // See if this is a valid empty archive and if so return. 723 if (I == E) { 724 Err = Error::success(); 725 return; 726 } 727 const Child *C = &*I; 728 729 auto Increment = [&]() { 730 ++I; 731 if (Err) 732 return true; 733 C = &*I; 734 return false; 735 }; 736 737 Expected<StringRef> NameOrErr = C->getRawName(); 738 if (!NameOrErr) { 739 Err = NameOrErr.takeError(); 740 return; 741 } 742 StringRef Name = NameOrErr.get(); 743 744 // Below is the pattern that is used to figure out the archive format 745 // GNU archive format 746 // First member : / (may exist, if it exists, points to the symbol table ) 747 // Second member : // (may exist, if it exists, points to the string table) 748 // Note : The string table is used if the filename exceeds 15 characters 749 // BSD archive format 750 // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table) 751 // There is no string table, if the filename exceeds 15 characters or has a 752 // embedded space, the filename has #1/<size>, The size represents the size 753 // of the filename that needs to be read after the archive header 754 // COFF archive format 755 // First member : / 756 // Second member : / (provides a directory of symbols) 757 // Third member : // (may exist, if it exists, contains the string table) 758 // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present 759 // even if the string table is empty. However, lib.exe does not in fact 760 // seem to create the third member if there's no member whose filename 761 // exceeds 15 characters. So the third member is optional. 762 763 if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") { 764 if (Name == "__.SYMDEF") 765 Format = K_BSD; 766 else // Name == "__.SYMDEF_64" 767 Format = K_DARWIN64; 768 // We know that the symbol table is not an external file, but we still must 769 // check any Expected<> return value. 770 Expected<StringRef> BufOrErr = C->getBuffer(); 771 if (!BufOrErr) { 772 Err = BufOrErr.takeError(); 773 return; 774 } 775 SymbolTable = BufOrErr.get(); 776 if (Increment()) 777 return; 778 setFirstRegular(*C); 779 780 Err = Error::success(); 781 return; 782 } 783 784 if (Name.startswith("#1/")) { 785 Format = K_BSD; 786 // We know this is BSD, so getName will work since there is no string table. 787 Expected<StringRef> NameOrErr = C->getName(); 788 if (!NameOrErr) { 789 Err = NameOrErr.takeError(); 790 return; 791 } 792 Name = NameOrErr.get(); 793 if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") { 794 // We know that the symbol table is not an external file, but we still 795 // must check any Expected<> return value. 796 Expected<StringRef> BufOrErr = C->getBuffer(); 797 if (!BufOrErr) { 798 Err = BufOrErr.takeError(); 799 return; 800 } 801 SymbolTable = BufOrErr.get(); 802 if (Increment()) 803 return; 804 } else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") { 805 Format = K_DARWIN64; 806 // We know that the symbol table is not an external file, but we still 807 // must check any Expected<> return value. 808 Expected<StringRef> BufOrErr = C->getBuffer(); 809 if (!BufOrErr) { 810 Err = BufOrErr.takeError(); 811 return; 812 } 813 SymbolTable = BufOrErr.get(); 814 if (Increment()) 815 return; 816 } 817 setFirstRegular(*C); 818 return; 819 } 820 821 // MIPS 64-bit ELF archives use a special format of a symbol table. 822 // This format is marked by `ar_name` field equals to "/SYM64/". 823 // For detailed description see page 96 in the following document: 824 // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf 825 826 bool has64SymTable = false; 827 if (Name == "/" || Name == "/SYM64/") { 828 // We know that the symbol table is not an external file, but we still 829 // must check any Expected<> return value. 830 Expected<StringRef> BufOrErr = C->getBuffer(); 831 if (!BufOrErr) { 832 Err = BufOrErr.takeError(); 833 return; 834 } 835 SymbolTable = BufOrErr.get(); 836 if (Name == "/SYM64/") 837 has64SymTable = true; 838 839 if (Increment()) 840 return; 841 if (I == E) { 842 Err = Error::success(); 843 return; 844 } 845 Expected<StringRef> NameOrErr = C->getRawName(); 846 if (!NameOrErr) { 847 Err = NameOrErr.takeError(); 848 return; 849 } 850 Name = NameOrErr.get(); 851 } 852 853 if (Name == "//") { 854 Format = has64SymTable ? K_GNU64 : K_GNU; 855 // The string table is never an external member, but we still 856 // must check any Expected<> return value. 857 Expected<StringRef> BufOrErr = C->getBuffer(); 858 if (!BufOrErr) { 859 Err = BufOrErr.takeError(); 860 return; 861 } 862 StringTable = BufOrErr.get(); 863 if (Increment()) 864 return; 865 setFirstRegular(*C); 866 Err = Error::success(); 867 return; 868 } 869 870 if (Name[0] != '/') { 871 Format = has64SymTable ? K_GNU64 : K_GNU; 872 setFirstRegular(*C); 873 Err = Error::success(); 874 return; 875 } 876 877 if (Name != "/") { 878 Err = errorCodeToError(object_error::parse_failed); 879 return; 880 } 881 882 Format = K_COFF; 883 // We know that the symbol table is not an external file, but we still 884 // must check any Expected<> return value. 885 Expected<StringRef> BufOrErr = C->getBuffer(); 886 if (!BufOrErr) { 887 Err = BufOrErr.takeError(); 888 return; 889 } 890 SymbolTable = BufOrErr.get(); 891 892 if (Increment()) 893 return; 894 895 if (I == E) { 896 setFirstRegular(*C); 897 Err = Error::success(); 898 return; 899 } 900 901 NameOrErr = C->getRawName(); 902 if (!NameOrErr) { 903 Err = NameOrErr.takeError(); 904 return; 905 } 906 Name = NameOrErr.get(); 907 908 if (Name == "//") { 909 // The string table is never an external member, but we still 910 // must check any Expected<> return value. 911 Expected<StringRef> BufOrErr = C->getBuffer(); 912 if (!BufOrErr) { 913 Err = BufOrErr.takeError(); 914 return; 915 } 916 StringTable = BufOrErr.get(); 917 if (Increment()) 918 return; 919 } 920 921 setFirstRegular(*C); 922 Err = Error::success(); 923 } 924 925 Archive::child_iterator Archive::child_begin(Error &Err, 926 bool SkipInternal) const { 927 if (isEmpty()) 928 return child_end(); 929 930 if (SkipInternal) 931 return child_iterator::itr( 932 Child(this, FirstRegularData, FirstRegularStartOfFile), Err); 933 934 const char *Loc = Data.getBufferStart() + getFirstChildOffset(); 935 Child C(this, Loc, &Err); 936 if (Err) 937 return child_end(); 938 return child_iterator::itr(C, Err); 939 } 940 941 Archive::child_iterator Archive::child_end() const { 942 return child_iterator::end(Child(nullptr, nullptr, nullptr)); 943 } 944 945 StringRef Archive::Symbol::getName() const { 946 return Parent->getSymbolTable().begin() + StringIndex; 947 } 948 949 Expected<Archive::Child> Archive::Symbol::getMember() const { 950 const char *Buf = Parent->getSymbolTable().begin(); 951 const char *Offsets = Buf; 952 if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64) 953 Offsets += sizeof(uint64_t); 954 else 955 Offsets += sizeof(uint32_t); 956 uint64_t Offset = 0; 957 if (Parent->kind() == K_GNU) { 958 Offset = read32be(Offsets + SymbolIndex * 4); 959 } else if (Parent->kind() == K_GNU64) { 960 Offset = read64be(Offsets + SymbolIndex * 8); 961 } else if (Parent->kind() == K_BSD) { 962 // The SymbolIndex is an index into the ranlib structs that start at 963 // Offsets (the first uint32_t is the number of bytes of the ranlib 964 // structs). The ranlib structs are a pair of uint32_t's the first 965 // being a string table offset and the second being the offset into 966 // the archive of the member that defines the symbol. Which is what 967 // is needed here. 968 Offset = read32le(Offsets + SymbolIndex * 8 + 4); 969 } else if (Parent->kind() == K_DARWIN64) { 970 // The SymbolIndex is an index into the ranlib_64 structs that start at 971 // Offsets (the first uint64_t is the number of bytes of the ranlib_64 972 // structs). The ranlib_64 structs are a pair of uint64_t's the first 973 // being a string table offset and the second being the offset into 974 // the archive of the member that defines the symbol. Which is what 975 // is needed here. 976 Offset = read64le(Offsets + SymbolIndex * 16 + 8); 977 } else { 978 // Skip offsets. 979 uint32_t MemberCount = read32le(Buf); 980 Buf += MemberCount * 4 + 4; 981 982 uint32_t SymbolCount = read32le(Buf); 983 if (SymbolIndex >= SymbolCount) 984 return errorCodeToError(object_error::parse_failed); 985 986 // Skip SymbolCount to get to the indices table. 987 const char *Indices = Buf + 4; 988 989 // Get the index of the offset in the file member offset table for this 990 // symbol. 991 uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2); 992 // Subtract 1 since OffsetIndex is 1 based. 993 --OffsetIndex; 994 995 if (OffsetIndex >= MemberCount) 996 return errorCodeToError(object_error::parse_failed); 997 998 Offset = read32le(Offsets + OffsetIndex * 4); 999 } 1000 1001 const char *Loc = Parent->getData().begin() + Offset; 1002 Error Err = Error::success(); 1003 Child C(Parent, Loc, &Err); 1004 if (Err) 1005 return std::move(Err); 1006 return C; 1007 } 1008 1009 Archive::Symbol Archive::Symbol::getNext() const { 1010 Symbol t(*this); 1011 if (Parent->kind() == K_BSD) { 1012 // t.StringIndex is an offset from the start of the __.SYMDEF or 1013 // "__.SYMDEF SORTED" member into the string table for the ranlib 1014 // struct indexed by t.SymbolIndex . To change t.StringIndex to the 1015 // offset in the string table for t.SymbolIndex+1 we subtract the 1016 // its offset from the start of the string table for t.SymbolIndex 1017 // and add the offset of the string table for t.SymbolIndex+1. 1018 1019 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t 1020 // which is the number of bytes of ranlib structs that follow. The ranlib 1021 // structs are a pair of uint32_t's the first being a string table offset 1022 // and the second being the offset into the archive of the member that 1023 // define the symbol. After that the next uint32_t is the byte count of 1024 // the string table followed by the string table. 1025 const char *Buf = Parent->getSymbolTable().begin(); 1026 uint32_t RanlibCount = 0; 1027 RanlibCount = read32le(Buf) / 8; 1028 // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount) 1029 // don't change the t.StringIndex as we don't want to reference a ranlib 1030 // past RanlibCount. 1031 if (t.SymbolIndex + 1 < RanlibCount) { 1032 const char *Ranlibs = Buf + 4; 1033 uint32_t CurRanStrx = 0; 1034 uint32_t NextRanStrx = 0; 1035 CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8); 1036 NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8); 1037 t.StringIndex -= CurRanStrx; 1038 t.StringIndex += NextRanStrx; 1039 } 1040 } else { 1041 // Go to one past next null. 1042 t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1; 1043 } 1044 ++t.SymbolIndex; 1045 return t; 1046 } 1047 1048 Archive::symbol_iterator Archive::symbol_begin() const { 1049 if (!hasSymbolTable()) 1050 return symbol_iterator(Symbol(this, 0, 0)); 1051 1052 const char *buf = getSymbolTable().begin(); 1053 if (kind() == K_GNU) { 1054 uint32_t symbol_count = 0; 1055 symbol_count = read32be(buf); 1056 buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t))); 1057 } else if (kind() == K_GNU64) { 1058 uint64_t symbol_count = read64be(buf); 1059 buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t))); 1060 } else if (kind() == K_BSD) { 1061 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t 1062 // which is the number of bytes of ranlib structs that follow. The ranlib 1063 // structs are a pair of uint32_t's the first being a string table offset 1064 // and the second being the offset into the archive of the member that 1065 // define the symbol. After that the next uint32_t is the byte count of 1066 // the string table followed by the string table. 1067 uint32_t ranlib_count = 0; 1068 ranlib_count = read32le(buf) / 8; 1069 const char *ranlibs = buf + 4; 1070 uint32_t ran_strx = 0; 1071 ran_strx = read32le(ranlibs); 1072 buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t)))); 1073 // Skip the byte count of the string table. 1074 buf += sizeof(uint32_t); 1075 buf += ran_strx; 1076 } else if (kind() == K_DARWIN64) { 1077 // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t 1078 // which is the number of bytes of ranlib_64 structs that follow. The 1079 // ranlib_64 structs are a pair of uint64_t's the first being a string 1080 // table offset and the second being the offset into the archive of the 1081 // member that define the symbol. After that the next uint64_t is the byte 1082 // count of the string table followed by the string table. 1083 uint64_t ranlib_count = 0; 1084 ranlib_count = read64le(buf) / 16; 1085 const char *ranlibs = buf + 8; 1086 uint64_t ran_strx = 0; 1087 ran_strx = read64le(ranlibs); 1088 buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t)))); 1089 // Skip the byte count of the string table. 1090 buf += sizeof(uint64_t); 1091 buf += ran_strx; 1092 } else { 1093 uint32_t member_count = 0; 1094 uint32_t symbol_count = 0; 1095 member_count = read32le(buf); 1096 buf += 4 + (member_count * 4); // Skip offsets. 1097 symbol_count = read32le(buf); 1098 buf += 4 + (symbol_count * 2); // Skip indices. 1099 } 1100 uint32_t string_start_offset = buf - getSymbolTable().begin(); 1101 return symbol_iterator(Symbol(this, 0, string_start_offset)); 1102 } 1103 1104 Archive::symbol_iterator Archive::symbol_end() const { 1105 return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0)); 1106 } 1107 1108 uint32_t Archive::getNumberOfSymbols() const { 1109 if (!hasSymbolTable()) 1110 return 0; 1111 const char *buf = getSymbolTable().begin(); 1112 if (kind() == K_GNU) 1113 return read32be(buf); 1114 if (kind() == K_GNU64) 1115 return read64be(buf); 1116 if (kind() == K_BSD) 1117 return read32le(buf) / 8; 1118 if (kind() == K_DARWIN64) 1119 return read64le(buf) / 16; 1120 uint32_t member_count = 0; 1121 member_count = read32le(buf); 1122 buf += 4 + (member_count * 4); // Skip offsets. 1123 return read32le(buf); 1124 } 1125 1126 Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const { 1127 Archive::symbol_iterator bs = symbol_begin(); 1128 Archive::symbol_iterator es = symbol_end(); 1129 1130 for (; bs != es; ++bs) { 1131 StringRef SymName = bs->getName(); 1132 if (SymName == name) { 1133 if (auto MemberOrErr = bs->getMember()) 1134 return Child(*MemberOrErr); 1135 else 1136 return MemberOrErr.takeError(); 1137 } 1138 } 1139 return Optional<Child>(); 1140 } 1141 1142 // Returns true if archive file contains no member file. 1143 bool Archive::isEmpty() const { 1144 return Data.getBufferSize() == getArchiveMagicLen(); 1145 } 1146 1147 bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); } 1148 1149 BigArchive::BigArchive(MemoryBufferRef Source, Error &Err) 1150 : Archive(Source, Err) { 1151 ErrorAsOutParameter ErrAsOutParam(&Err); 1152 StringRef Buffer = Data.getBuffer(); 1153 ArFixLenHdr = reinterpret_cast<const FixLenHdr *>(Buffer.data()); 1154 1155 StringRef RawOffset = getFieldRawString(ArFixLenHdr->FirstChildOffset); 1156 if (RawOffset.getAsInteger(10, FirstChildOffset)) 1157 // TODO: Out-of-line. 1158 Err = malformedError("malformed AIX big archive: first member offset \"" + 1159 RawOffset + "\" is not a number"); 1160 1161 RawOffset = getFieldRawString(ArFixLenHdr->LastChildOffset); 1162 if (RawOffset.getAsInteger(10, LastChildOffset)) 1163 // TODO: Out-of-line. 1164 Err = malformedError("malformed AIX big archive: last member offset \"" + 1165 RawOffset + "\" is not a number"); 1166 1167 child_iterator I = child_begin(Err, false); 1168 if (Err) 1169 return; 1170 child_iterator E = child_end(); 1171 if (I == E) { 1172 Err = Error::success(); 1173 return; 1174 } 1175 setFirstRegular(*I); 1176 Err = Error::success(); 1177 } 1178