1 //===- Archive.cpp - ar File Format implementation ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the ArchiveObjectFile class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Object/Archive.h" 14 #include "llvm/ADT/Optional.h" 15 #include "llvm/ADT/SmallString.h" 16 #include "llvm/ADT/StringRef.h" 17 #include "llvm/ADT/Twine.h" 18 #include "llvm/Object/Binary.h" 19 #include "llvm/Object/Error.h" 20 #include "llvm/Support/Chrono.h" 21 #include "llvm/Support/Endian.h" 22 #include "llvm/Support/Error.h" 23 #include "llvm/Support/ErrorOr.h" 24 #include "llvm/Support/FileSystem.h" 25 #include "llvm/Support/Host.h" 26 #include "llvm/Support/MathExtras.h" 27 #include "llvm/Support/MemoryBuffer.h" 28 #include "llvm/Support/Path.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <algorithm> 31 #include <cassert> 32 #include <cstddef> 33 #include <cstdint> 34 #include <memory> 35 #include <string> 36 #include <system_error> 37 38 using namespace llvm; 39 using namespace object; 40 using namespace llvm::support::endian; 41 42 void Archive::anchor() {} 43 44 static Error malformedError(Twine Msg) { 45 std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")"; 46 return make_error<GenericBinaryError>(std::move(StringMsg), 47 object_error::parse_failed); 48 } 49 50 static Error 51 createMemberHeaderParseError(const AbstractArchiveMemberHeader *ArMemHeader, 52 const char *RawHeaderPtr, uint64_t Size) { 53 StringRef Msg("remaining size of archive too small for next archive " 54 "member header "); 55 56 Expected<StringRef> NameOrErr = ArMemHeader->getName(Size); 57 if (NameOrErr) 58 return malformedError(Msg + "for " + *NameOrErr); 59 60 consumeError(NameOrErr.takeError()); 61 uint64_t Offset = RawHeaderPtr - ArMemHeader->Parent->getData().data(); 62 return malformedError(Msg + "at offset " + Twine(Offset)); 63 } 64 65 template <class T, std::size_t N> 66 StringRef getFieldRawString(const T (&Field)[N]) { 67 return StringRef(Field, N).rtrim(" "); 68 } 69 70 template <class T> 71 StringRef CommonArchiveMemberHeader<T>::getRawAccessMode() const { 72 return getFieldRawString(ArMemHdr->AccessMode); 73 } 74 75 template <class T> 76 StringRef CommonArchiveMemberHeader<T>::getRawLastModified() const { 77 return getFieldRawString(ArMemHdr->LastModified); 78 } 79 80 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawUID() const { 81 return getFieldRawString(ArMemHdr->UID); 82 } 83 84 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawGID() const { 85 return getFieldRawString(ArMemHdr->GID); 86 } 87 88 template <class T> uint64_t CommonArchiveMemberHeader<T>::getOffset() const { 89 return reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 90 } 91 92 template class object::CommonArchiveMemberHeader<UnixArMemHdrType>; 93 template class object::CommonArchiveMemberHeader<BigArMemHdrType>; 94 95 ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent, 96 const char *RawHeaderPtr, 97 uint64_t Size, Error *Err) 98 : CommonArchiveMemberHeader<UnixArMemHdrType>( 99 Parent, reinterpret_cast<const UnixArMemHdrType *>(RawHeaderPtr)) { 100 if (RawHeaderPtr == nullptr) 101 return; 102 ErrorAsOutParameter ErrAsOutParam(Err); 103 104 if (Size < getSizeOf()) { 105 *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size); 106 return; 107 } 108 if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') { 109 if (Err) { 110 std::string Buf; 111 raw_string_ostream OS(Buf); 112 OS.write_escaped( 113 StringRef(ArMemHdr->Terminator, sizeof(ArMemHdr->Terminator))); 114 OS.flush(); 115 std::string Msg("terminator characters in archive member \"" + Buf + 116 "\" not the correct \"`\\n\" values for the archive " 117 "member header "); 118 Expected<StringRef> NameOrErr = getName(Size); 119 if (!NameOrErr) { 120 consumeError(NameOrErr.takeError()); 121 uint64_t Offset = RawHeaderPtr - Parent->getData().data(); 122 *Err = malformedError(Msg + "at offset " + Twine(Offset)); 123 } else 124 *Err = malformedError(Msg + "for " + NameOrErr.get()); 125 } 126 return; 127 } 128 } 129 130 BigArchiveMemberHeader::BigArchiveMemberHeader(const Archive *Parent, 131 const char *RawHeaderPtr, 132 uint64_t Size, Error *Err) 133 : CommonArchiveMemberHeader<BigArMemHdrType>( 134 Parent, reinterpret_cast<const BigArMemHdrType *>(RawHeaderPtr)) { 135 if (RawHeaderPtr == nullptr) 136 return; 137 ErrorAsOutParameter ErrAsOutParam(Err); 138 139 if (Size < getSizeOf()) 140 *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size); 141 } 142 143 // This gets the raw name from the ArMemHdr->Name field and checks that it is 144 // valid for the kind of archive. If it is not valid it returns an Error. 145 Expected<StringRef> ArchiveMemberHeader::getRawName() const { 146 char EndCond; 147 auto Kind = Parent->kind(); 148 if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) { 149 if (ArMemHdr->Name[0] == ' ') { 150 uint64_t Offset = 151 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 152 return malformedError("name contains a leading space for archive member " 153 "header at offset " + 154 Twine(Offset)); 155 } 156 EndCond = ' '; 157 } else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#') 158 EndCond = ' '; 159 else 160 EndCond = '/'; 161 StringRef::size_type end = 162 StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond); 163 if (end == StringRef::npos) 164 end = sizeof(ArMemHdr->Name); 165 assert(end <= sizeof(ArMemHdr->Name) && end > 0); 166 // Don't include the EndCond if there is one. 167 return StringRef(ArMemHdr->Name, end); 168 } 169 170 Expected<uint64_t> 171 getArchiveMemberDecField(Twine FieldName, const StringRef RawField, 172 const Archive *Parent, 173 const AbstractArchiveMemberHeader *MemHeader) { 174 uint64_t Value; 175 if (RawField.getAsInteger(10, Value)) { 176 uint64_t Offset = MemHeader->getOffset(); 177 return malformedError("characters in " + FieldName + 178 " field in archive member header are not " 179 "all decimal numbers: '" + 180 RawField + 181 "' for the archive " 182 "member header at offset " + 183 Twine(Offset)); 184 } 185 return Value; 186 } 187 188 Expected<uint64_t> 189 getArchiveMemberOctField(Twine FieldName, const StringRef RawField, 190 const Archive *Parent, 191 const AbstractArchiveMemberHeader *MemHeader) { 192 uint64_t Value; 193 if (RawField.getAsInteger(8, Value)) { 194 uint64_t Offset = MemHeader->getOffset(); 195 return malformedError("characters in " + FieldName + 196 " field in archive member header are not " 197 "all octal numbers: '" + 198 RawField + 199 "' for the archive " 200 "member header at offset " + 201 Twine(Offset)); 202 } 203 return Value; 204 } 205 206 Expected<StringRef> BigArchiveMemberHeader::getRawName() const { 207 Expected<uint64_t> NameLenOrErr = getArchiveMemberDecField( 208 "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this); 209 if (!NameLenOrErr) 210 // TODO: Out-of-line. 211 return NameLenOrErr.takeError(); 212 uint64_t NameLen = NameLenOrErr.get(); 213 214 // If the name length is odd, pad with '\0' to get an even length. After 215 // padding, there is the name terminator "`\n". 216 uint64_t NameLenWithPadding = alignTo(NameLen, 2); 217 StringRef NameTerminator = "`\n"; 218 StringRef NameStringWithNameTerminator = 219 StringRef(ArMemHdr->Name, NameLenWithPadding + NameTerminator.size()); 220 if (!NameStringWithNameTerminator.endswith(NameTerminator)) { 221 uint64_t Offset = 222 reinterpret_cast<const char *>(ArMemHdr->Name + NameLenWithPadding) - 223 Parent->getData().data(); 224 // TODO: Out-of-line. 225 return malformedError( 226 "name does not have name terminator \"`\\n\" for archive member" 227 "header at offset " + 228 Twine(Offset)); 229 } 230 return StringRef(ArMemHdr->Name, NameLen); 231 } 232 233 // member including the header, so the size of any name following the header 234 // is checked to make sure it does not overflow. 235 Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const { 236 237 // This can be called from the ArchiveMemberHeader constructor when the 238 // archive header is truncated to produce an error message with the name. 239 // Make sure the name field is not truncated. 240 if (Size < offsetof(UnixArMemHdrType, Name) + sizeof(ArMemHdr->Name)) { 241 uint64_t ArchiveOffset = 242 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 243 return malformedError("archive header truncated before the name field " 244 "for archive member header at offset " + 245 Twine(ArchiveOffset)); 246 } 247 248 // The raw name itself can be invalid. 249 Expected<StringRef> NameOrErr = getRawName(); 250 if (!NameOrErr) 251 return NameOrErr.takeError(); 252 StringRef Name = NameOrErr.get(); 253 254 // Check if it's a special name. 255 if (Name[0] == '/') { 256 if (Name.size() == 1) // Linker member. 257 return Name; 258 if (Name.size() == 2 && Name[1] == '/') // String table. 259 return Name; 260 // System libraries from the Windows SDK for Windows 11 contain this symbol. 261 // It looks like a CFG guard: we just skip it for now. 262 if (Name.equals("/<XFGHASHMAP>/")) 263 return Name; 264 // Some libraries (e.g., arm64rt.lib) from the Windows WDK 265 // (version 10.0.22000.0) contain this undocumented special member. 266 if (Name.equals("/<ECSYMBOLS>/")) 267 return Name; 268 // It's a long name. 269 // Get the string table offset. 270 std::size_t StringOffset; 271 if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) { 272 std::string Buf; 273 raw_string_ostream OS(Buf); 274 OS.write_escaped(Name.substr(1).rtrim(' ')); 275 OS.flush(); 276 uint64_t ArchiveOffset = 277 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 278 return malformedError("long name offset characters after the '/' are " 279 "not all decimal numbers: '" + 280 Buf + "' for archive member header at offset " + 281 Twine(ArchiveOffset)); 282 } 283 284 // Verify it. 285 if (StringOffset >= Parent->getStringTable().size()) { 286 uint64_t ArchiveOffset = 287 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 288 return malformedError("long name offset " + Twine(StringOffset) + 289 " past the end of the string table for archive " 290 "member header at offset " + 291 Twine(ArchiveOffset)); 292 } 293 294 // GNU long file names end with a "/\n". 295 if (Parent->kind() == Archive::K_GNU || 296 Parent->kind() == Archive::K_GNU64) { 297 size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset); 298 if (End == StringRef::npos || End < 1 || 299 Parent->getStringTable()[End - 1] != '/') { 300 return malformedError("string table at long name offset " + 301 Twine(StringOffset) + "not terminated"); 302 } 303 return Parent->getStringTable().slice(StringOffset, End - 1); 304 } 305 return Parent->getStringTable().begin() + StringOffset; 306 } 307 308 if (Name.startswith("#1/")) { 309 uint64_t NameLength; 310 if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) { 311 std::string Buf; 312 raw_string_ostream OS(Buf); 313 OS.write_escaped(Name.substr(3).rtrim(' ')); 314 OS.flush(); 315 uint64_t ArchiveOffset = 316 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 317 return malformedError("long name length characters after the #1/ are " 318 "not all decimal numbers: '" + 319 Buf + "' for archive member header at offset " + 320 Twine(ArchiveOffset)); 321 } 322 if (getSizeOf() + NameLength > Size) { 323 uint64_t ArchiveOffset = 324 reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data(); 325 return malformedError("long name length: " + Twine(NameLength) + 326 " extends past the end of the member or archive " 327 "for archive member header at offset " + 328 Twine(ArchiveOffset)); 329 } 330 return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(), 331 NameLength) 332 .rtrim('\0'); 333 } 334 335 // It is not a long name so trim the blanks at the end of the name. 336 if (Name[Name.size() - 1] != '/') 337 return Name.rtrim(' '); 338 339 // It's a simple name. 340 return Name.drop_back(1); 341 } 342 343 Expected<StringRef> BigArchiveMemberHeader::getName(uint64_t Size) const { 344 return getRawName(); 345 } 346 347 Expected<uint64_t> ArchiveMemberHeader::getSize() const { 348 return getArchiveMemberDecField("size", getFieldRawString(ArMemHdr->Size), 349 Parent, this); 350 } 351 352 Expected<uint64_t> BigArchiveMemberHeader::getSize() const { 353 Expected<uint64_t> SizeOrErr = getArchiveMemberDecField( 354 "size", getFieldRawString(ArMemHdr->Size), Parent, this); 355 if (!SizeOrErr) 356 return SizeOrErr.takeError(); 357 358 Expected<uint64_t> NameLenOrErr = getRawNameSize(); 359 if (!NameLenOrErr) 360 return NameLenOrErr.takeError(); 361 362 return *SizeOrErr + alignTo(*NameLenOrErr, 2); 363 } 364 365 Expected<uint64_t> BigArchiveMemberHeader::getRawNameSize() const { 366 return getArchiveMemberDecField( 367 "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this); 368 } 369 370 Expected<uint64_t> BigArchiveMemberHeader::getNextOffset() const { 371 return getArchiveMemberDecField( 372 "NextOffset", getFieldRawString(ArMemHdr->NextOffset), Parent, this); 373 } 374 375 Expected<sys::fs::perms> AbstractArchiveMemberHeader::getAccessMode() const { 376 Expected<uint64_t> AccessModeOrErr = 377 getArchiveMemberOctField("AccessMode", getRawAccessMode(), Parent, this); 378 if (!AccessModeOrErr) 379 return AccessModeOrErr.takeError(); 380 return static_cast<sys::fs::perms>(*AccessModeOrErr); 381 } 382 383 Expected<sys::TimePoint<std::chrono::seconds>> 384 AbstractArchiveMemberHeader::getLastModified() const { 385 Expected<uint64_t> SecondsOrErr = getArchiveMemberDecField( 386 "LastModified", getRawLastModified(), Parent, this); 387 388 if (!SecondsOrErr) 389 return SecondsOrErr.takeError(); 390 391 return sys::toTimePoint(*SecondsOrErr); 392 } 393 394 Expected<unsigned> AbstractArchiveMemberHeader::getUID() const { 395 StringRef User = getRawUID(); 396 if (User.empty()) 397 return 0; 398 return getArchiveMemberDecField("UID", User, Parent, this); 399 } 400 401 Expected<unsigned> AbstractArchiveMemberHeader::getGID() const { 402 StringRef Group = getRawGID(); 403 if (Group.empty()) 404 return 0; 405 return getArchiveMemberDecField("GID", Group, Parent, this); 406 } 407 408 Expected<bool> ArchiveMemberHeader::isThin() const { 409 Expected<StringRef> NameOrErr = getRawName(); 410 if (!NameOrErr) 411 return NameOrErr.takeError(); 412 StringRef Name = NameOrErr.get(); 413 return Parent->isThin() && Name != "/" && Name != "//" && Name != "/SYM64/"; 414 } 415 416 Expected<const char *> ArchiveMemberHeader::getNextChildLoc() const { 417 uint64_t Size = getSizeOf(); 418 Expected<bool> isThinOrErr = isThin(); 419 if (!isThinOrErr) 420 return isThinOrErr.takeError(); 421 422 bool isThin = isThinOrErr.get(); 423 if (!isThin) { 424 Expected<uint64_t> MemberSize = getSize(); 425 if (!MemberSize) 426 return MemberSize.takeError(); 427 428 Size += MemberSize.get(); 429 } 430 431 // If Size is odd, add 1 to make it even. 432 const char *NextLoc = 433 reinterpret_cast<const char *>(ArMemHdr) + alignTo(Size, 2); 434 435 if (NextLoc == Parent->getMemoryBufferRef().getBufferEnd()) 436 return nullptr; 437 438 return NextLoc; 439 } 440 441 Expected<const char *> BigArchiveMemberHeader::getNextChildLoc() const { 442 if (getOffset() == 443 static_cast<const BigArchive *>(Parent)->getLastChildOffset()) 444 return nullptr; 445 446 Expected<uint64_t> NextOffsetOrErr = getNextOffset(); 447 if (!NextOffsetOrErr) 448 return NextOffsetOrErr.takeError(); 449 return Parent->getData().data() + NextOffsetOrErr.get(); 450 } 451 452 Archive::Child::Child(const Archive *Parent, StringRef Data, 453 uint16_t StartOfFile) 454 : Parent(Parent), Data(Data), StartOfFile(StartOfFile) { 455 Header = Parent->createArchiveMemberHeader(Data.data(), Data.size(), nullptr); 456 } 457 458 Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err) 459 : Parent(Parent) { 460 if (!Start) { 461 Header = nullptr; 462 return; 463 } 464 465 Header = Parent->createArchiveMemberHeader( 466 Start, 467 Parent ? Parent->getData().size() - (Start - Parent->getData().data()) 468 : 0, 469 Err); 470 471 // If we are pointed to real data, Start is not a nullptr, then there must be 472 // a non-null Err pointer available to report malformed data on. Only in 473 // the case sentinel value is being constructed is Err is permitted to be a 474 // nullptr. 475 assert(Err && "Err can't be nullptr if Start is not a nullptr"); 476 477 ErrorAsOutParameter ErrAsOutParam(Err); 478 479 // If there was an error in the construction of the Header 480 // then just return with the error now set. 481 if (*Err) 482 return; 483 484 uint64_t Size = Header->getSizeOf(); 485 Data = StringRef(Start, Size); 486 Expected<bool> isThinOrErr = isThinMember(); 487 if (!isThinOrErr) { 488 *Err = isThinOrErr.takeError(); 489 return; 490 } 491 bool isThin = isThinOrErr.get(); 492 if (!isThin) { 493 Expected<uint64_t> MemberSize = getRawSize(); 494 if (!MemberSize) { 495 *Err = MemberSize.takeError(); 496 return; 497 } 498 Size += MemberSize.get(); 499 Data = StringRef(Start, Size); 500 } 501 502 // Setup StartOfFile and PaddingBytes. 503 StartOfFile = Header->getSizeOf(); 504 // Don't include attached name. 505 Expected<StringRef> NameOrErr = getRawName(); 506 if (!NameOrErr) { 507 *Err = NameOrErr.takeError(); 508 return; 509 } 510 StringRef Name = NameOrErr.get(); 511 512 if (Parent->kind() == Archive::K_AIXBIG) { 513 // The actual start of the file is after the name and any necessary 514 // even-alignment padding. 515 StartOfFile += ((Name.size() + 1) >> 1) << 1; 516 } else if (Name.startswith("#1/")) { 517 uint64_t NameSize; 518 StringRef RawNameSize = Name.substr(3).rtrim(' '); 519 if (RawNameSize.getAsInteger(10, NameSize)) { 520 uint64_t Offset = Start - Parent->getData().data(); 521 *Err = malformedError("long name length characters after the #1/ are " 522 "not all decimal numbers: '" + 523 RawNameSize + 524 "' for archive member header at offset " + 525 Twine(Offset)); 526 return; 527 } 528 StartOfFile += NameSize; 529 } 530 } 531 532 Expected<uint64_t> Archive::Child::getSize() const { 533 if (Parent->IsThin) 534 return Header->getSize(); 535 return Data.size() - StartOfFile; 536 } 537 538 Expected<uint64_t> Archive::Child::getRawSize() const { 539 return Header->getSize(); 540 } 541 542 Expected<bool> Archive::Child::isThinMember() const { return Header->isThin(); } 543 544 Expected<std::string> Archive::Child::getFullName() const { 545 Expected<bool> isThin = isThinMember(); 546 if (!isThin) 547 return isThin.takeError(); 548 assert(isThin.get()); 549 Expected<StringRef> NameOrErr = getName(); 550 if (!NameOrErr) 551 return NameOrErr.takeError(); 552 StringRef Name = *NameOrErr; 553 if (sys::path::is_absolute(Name)) 554 return std::string(Name); 555 556 SmallString<128> FullName = sys::path::parent_path( 557 Parent->getMemoryBufferRef().getBufferIdentifier()); 558 sys::path::append(FullName, Name); 559 return std::string(FullName.str()); 560 } 561 562 Expected<StringRef> Archive::Child::getBuffer() const { 563 Expected<bool> isThinOrErr = isThinMember(); 564 if (!isThinOrErr) 565 return isThinOrErr.takeError(); 566 bool isThin = isThinOrErr.get(); 567 if (!isThin) { 568 Expected<uint64_t> Size = getSize(); 569 if (!Size) 570 return Size.takeError(); 571 return StringRef(Data.data() + StartOfFile, Size.get()); 572 } 573 Expected<std::string> FullNameOrErr = getFullName(); 574 if (!FullNameOrErr) 575 return FullNameOrErr.takeError(); 576 const std::string &FullName = *FullNameOrErr; 577 ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName); 578 if (std::error_code EC = Buf.getError()) 579 return errorCodeToError(EC); 580 Parent->ThinBuffers.push_back(std::move(*Buf)); 581 return Parent->ThinBuffers.back()->getBuffer(); 582 } 583 584 Expected<Archive::Child> Archive::Child::getNext() const { 585 Expected<const char *> NextLocOrErr = Header->getNextChildLoc(); 586 if (!NextLocOrErr) 587 return NextLocOrErr.takeError(); 588 589 const char *NextLoc = *NextLocOrErr; 590 591 // Check to see if this is at the end of the archive. 592 if (NextLoc == nullptr) 593 return Child(nullptr, nullptr, nullptr); 594 595 // Check to see if this is past the end of the archive. 596 if (NextLoc > Parent->Data.getBufferEnd()) { 597 std::string Msg("offset to next archive member past the end of the archive " 598 "after member "); 599 Expected<StringRef> NameOrErr = getName(); 600 if (!NameOrErr) { 601 consumeError(NameOrErr.takeError()); 602 uint64_t Offset = Data.data() - Parent->getData().data(); 603 return malformedError(Msg + "at offset " + Twine(Offset)); 604 } else 605 return malformedError(Msg + NameOrErr.get()); 606 } 607 608 Error Err = Error::success(); 609 Child Ret(Parent, NextLoc, &Err); 610 if (Err) 611 return std::move(Err); 612 return Ret; 613 } 614 615 uint64_t Archive::Child::getChildOffset() const { 616 const char *a = Parent->Data.getBuffer().data(); 617 const char *c = Data.data(); 618 uint64_t offset = c - a; 619 return offset; 620 } 621 622 Expected<StringRef> Archive::Child::getName() const { 623 Expected<uint64_t> RawSizeOrErr = getRawSize(); 624 if (!RawSizeOrErr) 625 return RawSizeOrErr.takeError(); 626 uint64_t RawSize = RawSizeOrErr.get(); 627 Expected<StringRef> NameOrErr = 628 Header->getName(Header->getSizeOf() + RawSize); 629 if (!NameOrErr) 630 return NameOrErr.takeError(); 631 StringRef Name = NameOrErr.get(); 632 return Name; 633 } 634 635 Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const { 636 Expected<StringRef> NameOrErr = getName(); 637 if (!NameOrErr) 638 return NameOrErr.takeError(); 639 StringRef Name = NameOrErr.get(); 640 Expected<StringRef> Buf = getBuffer(); 641 if (!Buf) 642 return createFileError(Name, Buf.takeError()); 643 return MemoryBufferRef(*Buf, Name); 644 } 645 646 Expected<std::unique_ptr<Binary>> 647 Archive::Child::getAsBinary(LLVMContext *Context) const { 648 Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef(); 649 if (!BuffOrErr) 650 return BuffOrErr.takeError(); 651 652 auto BinaryOrErr = createBinary(BuffOrErr.get(), Context); 653 if (BinaryOrErr) 654 return std::move(*BinaryOrErr); 655 return BinaryOrErr.takeError(); 656 } 657 658 Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) { 659 Error Err = Error::success(); 660 std::unique_ptr<Archive> Ret; 661 StringRef Buffer = Source.getBuffer(); 662 663 if (Buffer.startswith(BigArchiveMagic)) 664 Ret = std::make_unique<BigArchive>(Source, Err); 665 else 666 Ret = std::make_unique<Archive>(Source, Err); 667 668 if (Err) 669 return std::move(Err); 670 return std::move(Ret); 671 } 672 673 std::unique_ptr<AbstractArchiveMemberHeader> 674 Archive::createArchiveMemberHeader(const char *RawHeaderPtr, uint64_t Size, 675 Error *Err) const { 676 ErrorAsOutParameter ErrAsOutParam(Err); 677 if (kind() != K_AIXBIG) 678 return std::make_unique<ArchiveMemberHeader>(this, RawHeaderPtr, Size, Err); 679 return std::make_unique<BigArchiveMemberHeader>(this, RawHeaderPtr, Size, 680 Err); 681 } 682 683 uint64_t Archive::getArchiveMagicLen() const { 684 if (isThin()) 685 return sizeof(ThinArchiveMagic) - 1; 686 687 if (Kind() == K_AIXBIG) 688 return sizeof(BigArchiveMagic) - 1; 689 690 return sizeof(ArchiveMagic) - 1; 691 } 692 693 void Archive::setFirstRegular(const Child &C) { 694 FirstRegularData = C.Data; 695 FirstRegularStartOfFile = C.StartOfFile; 696 } 697 698 Archive::Archive(MemoryBufferRef Source, Error &Err) 699 : Binary(Binary::ID_Archive, Source) { 700 ErrorAsOutParameter ErrAsOutParam(&Err); 701 StringRef Buffer = Data.getBuffer(); 702 // Check for sufficient magic. 703 if (Buffer.startswith(ThinArchiveMagic)) { 704 IsThin = true; 705 } else if (Buffer.startswith(ArchiveMagic)) { 706 IsThin = false; 707 } else if (Buffer.startswith(BigArchiveMagic)) { 708 Format = K_AIXBIG; 709 IsThin = false; 710 return; 711 } else { 712 Err = make_error<GenericBinaryError>("file too small to be an archive", 713 object_error::invalid_file_type); 714 return; 715 } 716 717 // Make sure Format is initialized before any call to 718 // ArchiveMemberHeader::getName() is made. This could be a valid empty 719 // archive which is the same in all formats. So claiming it to be gnu to is 720 // fine if not totally correct before we look for a string table or table of 721 // contents. 722 Format = K_GNU; 723 724 // Get the special members. 725 child_iterator I = child_begin(Err, false); 726 if (Err) 727 return; 728 child_iterator E = child_end(); 729 730 // See if this is a valid empty archive and if so return. 731 if (I == E) { 732 Err = Error::success(); 733 return; 734 } 735 const Child *C = &*I; 736 737 auto Increment = [&]() { 738 ++I; 739 if (Err) 740 return true; 741 C = &*I; 742 return false; 743 }; 744 745 Expected<StringRef> NameOrErr = C->getRawName(); 746 if (!NameOrErr) { 747 Err = NameOrErr.takeError(); 748 return; 749 } 750 StringRef Name = NameOrErr.get(); 751 752 // Below is the pattern that is used to figure out the archive format 753 // GNU archive format 754 // First member : / (may exist, if it exists, points to the symbol table ) 755 // Second member : // (may exist, if it exists, points to the string table) 756 // Note : The string table is used if the filename exceeds 15 characters 757 // BSD archive format 758 // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table) 759 // There is no string table, if the filename exceeds 15 characters or has a 760 // embedded space, the filename has #1/<size>, The size represents the size 761 // of the filename that needs to be read after the archive header 762 // COFF archive format 763 // First member : / 764 // Second member : / (provides a directory of symbols) 765 // Third member : // (may exist, if it exists, contains the string table) 766 // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present 767 // even if the string table is empty. However, lib.exe does not in fact 768 // seem to create the third member if there's no member whose filename 769 // exceeds 15 characters. So the third member is optional. 770 771 if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") { 772 if (Name == "__.SYMDEF") 773 Format = K_BSD; 774 else // Name == "__.SYMDEF_64" 775 Format = K_DARWIN64; 776 // We know that the symbol table is not an external file, but we still must 777 // check any Expected<> return value. 778 Expected<StringRef> BufOrErr = C->getBuffer(); 779 if (!BufOrErr) { 780 Err = BufOrErr.takeError(); 781 return; 782 } 783 SymbolTable = BufOrErr.get(); 784 if (Increment()) 785 return; 786 setFirstRegular(*C); 787 788 Err = Error::success(); 789 return; 790 } 791 792 if (Name.startswith("#1/")) { 793 Format = K_BSD; 794 // We know this is BSD, so getName will work since there is no string table. 795 Expected<StringRef> NameOrErr = C->getName(); 796 if (!NameOrErr) { 797 Err = NameOrErr.takeError(); 798 return; 799 } 800 Name = NameOrErr.get(); 801 if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") { 802 // We know that the symbol table is not an external file, but we still 803 // must check any Expected<> return value. 804 Expected<StringRef> BufOrErr = C->getBuffer(); 805 if (!BufOrErr) { 806 Err = BufOrErr.takeError(); 807 return; 808 } 809 SymbolTable = BufOrErr.get(); 810 if (Increment()) 811 return; 812 } else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") { 813 Format = K_DARWIN64; 814 // We know that the symbol table is not an external file, but we still 815 // must check any Expected<> return value. 816 Expected<StringRef> BufOrErr = C->getBuffer(); 817 if (!BufOrErr) { 818 Err = BufOrErr.takeError(); 819 return; 820 } 821 SymbolTable = BufOrErr.get(); 822 if (Increment()) 823 return; 824 } 825 setFirstRegular(*C); 826 return; 827 } 828 829 // MIPS 64-bit ELF archives use a special format of a symbol table. 830 // This format is marked by `ar_name` field equals to "/SYM64/". 831 // For detailed description see page 96 in the following document: 832 // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf 833 834 bool has64SymTable = false; 835 if (Name == "/" || Name == "/SYM64/") { 836 // We know that the symbol table is not an external file, but we still 837 // must check any Expected<> return value. 838 Expected<StringRef> BufOrErr = C->getBuffer(); 839 if (!BufOrErr) { 840 Err = BufOrErr.takeError(); 841 return; 842 } 843 SymbolTable = BufOrErr.get(); 844 if (Name == "/SYM64/") 845 has64SymTable = true; 846 847 if (Increment()) 848 return; 849 if (I == E) { 850 Err = Error::success(); 851 return; 852 } 853 Expected<StringRef> NameOrErr = C->getRawName(); 854 if (!NameOrErr) { 855 Err = NameOrErr.takeError(); 856 return; 857 } 858 Name = NameOrErr.get(); 859 } 860 861 if (Name == "//") { 862 Format = has64SymTable ? K_GNU64 : K_GNU; 863 // The string table is never an external member, but we still 864 // must check any Expected<> return value. 865 Expected<StringRef> BufOrErr = C->getBuffer(); 866 if (!BufOrErr) { 867 Err = BufOrErr.takeError(); 868 return; 869 } 870 StringTable = BufOrErr.get(); 871 if (Increment()) 872 return; 873 setFirstRegular(*C); 874 Err = Error::success(); 875 return; 876 } 877 878 if (Name[0] != '/') { 879 Format = has64SymTable ? K_GNU64 : K_GNU; 880 setFirstRegular(*C); 881 Err = Error::success(); 882 return; 883 } 884 885 if (Name != "/") { 886 Err = errorCodeToError(object_error::parse_failed); 887 return; 888 } 889 890 Format = K_COFF; 891 // We know that the symbol table is not an external file, but we still 892 // must check any Expected<> return value. 893 Expected<StringRef> BufOrErr = C->getBuffer(); 894 if (!BufOrErr) { 895 Err = BufOrErr.takeError(); 896 return; 897 } 898 SymbolTable = BufOrErr.get(); 899 900 if (Increment()) 901 return; 902 903 if (I == E) { 904 setFirstRegular(*C); 905 Err = Error::success(); 906 return; 907 } 908 909 NameOrErr = C->getRawName(); 910 if (!NameOrErr) { 911 Err = NameOrErr.takeError(); 912 return; 913 } 914 Name = NameOrErr.get(); 915 916 if (Name == "//") { 917 // The string table is never an external member, but we still 918 // must check any Expected<> return value. 919 Expected<StringRef> BufOrErr = C->getBuffer(); 920 if (!BufOrErr) { 921 Err = BufOrErr.takeError(); 922 return; 923 } 924 StringTable = BufOrErr.get(); 925 if (Increment()) 926 return; 927 } 928 929 setFirstRegular(*C); 930 Err = Error::success(); 931 } 932 933 object::Archive::Kind Archive::getDefaultKindForHost() { 934 Triple HostTriple(sys::getProcessTriple()); 935 return HostTriple.isOSDarwin() 936 ? object::Archive::K_DARWIN 937 : (HostTriple.isOSAIX() ? object::Archive::K_AIXBIG 938 : object::Archive::K_GNU); 939 } 940 941 Archive::child_iterator Archive::child_begin(Error &Err, 942 bool SkipInternal) const { 943 if (isEmpty()) 944 return child_end(); 945 946 if (SkipInternal) 947 return child_iterator::itr( 948 Child(this, FirstRegularData, FirstRegularStartOfFile), Err); 949 950 const char *Loc = Data.getBufferStart() + getFirstChildOffset(); 951 Child C(this, Loc, &Err); 952 if (Err) 953 return child_end(); 954 return child_iterator::itr(C, Err); 955 } 956 957 Archive::child_iterator Archive::child_end() const { 958 return child_iterator::end(Child(nullptr, nullptr, nullptr)); 959 } 960 961 StringRef Archive::Symbol::getName() const { 962 return Parent->getSymbolTable().begin() + StringIndex; 963 } 964 965 Expected<Archive::Child> Archive::Symbol::getMember() const { 966 const char *Buf = Parent->getSymbolTable().begin(); 967 const char *Offsets = Buf; 968 if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64) 969 Offsets += sizeof(uint64_t); 970 else 971 Offsets += sizeof(uint32_t); 972 uint64_t Offset = 0; 973 if (Parent->kind() == K_GNU) { 974 Offset = read32be(Offsets + SymbolIndex * 4); 975 } else if (Parent->kind() == K_GNU64) { 976 Offset = read64be(Offsets + SymbolIndex * 8); 977 } else if (Parent->kind() == K_BSD) { 978 // The SymbolIndex is an index into the ranlib structs that start at 979 // Offsets (the first uint32_t is the number of bytes of the ranlib 980 // structs). The ranlib structs are a pair of uint32_t's the first 981 // being a string table offset and the second being the offset into 982 // the archive of the member that defines the symbol. Which is what 983 // is needed here. 984 Offset = read32le(Offsets + SymbolIndex * 8 + 4); 985 } else if (Parent->kind() == K_DARWIN64) { 986 // The SymbolIndex is an index into the ranlib_64 structs that start at 987 // Offsets (the first uint64_t is the number of bytes of the ranlib_64 988 // structs). The ranlib_64 structs are a pair of uint64_t's the first 989 // being a string table offset and the second being the offset into 990 // the archive of the member that defines the symbol. Which is what 991 // is needed here. 992 Offset = read64le(Offsets + SymbolIndex * 16 + 8); 993 } else { 994 // Skip offsets. 995 uint32_t MemberCount = read32le(Buf); 996 Buf += MemberCount * 4 + 4; 997 998 uint32_t SymbolCount = read32le(Buf); 999 if (SymbolIndex >= SymbolCount) 1000 return errorCodeToError(object_error::parse_failed); 1001 1002 // Skip SymbolCount to get to the indices table. 1003 const char *Indices = Buf + 4; 1004 1005 // Get the index of the offset in the file member offset table for this 1006 // symbol. 1007 uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2); 1008 // Subtract 1 since OffsetIndex is 1 based. 1009 --OffsetIndex; 1010 1011 if (OffsetIndex >= MemberCount) 1012 return errorCodeToError(object_error::parse_failed); 1013 1014 Offset = read32le(Offsets + OffsetIndex * 4); 1015 } 1016 1017 const char *Loc = Parent->getData().begin() + Offset; 1018 Error Err = Error::success(); 1019 Child C(Parent, Loc, &Err); 1020 if (Err) 1021 return std::move(Err); 1022 return C; 1023 } 1024 1025 Archive::Symbol Archive::Symbol::getNext() const { 1026 Symbol t(*this); 1027 if (Parent->kind() == K_BSD) { 1028 // t.StringIndex is an offset from the start of the __.SYMDEF or 1029 // "__.SYMDEF SORTED" member into the string table for the ranlib 1030 // struct indexed by t.SymbolIndex . To change t.StringIndex to the 1031 // offset in the string table for t.SymbolIndex+1 we subtract the 1032 // its offset from the start of the string table for t.SymbolIndex 1033 // and add the offset of the string table for t.SymbolIndex+1. 1034 1035 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t 1036 // which is the number of bytes of ranlib structs that follow. The ranlib 1037 // structs are a pair of uint32_t's the first being a string table offset 1038 // and the second being the offset into the archive of the member that 1039 // define the symbol. After that the next uint32_t is the byte count of 1040 // the string table followed by the string table. 1041 const char *Buf = Parent->getSymbolTable().begin(); 1042 uint32_t RanlibCount = 0; 1043 RanlibCount = read32le(Buf) / 8; 1044 // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount) 1045 // don't change the t.StringIndex as we don't want to reference a ranlib 1046 // past RanlibCount. 1047 if (t.SymbolIndex + 1 < RanlibCount) { 1048 const char *Ranlibs = Buf + 4; 1049 uint32_t CurRanStrx = 0; 1050 uint32_t NextRanStrx = 0; 1051 CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8); 1052 NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8); 1053 t.StringIndex -= CurRanStrx; 1054 t.StringIndex += NextRanStrx; 1055 } 1056 } else { 1057 // Go to one past next null. 1058 t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1; 1059 } 1060 ++t.SymbolIndex; 1061 return t; 1062 } 1063 1064 Archive::symbol_iterator Archive::symbol_begin() const { 1065 if (!hasSymbolTable()) 1066 return symbol_iterator(Symbol(this, 0, 0)); 1067 1068 const char *buf = getSymbolTable().begin(); 1069 if (kind() == K_GNU) { 1070 uint32_t symbol_count = 0; 1071 symbol_count = read32be(buf); 1072 buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t))); 1073 } else if (kind() == K_GNU64) { 1074 uint64_t symbol_count = read64be(buf); 1075 buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t))); 1076 } else if (kind() == K_BSD) { 1077 // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t 1078 // which is the number of bytes of ranlib structs that follow. The ranlib 1079 // structs are a pair of uint32_t's the first being a string table offset 1080 // and the second being the offset into the archive of the member that 1081 // define the symbol. After that the next uint32_t is the byte count of 1082 // the string table followed by the string table. 1083 uint32_t ranlib_count = 0; 1084 ranlib_count = read32le(buf) / 8; 1085 const char *ranlibs = buf + 4; 1086 uint32_t ran_strx = 0; 1087 ran_strx = read32le(ranlibs); 1088 buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t)))); 1089 // Skip the byte count of the string table. 1090 buf += sizeof(uint32_t); 1091 buf += ran_strx; 1092 } else if (kind() == K_DARWIN64) { 1093 // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t 1094 // which is the number of bytes of ranlib_64 structs that follow. The 1095 // ranlib_64 structs are a pair of uint64_t's the first being a string 1096 // table offset and the second being the offset into the archive of the 1097 // member that define the symbol. After that the next uint64_t is the byte 1098 // count of the string table followed by the string table. 1099 uint64_t ranlib_count = 0; 1100 ranlib_count = read64le(buf) / 16; 1101 const char *ranlibs = buf + 8; 1102 uint64_t ran_strx = 0; 1103 ran_strx = read64le(ranlibs); 1104 buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t)))); 1105 // Skip the byte count of the string table. 1106 buf += sizeof(uint64_t); 1107 buf += ran_strx; 1108 } else { 1109 uint32_t member_count = 0; 1110 uint32_t symbol_count = 0; 1111 member_count = read32le(buf); 1112 buf += 4 + (member_count * 4); // Skip offsets. 1113 symbol_count = read32le(buf); 1114 buf += 4 + (symbol_count * 2); // Skip indices. 1115 } 1116 uint32_t string_start_offset = buf - getSymbolTable().begin(); 1117 return symbol_iterator(Symbol(this, 0, string_start_offset)); 1118 } 1119 1120 Archive::symbol_iterator Archive::symbol_end() const { 1121 return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0)); 1122 } 1123 1124 uint32_t Archive::getNumberOfSymbols() const { 1125 if (!hasSymbolTable()) 1126 return 0; 1127 const char *buf = getSymbolTable().begin(); 1128 if (kind() == K_GNU) 1129 return read32be(buf); 1130 if (kind() == K_GNU64) 1131 return read64be(buf); 1132 if (kind() == K_BSD) 1133 return read32le(buf) / 8; 1134 if (kind() == K_DARWIN64) 1135 return read64le(buf) / 16; 1136 uint32_t member_count = 0; 1137 member_count = read32le(buf); 1138 buf += 4 + (member_count * 4); // Skip offsets. 1139 return read32le(buf); 1140 } 1141 1142 Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const { 1143 Archive::symbol_iterator bs = symbol_begin(); 1144 Archive::symbol_iterator es = symbol_end(); 1145 1146 for (; bs != es; ++bs) { 1147 StringRef SymName = bs->getName(); 1148 if (SymName == name) { 1149 if (auto MemberOrErr = bs->getMember()) 1150 return Child(*MemberOrErr); 1151 else 1152 return MemberOrErr.takeError(); 1153 } 1154 } 1155 return Optional<Child>(); 1156 } 1157 1158 // Returns true if archive file contains no member file. 1159 bool Archive::isEmpty() const { 1160 return Data.getBufferSize() == getArchiveMagicLen(); 1161 } 1162 1163 bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); } 1164 1165 BigArchive::BigArchive(MemoryBufferRef Source, Error &Err) 1166 : Archive(Source, Err) { 1167 ErrorAsOutParameter ErrAsOutParam(&Err); 1168 StringRef Buffer = Data.getBuffer(); 1169 ArFixLenHdr = reinterpret_cast<const FixLenHdr *>(Buffer.data()); 1170 1171 StringRef RawOffset = getFieldRawString(ArFixLenHdr->FirstChildOffset); 1172 if (RawOffset.getAsInteger(10, FirstChildOffset)) 1173 // TODO: Out-of-line. 1174 Err = malformedError("malformed AIX big archive: first member offset \"" + 1175 RawOffset + "\" is not a number"); 1176 1177 RawOffset = getFieldRawString(ArFixLenHdr->LastChildOffset); 1178 if (RawOffset.getAsInteger(10, LastChildOffset)) 1179 // TODO: Out-of-line. 1180 Err = malformedError("malformed AIX big archive: last member offset \"" + 1181 RawOffset + "\" is not a number"); 1182 1183 child_iterator I = child_begin(Err, false); 1184 if (Err) 1185 return; 1186 child_iterator E = child_end(); 1187 if (I == E) { 1188 Err = Error::success(); 1189 return; 1190 } 1191 setFirstRegular(*I); 1192 Err = Error::success(); 1193 } 1194