xref: /freebsd/contrib/llvm-project/llvm/lib/Object/Archive.cpp (revision 81ad626541db97eb356e2c1d4a20eb2a26a766ab)
1 //===- Archive.cpp - ar File Format implementation ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file defines the Archive class and its member-header helpers.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Object/Archive.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Object/Binary.h"
19 #include "llvm/Object/Error.h"
20 #include "llvm/Support/Chrono.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/ErrorOr.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/Host.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/MemoryBuffer.h"
28 #include "llvm/Support/Path.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <algorithm>
31 #include <cassert>
32 #include <cstddef>
33 #include <cstdint>
34 #include <memory>
35 #include <string>
36 #include <system_error>
37 
38 using namespace llvm;
39 using namespace object;
40 using namespace llvm::support::endian;
41 
42 void Archive::anchor() {}
43 
44 static Error malformedError(Twine Msg) {
45   std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")";
46   return make_error<GenericBinaryError>(std::move(StringMsg),
47                                         object_error::parse_failed);
48 }
49 
50 static Error
51 createMemberHeaderParseError(const AbstractArchiveMemberHeader *ArMemHeader,
52                              const char *RawHeaderPtr, uint64_t Size) {
53   StringRef Msg("remaining size of archive too small for next archive "
54                 "member header ");
55 
56   Expected<StringRef> NameOrErr = ArMemHeader->getName(Size);
57   if (NameOrErr)
58     return malformedError(Msg + "for " + *NameOrErr);
59 
60   consumeError(NameOrErr.takeError());
61   uint64_t Offset = RawHeaderPtr - ArMemHeader->Parent->getData().data();
62   return malformedError(Msg + "at offset " + Twine(Offset));
63 }
64 
65 template <class T, std::size_t N>
66 StringRef getFieldRawString(const T (&Field)[N]) {
67   return StringRef(Field, N).rtrim(" ");
68 }
69 
70 template <class T>
71 StringRef CommonArchiveMemberHeader<T>::getRawAccessMode() const {
72   return getFieldRawString(ArMemHdr->AccessMode);
73 }
74 
75 template <class T>
76 StringRef CommonArchiveMemberHeader<T>::getRawLastModified() const {
77   return getFieldRawString(ArMemHdr->LastModified);
78 }
79 
80 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawUID() const {
81   return getFieldRawString(ArMemHdr->UID);
82 }
83 
84 template <class T> StringRef CommonArchiveMemberHeader<T>::getRawGID() const {
85   return getFieldRawString(ArMemHdr->GID);
86 }
87 
88 template <class T> uint64_t CommonArchiveMemberHeader<T>::getOffset() const {
89   return reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
90 }
91 
// Explicitly instantiate CommonArchiveMemberHeader for the two raw header
// layouts this file works with, since its member templates are defined here
// rather than in the header.
template class object::CommonArchiveMemberHeader<UnixArMemHdrType>;
template class object::CommonArchiveMemberHeader<BigArMemHdrType>;
94 
95 ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
96                                          const char *RawHeaderPtr,
97                                          uint64_t Size, Error *Err)
98     : CommonArchiveMemberHeader<UnixArMemHdrType>(
99           Parent, reinterpret_cast<const UnixArMemHdrType *>(RawHeaderPtr)) {
100   if (RawHeaderPtr == nullptr)
101     return;
102   ErrorAsOutParameter ErrAsOutParam(Err);
103 
104   if (Size < getSizeOf()) {
105     *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size);
106     return;
107   }
108   if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') {
109     if (Err) {
110       std::string Buf;
111       raw_string_ostream OS(Buf);
112       OS.write_escaped(
113           StringRef(ArMemHdr->Terminator, sizeof(ArMemHdr->Terminator)));
114       OS.flush();
115       std::string Msg("terminator characters in archive member \"" + Buf +
116                       "\" not the correct \"`\\n\" values for the archive "
117                       "member header ");
118       Expected<StringRef> NameOrErr = getName(Size);
119       if (!NameOrErr) {
120         consumeError(NameOrErr.takeError());
121         uint64_t Offset = RawHeaderPtr - Parent->getData().data();
122         *Err = malformedError(Msg + "at offset " + Twine(Offset));
123       } else
124         *Err = malformedError(Msg + "for " + NameOrErr.get());
125     }
126     return;
127   }
128 }
129 
130 BigArchiveMemberHeader::BigArchiveMemberHeader(const Archive *Parent,
131                                                const char *RawHeaderPtr,
132                                                uint64_t Size, Error *Err)
133     : CommonArchiveMemberHeader<BigArMemHdrType>(
134           Parent, reinterpret_cast<const BigArMemHdrType *>(RawHeaderPtr)) {
135   if (RawHeaderPtr == nullptr)
136     return;
137   ErrorAsOutParameter ErrAsOutParam(Err);
138 
139   if (Size < getSizeOf())
140     *Err = createMemberHeaderParseError(this, RawHeaderPtr, Size);
141 }
142 
143 // This gets the raw name from the ArMemHdr->Name field and checks that it is
144 // valid for the kind of archive.  If it is not valid it returns an Error.
145 Expected<StringRef> ArchiveMemberHeader::getRawName() const {
146   char EndCond;
147   auto Kind = Parent->kind();
148   if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) {
149     if (ArMemHdr->Name[0] == ' ') {
150       uint64_t Offset =
151           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
152       return malformedError("name contains a leading space for archive member "
153                             "header at offset " +
154                             Twine(Offset));
155     }
156     EndCond = ' ';
157   } else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#')
158     EndCond = ' ';
159   else
160     EndCond = '/';
161   StringRef::size_type end =
162       StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond);
163   if (end == StringRef::npos)
164     end = sizeof(ArMemHdr->Name);
165   assert(end <= sizeof(ArMemHdr->Name) && end > 0);
166   // Don't include the EndCond if there is one.
167   return StringRef(ArMemHdr->Name, end);
168 }
169 
170 Expected<uint64_t>
171 getArchiveMemberDecField(Twine FieldName, const StringRef RawField,
172                          const Archive *Parent,
173                          const AbstractArchiveMemberHeader *MemHeader) {
174   uint64_t Value;
175   if (RawField.getAsInteger(10, Value)) {
176     uint64_t Offset = MemHeader->getOffset();
177     return malformedError("characters in " + FieldName +
178                           " field in archive member header are not "
179                           "all decimal numbers: '" +
180                           RawField +
181                           "' for the archive "
182                           "member header at offset " +
183                           Twine(Offset));
184   }
185   return Value;
186 }
187 
188 Expected<uint64_t>
189 getArchiveMemberOctField(Twine FieldName, const StringRef RawField,
190                          const Archive *Parent,
191                          const AbstractArchiveMemberHeader *MemHeader) {
192   uint64_t Value;
193   if (RawField.getAsInteger(8, Value)) {
194     uint64_t Offset = MemHeader->getOffset();
195     return malformedError("characters in " + FieldName +
196                           " field in archive member header are not "
197                           "all octal numbers: '" +
198                           RawField +
199                           "' for the archive "
200                           "member header at offset " +
201                           Twine(Offset));
202   }
203   return Value;
204 }
205 
206 Expected<StringRef> BigArchiveMemberHeader::getRawName() const {
207   Expected<uint64_t> NameLenOrErr = getArchiveMemberDecField(
208       "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this);
209   if (!NameLenOrErr)
210     // TODO: Out-of-line.
211     return NameLenOrErr.takeError();
212   uint64_t NameLen = NameLenOrErr.get();
213 
214   // If the name length is odd, pad with '\0' to get an even length. After
215   // padding, there is the name terminator "`\n".
216   uint64_t NameLenWithPadding = alignTo(NameLen, 2);
217   StringRef NameTerminator = "`\n";
218   StringRef NameStringWithNameTerminator =
219       StringRef(ArMemHdr->Name, NameLenWithPadding + NameTerminator.size());
220   if (!NameStringWithNameTerminator.endswith(NameTerminator)) {
221     uint64_t Offset =
222         reinterpret_cast<const char *>(ArMemHdr->Name + NameLenWithPadding) -
223         Parent->getData().data();
224     // TODO: Out-of-line.
225     return malformedError(
226         "name does not have name terminator \"`\\n\" for archive member"
227         "header at offset " +
228         Twine(Offset));
229   }
230   return StringRef(ArMemHdr->Name, NameLen);
231 }
232 
233 // member including the header, so the size of any name following the header
234 // is checked to make sure it does not overflow.
235 Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
236 
237   // This can be called from the ArchiveMemberHeader constructor when the
238   // archive header is truncated to produce an error message with the name.
239   // Make sure the name field is not truncated.
240   if (Size < offsetof(UnixArMemHdrType, Name) + sizeof(ArMemHdr->Name)) {
241     uint64_t ArchiveOffset =
242         reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
243     return malformedError("archive header truncated before the name field "
244                           "for archive member header at offset " +
245                           Twine(ArchiveOffset));
246   }
247 
248   // The raw name itself can be invalid.
249   Expected<StringRef> NameOrErr = getRawName();
250   if (!NameOrErr)
251     return NameOrErr.takeError();
252   StringRef Name = NameOrErr.get();
253 
254   // Check if it's a special name.
255   if (Name[0] == '/') {
256     if (Name.size() == 1) // Linker member.
257       return Name;
258     if (Name.size() == 2 && Name[1] == '/') // String table.
259       return Name;
260     // System libraries from the Windows SDK for Windows 11 contain this symbol.
261     // It looks like a CFG guard: we just skip it for now.
262     if (Name.equals("/<XFGHASHMAP>/"))
263       return Name;
264     // Some libraries (e.g., arm64rt.lib) from the Windows WDK
265     // (version 10.0.22000.0) contain this undocumented special member.
266     if (Name.equals("/<ECSYMBOLS>/"))
267       return Name;
268     // It's a long name.
269     // Get the string table offset.
270     std::size_t StringOffset;
271     if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) {
272       std::string Buf;
273       raw_string_ostream OS(Buf);
274       OS.write_escaped(Name.substr(1).rtrim(' '));
275       OS.flush();
276       uint64_t ArchiveOffset =
277           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
278       return malformedError("long name offset characters after the '/' are "
279                             "not all decimal numbers: '" +
280                             Buf + "' for archive member header at offset " +
281                             Twine(ArchiveOffset));
282     }
283 
284     // Verify it.
285     if (StringOffset >= Parent->getStringTable().size()) {
286       uint64_t ArchiveOffset =
287           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
288       return malformedError("long name offset " + Twine(StringOffset) +
289                             " past the end of the string table for archive "
290                             "member header at offset " +
291                             Twine(ArchiveOffset));
292     }
293 
294     // GNU long file names end with a "/\n".
295     if (Parent->kind() == Archive::K_GNU ||
296         Parent->kind() == Archive::K_GNU64) {
297       size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset);
298       if (End == StringRef::npos || End < 1 ||
299           Parent->getStringTable()[End - 1] != '/') {
300         return malformedError("string table at long name offset " +
301                               Twine(StringOffset) + "not terminated");
302       }
303       return Parent->getStringTable().slice(StringOffset, End - 1);
304     }
305     return Parent->getStringTable().begin() + StringOffset;
306   }
307 
308   if (Name.startswith("#1/")) {
309     uint64_t NameLength;
310     if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) {
311       std::string Buf;
312       raw_string_ostream OS(Buf);
313       OS.write_escaped(Name.substr(3).rtrim(' '));
314       OS.flush();
315       uint64_t ArchiveOffset =
316           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
317       return malformedError("long name length characters after the #1/ are "
318                             "not all decimal numbers: '" +
319                             Buf + "' for archive member header at offset " +
320                             Twine(ArchiveOffset));
321     }
322     if (getSizeOf() + NameLength > Size) {
323       uint64_t ArchiveOffset =
324           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
325       return malformedError("long name length: " + Twine(NameLength) +
326                             " extends past the end of the member or archive "
327                             "for archive member header at offset " +
328                             Twine(ArchiveOffset));
329     }
330     return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(),
331                      NameLength)
332         .rtrim('\0');
333   }
334 
335   // It is not a long name so trim the blanks at the end of the name.
336   if (Name[Name.size() - 1] != '/')
337     return Name.rtrim(' ');
338 
339   // It's a simple name.
340   return Name.drop_back(1);
341 }
342 
343 Expected<StringRef> BigArchiveMemberHeader::getName(uint64_t Size) const {
344   return getRawName();
345 }
346 
347 Expected<uint64_t> ArchiveMemberHeader::getSize() const {
348   return getArchiveMemberDecField("size", getFieldRawString(ArMemHdr->Size),
349                                   Parent, this);
350 }
351 
352 Expected<uint64_t> BigArchiveMemberHeader::getSize() const {
353   Expected<uint64_t> SizeOrErr = getArchiveMemberDecField(
354       "size", getFieldRawString(ArMemHdr->Size), Parent, this);
355   if (!SizeOrErr)
356     return SizeOrErr.takeError();
357 
358   Expected<uint64_t> NameLenOrErr = getRawNameSize();
359   if (!NameLenOrErr)
360     return NameLenOrErr.takeError();
361 
362   return *SizeOrErr + alignTo(*NameLenOrErr, 2);
363 }
364 
365 Expected<uint64_t> BigArchiveMemberHeader::getRawNameSize() const {
366   return getArchiveMemberDecField(
367       "NameLen", getFieldRawString(ArMemHdr->NameLen), Parent, this);
368 }
369 
370 Expected<uint64_t> BigArchiveMemberHeader::getNextOffset() const {
371   return getArchiveMemberDecField(
372       "NextOffset", getFieldRawString(ArMemHdr->NextOffset), Parent, this);
373 }
374 
375 Expected<sys::fs::perms> AbstractArchiveMemberHeader::getAccessMode() const {
376   Expected<uint64_t> AccessModeOrErr =
377       getArchiveMemberOctField("AccessMode", getRawAccessMode(), Parent, this);
378   if (!AccessModeOrErr)
379     return AccessModeOrErr.takeError();
380   return static_cast<sys::fs::perms>(*AccessModeOrErr);
381 }
382 
383 Expected<sys::TimePoint<std::chrono::seconds>>
384 AbstractArchiveMemberHeader::getLastModified() const {
385   Expected<uint64_t> SecondsOrErr = getArchiveMemberDecField(
386       "LastModified", getRawLastModified(), Parent, this);
387 
388   if (!SecondsOrErr)
389     return SecondsOrErr.takeError();
390 
391   return sys::toTimePoint(*SecondsOrErr);
392 }
393 
394 Expected<unsigned> AbstractArchiveMemberHeader::getUID() const {
395   StringRef User = getRawUID();
396   if (User.empty())
397     return 0;
398   return getArchiveMemberDecField("UID", User, Parent, this);
399 }
400 
401 Expected<unsigned> AbstractArchiveMemberHeader::getGID() const {
402   StringRef Group = getRawGID();
403   if (Group.empty())
404     return 0;
405   return getArchiveMemberDecField("GID", Group, Parent, this);
406 }
407 
408 Expected<bool> ArchiveMemberHeader::isThin() const {
409   Expected<StringRef> NameOrErr = getRawName();
410   if (!NameOrErr)
411     return NameOrErr.takeError();
412   StringRef Name = NameOrErr.get();
413   return Parent->isThin() && Name != "/" && Name != "//" && Name != "/SYM64/";
414 }
415 
416 Expected<const char *> ArchiveMemberHeader::getNextChildLoc() const {
417   uint64_t Size = getSizeOf();
418   Expected<bool> isThinOrErr = isThin();
419   if (!isThinOrErr)
420     return isThinOrErr.takeError();
421 
422   bool isThin = isThinOrErr.get();
423   if (!isThin) {
424     Expected<uint64_t> MemberSize = getSize();
425     if (!MemberSize)
426       return MemberSize.takeError();
427 
428     Size += MemberSize.get();
429   }
430 
431   // If Size is odd, add 1 to make it even.
432   const char *NextLoc =
433       reinterpret_cast<const char *>(ArMemHdr) + alignTo(Size, 2);
434 
435   if (NextLoc == Parent->getMemoryBufferRef().getBufferEnd())
436     return nullptr;
437 
438   return NextLoc;
439 }
440 
441 Expected<const char *> BigArchiveMemberHeader::getNextChildLoc() const {
442   if (getOffset() ==
443       static_cast<const BigArchive *>(Parent)->getLastChildOffset())
444     return nullptr;
445 
446   Expected<uint64_t> NextOffsetOrErr = getNextOffset();
447   if (!NextOffsetOrErr)
448     return NextOffsetOrErr.takeError();
449   return Parent->getData().data() + NextOffsetOrErr.get();
450 }
451 
452 Archive::Child::Child(const Archive *Parent, StringRef Data,
453                       uint16_t StartOfFile)
454     : Parent(Parent), Data(Data), StartOfFile(StartOfFile) {
455   Header = Parent->createArchiveMemberHeader(Data.data(), Data.size(), nullptr);
456 }
457 
458 Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
459     : Parent(Parent) {
460   if (!Start) {
461     Header = nullptr;
462     return;
463   }
464 
465   Header = Parent->createArchiveMemberHeader(
466       Start,
467       Parent ? Parent->getData().size() - (Start - Parent->getData().data())
468              : 0,
469       Err);
470 
471   // If we are pointed to real data, Start is not a nullptr, then there must be
472   // a non-null Err pointer available to report malformed data on.  Only in
473   // the case sentinel value is being constructed is Err is permitted to be a
474   // nullptr.
475   assert(Err && "Err can't be nullptr if Start is not a nullptr");
476 
477   ErrorAsOutParameter ErrAsOutParam(Err);
478 
479   // If there was an error in the construction of the Header
480   // then just return with the error now set.
481   if (*Err)
482     return;
483 
484   uint64_t Size = Header->getSizeOf();
485   Data = StringRef(Start, Size);
486   Expected<bool> isThinOrErr = isThinMember();
487   if (!isThinOrErr) {
488     *Err = isThinOrErr.takeError();
489     return;
490   }
491   bool isThin = isThinOrErr.get();
492   if (!isThin) {
493     Expected<uint64_t> MemberSize = getRawSize();
494     if (!MemberSize) {
495       *Err = MemberSize.takeError();
496       return;
497     }
498     Size += MemberSize.get();
499     Data = StringRef(Start, Size);
500   }
501 
502   // Setup StartOfFile and PaddingBytes.
503   StartOfFile = Header->getSizeOf();
504   // Don't include attached name.
505   Expected<StringRef> NameOrErr = getRawName();
506   if (!NameOrErr) {
507     *Err = NameOrErr.takeError();
508     return;
509   }
510   StringRef Name = NameOrErr.get();
511 
512   if (Parent->kind() == Archive::K_AIXBIG) {
513     // The actual start of the file is after the name and any necessary
514     // even-alignment padding.
515     StartOfFile += ((Name.size() + 1) >> 1) << 1;
516   } else if (Name.startswith("#1/")) {
517     uint64_t NameSize;
518     StringRef RawNameSize = Name.substr(3).rtrim(' ');
519     if (RawNameSize.getAsInteger(10, NameSize)) {
520       uint64_t Offset = Start - Parent->getData().data();
521       *Err = malformedError("long name length characters after the #1/ are "
522                             "not all decimal numbers: '" +
523                             RawNameSize +
524                             "' for archive member header at offset " +
525                             Twine(Offset));
526       return;
527     }
528     StartOfFile += NameSize;
529   }
530 }
531 
532 Expected<uint64_t> Archive::Child::getSize() const {
533   if (Parent->IsThin)
534     return Header->getSize();
535   return Data.size() - StartOfFile;
536 }
537 
538 Expected<uint64_t> Archive::Child::getRawSize() const {
539   return Header->getSize();
540 }
541 
542 Expected<bool> Archive::Child::isThinMember() const { return Header->isThin(); }
543 
544 Expected<std::string> Archive::Child::getFullName() const {
545   Expected<bool> isThin = isThinMember();
546   if (!isThin)
547     return isThin.takeError();
548   assert(isThin.get());
549   Expected<StringRef> NameOrErr = getName();
550   if (!NameOrErr)
551     return NameOrErr.takeError();
552   StringRef Name = *NameOrErr;
553   if (sys::path::is_absolute(Name))
554     return std::string(Name);
555 
556   SmallString<128> FullName = sys::path::parent_path(
557       Parent->getMemoryBufferRef().getBufferIdentifier());
558   sys::path::append(FullName, Name);
559   return std::string(FullName.str());
560 }
561 
562 Expected<StringRef> Archive::Child::getBuffer() const {
563   Expected<bool> isThinOrErr = isThinMember();
564   if (!isThinOrErr)
565     return isThinOrErr.takeError();
566   bool isThin = isThinOrErr.get();
567   if (!isThin) {
568     Expected<uint64_t> Size = getSize();
569     if (!Size)
570       return Size.takeError();
571     return StringRef(Data.data() + StartOfFile, Size.get());
572   }
573   Expected<std::string> FullNameOrErr = getFullName();
574   if (!FullNameOrErr)
575     return FullNameOrErr.takeError();
576   const std::string &FullName = *FullNameOrErr;
577   ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
578   if (std::error_code EC = Buf.getError())
579     return errorCodeToError(EC);
580   Parent->ThinBuffers.push_back(std::move(*Buf));
581   return Parent->ThinBuffers.back()->getBuffer();
582 }
583 
584 Expected<Archive::Child> Archive::Child::getNext() const {
585   Expected<const char *> NextLocOrErr = Header->getNextChildLoc();
586   if (!NextLocOrErr)
587     return NextLocOrErr.takeError();
588 
589   const char *NextLoc = *NextLocOrErr;
590 
591   // Check to see if this is at the end of the archive.
592   if (NextLoc == nullptr)
593     return Child(nullptr, nullptr, nullptr);
594 
595   // Check to see if this is past the end of the archive.
596   if (NextLoc > Parent->Data.getBufferEnd()) {
597     std::string Msg("offset to next archive member past the end of the archive "
598                     "after member ");
599     Expected<StringRef> NameOrErr = getName();
600     if (!NameOrErr) {
601       consumeError(NameOrErr.takeError());
602       uint64_t Offset = Data.data() - Parent->getData().data();
603       return malformedError(Msg + "at offset " + Twine(Offset));
604     } else
605       return malformedError(Msg + NameOrErr.get());
606   }
607 
608   Error Err = Error::success();
609   Child Ret(Parent, NextLoc, &Err);
610   if (Err)
611     return std::move(Err);
612   return Ret;
613 }
614 
615 uint64_t Archive::Child::getChildOffset() const {
616   const char *a = Parent->Data.getBuffer().data();
617   const char *c = Data.data();
618   uint64_t offset = c - a;
619   return offset;
620 }
621 
622 Expected<StringRef> Archive::Child::getName() const {
623   Expected<uint64_t> RawSizeOrErr = getRawSize();
624   if (!RawSizeOrErr)
625     return RawSizeOrErr.takeError();
626   uint64_t RawSize = RawSizeOrErr.get();
627   Expected<StringRef> NameOrErr =
628       Header->getName(Header->getSizeOf() + RawSize);
629   if (!NameOrErr)
630     return NameOrErr.takeError();
631   StringRef Name = NameOrErr.get();
632   return Name;
633 }
634 
635 Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
636   Expected<StringRef> NameOrErr = getName();
637   if (!NameOrErr)
638     return NameOrErr.takeError();
639   StringRef Name = NameOrErr.get();
640   Expected<StringRef> Buf = getBuffer();
641   if (!Buf)
642     return createFileError(Name, Buf.takeError());
643   return MemoryBufferRef(*Buf, Name);
644 }
645 
646 Expected<std::unique_ptr<Binary>>
647 Archive::Child::getAsBinary(LLVMContext *Context) const {
648   Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
649   if (!BuffOrErr)
650     return BuffOrErr.takeError();
651 
652   auto BinaryOrErr = createBinary(BuffOrErr.get(), Context);
653   if (BinaryOrErr)
654     return std::move(*BinaryOrErr);
655   return BinaryOrErr.takeError();
656 }
657 
658 Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
659   Error Err = Error::success();
660   std::unique_ptr<Archive> Ret;
661   StringRef Buffer = Source.getBuffer();
662 
663   if (Buffer.startswith(BigArchiveMagic))
664     Ret = std::make_unique<BigArchive>(Source, Err);
665   else
666     Ret = std::make_unique<Archive>(Source, Err);
667 
668   if (Err)
669     return std::move(Err);
670   return std::move(Ret);
671 }
672 
673 std::unique_ptr<AbstractArchiveMemberHeader>
674 Archive::createArchiveMemberHeader(const char *RawHeaderPtr, uint64_t Size,
675                                    Error *Err) const {
676   ErrorAsOutParameter ErrAsOutParam(Err);
677   if (kind() != K_AIXBIG)
678     return std::make_unique<ArchiveMemberHeader>(this, RawHeaderPtr, Size, Err);
679   return std::make_unique<BigArchiveMemberHeader>(this, RawHeaderPtr, Size,
680                                                   Err);
681 }
682 
683 uint64_t Archive::getArchiveMagicLen() const {
684   if (isThin())
685     return sizeof(ThinArchiveMagic) - 1;
686 
687   if (Kind() == K_AIXBIG)
688     return sizeof(BigArchiveMagic) - 1;
689 
690   return sizeof(ArchiveMagic) - 1;
691 }
692 
693 void Archive::setFirstRegular(const Child &C) {
694   FirstRegularData = C.Data;
695   FirstRegularStartOfFile = C.StartOfFile;
696 }
697 
698 Archive::Archive(MemoryBufferRef Source, Error &Err)
699     : Binary(Binary::ID_Archive, Source) {
700   ErrorAsOutParameter ErrAsOutParam(&Err);
701   StringRef Buffer = Data.getBuffer();
702   // Check for sufficient magic.
703   if (Buffer.startswith(ThinArchiveMagic)) {
704     IsThin = true;
705   } else if (Buffer.startswith(ArchiveMagic)) {
706     IsThin = false;
707   } else if (Buffer.startswith(BigArchiveMagic)) {
708     Format = K_AIXBIG;
709     IsThin = false;
710     return;
711   } else {
712     Err = make_error<GenericBinaryError>("file too small to be an archive",
713                                          object_error::invalid_file_type);
714     return;
715   }
716 
717   // Make sure Format is initialized before any call to
718   // ArchiveMemberHeader::getName() is made.  This could be a valid empty
719   // archive which is the same in all formats.  So claiming it to be gnu to is
720   // fine if not totally correct before we look for a string table or table of
721   // contents.
722   Format = K_GNU;
723 
724   // Get the special members.
725   child_iterator I = child_begin(Err, false);
726   if (Err)
727     return;
728   child_iterator E = child_end();
729 
730   // See if this is a valid empty archive and if so return.
731   if (I == E) {
732     Err = Error::success();
733     return;
734   }
735   const Child *C = &*I;
736 
737   auto Increment = [&]() {
738     ++I;
739     if (Err)
740       return true;
741     C = &*I;
742     return false;
743   };
744 
745   Expected<StringRef> NameOrErr = C->getRawName();
746   if (!NameOrErr) {
747     Err = NameOrErr.takeError();
748     return;
749   }
750   StringRef Name = NameOrErr.get();
751 
752   // Below is the pattern that is used to figure out the archive format
753   // GNU archive format
754   //  First member : / (may exist, if it exists, points to the symbol table )
755   //  Second member : // (may exist, if it exists, points to the string table)
756   //  Note : The string table is used if the filename exceeds 15 characters
757   // BSD archive format
758   //  First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
759   //  There is no string table, if the filename exceeds 15 characters or has a
760   //  embedded space, the filename has #1/<size>, The size represents the size
761   //  of the filename that needs to be read after the archive header
762   // COFF archive format
763   //  First member : /
764   //  Second member : / (provides a directory of symbols)
765   //  Third member : // (may exist, if it exists, contains the string table)
766   //  Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
767   //  even if the string table is empty. However, lib.exe does not in fact
768   //  seem to create the third member if there's no member whose filename
769   //  exceeds 15 characters. So the third member is optional.
770 
771   if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") {
772     if (Name == "__.SYMDEF")
773       Format = K_BSD;
774     else // Name == "__.SYMDEF_64"
775       Format = K_DARWIN64;
776     // We know that the symbol table is not an external file, but we still must
777     // check any Expected<> return value.
778     Expected<StringRef> BufOrErr = C->getBuffer();
779     if (!BufOrErr) {
780       Err = BufOrErr.takeError();
781       return;
782     }
783     SymbolTable = BufOrErr.get();
784     if (Increment())
785       return;
786     setFirstRegular(*C);
787 
788     Err = Error::success();
789     return;
790   }
791 
792   if (Name.startswith("#1/")) {
793     Format = K_BSD;
794     // We know this is BSD, so getName will work since there is no string table.
795     Expected<StringRef> NameOrErr = C->getName();
796     if (!NameOrErr) {
797       Err = NameOrErr.takeError();
798       return;
799     }
800     Name = NameOrErr.get();
801     if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
802       // We know that the symbol table is not an external file, but we still
803       // must check any Expected<> return value.
804       Expected<StringRef> BufOrErr = C->getBuffer();
805       if (!BufOrErr) {
806         Err = BufOrErr.takeError();
807         return;
808       }
809       SymbolTable = BufOrErr.get();
810       if (Increment())
811         return;
812     } else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") {
813       Format = K_DARWIN64;
814       // We know that the symbol table is not an external file, but we still
815       // must check any Expected<> return value.
816       Expected<StringRef> BufOrErr = C->getBuffer();
817       if (!BufOrErr) {
818         Err = BufOrErr.takeError();
819         return;
820       }
821       SymbolTable = BufOrErr.get();
822       if (Increment())
823         return;
824     }
825     setFirstRegular(*C);
826     return;
827   }
828 
829   // MIPS 64-bit ELF archives use a special format of a symbol table.
830   // This format is marked by `ar_name` field equals to "/SYM64/".
831   // For detailed description see page 96 in the following document:
832   // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
833 
834   bool has64SymTable = false;
835   if (Name == "/" || Name == "/SYM64/") {
836     // We know that the symbol table is not an external file, but we still
837     // must check any Expected<> return value.
838     Expected<StringRef> BufOrErr = C->getBuffer();
839     if (!BufOrErr) {
840       Err = BufOrErr.takeError();
841       return;
842     }
843     SymbolTable = BufOrErr.get();
844     if (Name == "/SYM64/")
845       has64SymTable = true;
846 
847     if (Increment())
848       return;
849     if (I == E) {
850       Err = Error::success();
851       return;
852     }
853     Expected<StringRef> NameOrErr = C->getRawName();
854     if (!NameOrErr) {
855       Err = NameOrErr.takeError();
856       return;
857     }
858     Name = NameOrErr.get();
859   }
860 
861   if (Name == "//") {
862     Format = has64SymTable ? K_GNU64 : K_GNU;
863     // The string table is never an external member, but we still
864     // must check any Expected<> return value.
865     Expected<StringRef> BufOrErr = C->getBuffer();
866     if (!BufOrErr) {
867       Err = BufOrErr.takeError();
868       return;
869     }
870     StringTable = BufOrErr.get();
871     if (Increment())
872       return;
873     setFirstRegular(*C);
874     Err = Error::success();
875     return;
876   }
877 
878   if (Name[0] != '/') {
879     Format = has64SymTable ? K_GNU64 : K_GNU;
880     setFirstRegular(*C);
881     Err = Error::success();
882     return;
883   }
884 
885   if (Name != "/") {
886     Err = errorCodeToError(object_error::parse_failed);
887     return;
888   }
889 
890   Format = K_COFF;
891   // We know that the symbol table is not an external file, but we still
892   // must check any Expected<> return value.
893   Expected<StringRef> BufOrErr = C->getBuffer();
894   if (!BufOrErr) {
895     Err = BufOrErr.takeError();
896     return;
897   }
898   SymbolTable = BufOrErr.get();
899 
900   if (Increment())
901     return;
902 
903   if (I == E) {
904     setFirstRegular(*C);
905     Err = Error::success();
906     return;
907   }
908 
909   NameOrErr = C->getRawName();
910   if (!NameOrErr) {
911     Err = NameOrErr.takeError();
912     return;
913   }
914   Name = NameOrErr.get();
915 
916   if (Name == "//") {
917     // The string table is never an external member, but we still
918     // must check any Expected<> return value.
919     Expected<StringRef> BufOrErr = C->getBuffer();
920     if (!BufOrErr) {
921       Err = BufOrErr.takeError();
922       return;
923     }
924     StringTable = BufOrErr.get();
925     if (Increment())
926       return;
927   }
928 
929   setFirstRegular(*C);
930   Err = Error::success();
931 }
932 
933 object::Archive::Kind Archive::getDefaultKindForHost() {
934   Triple HostTriple(sys::getProcessTriple());
935   return HostTriple.isOSDarwin()
936              ? object::Archive::K_DARWIN
937              : (HostTriple.isOSAIX() ? object::Archive::K_AIXBIG
938                                      : object::Archive::K_GNU);
939 }
940 
941 Archive::child_iterator Archive::child_begin(Error &Err,
942                                              bool SkipInternal) const {
943   if (isEmpty())
944     return child_end();
945 
946   if (SkipInternal)
947     return child_iterator::itr(
948         Child(this, FirstRegularData, FirstRegularStartOfFile), Err);
949 
950   const char *Loc = Data.getBufferStart() + getFirstChildOffset();
951   Child C(this, Loc, &Err);
952   if (Err)
953     return child_end();
954   return child_iterator::itr(C, Err);
955 }
956 
957 Archive::child_iterator Archive::child_end() const {
958   return child_iterator::end(Child(nullptr, nullptr, nullptr));
959 }
960 
961 StringRef Archive::Symbol::getName() const {
962   return Parent->getSymbolTable().begin() + StringIndex;
963 }
964 
965 Expected<Archive::Child> Archive::Symbol::getMember() const {
966   const char *Buf = Parent->getSymbolTable().begin();
967   const char *Offsets = Buf;
968   if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64)
969     Offsets += sizeof(uint64_t);
970   else
971     Offsets += sizeof(uint32_t);
972   uint64_t Offset = 0;
973   if (Parent->kind() == K_GNU) {
974     Offset = read32be(Offsets + SymbolIndex * 4);
975   } else if (Parent->kind() == K_GNU64) {
976     Offset = read64be(Offsets + SymbolIndex * 8);
977   } else if (Parent->kind() == K_BSD) {
978     // The SymbolIndex is an index into the ranlib structs that start at
979     // Offsets (the first uint32_t is the number of bytes of the ranlib
980     // structs).  The ranlib structs are a pair of uint32_t's the first
981     // being a string table offset and the second being the offset into
982     // the archive of the member that defines the symbol.  Which is what
983     // is needed here.
984     Offset = read32le(Offsets + SymbolIndex * 8 + 4);
985   } else if (Parent->kind() == K_DARWIN64) {
986     // The SymbolIndex is an index into the ranlib_64 structs that start at
987     // Offsets (the first uint64_t is the number of bytes of the ranlib_64
988     // structs).  The ranlib_64 structs are a pair of uint64_t's the first
989     // being a string table offset and the second being the offset into
990     // the archive of the member that defines the symbol.  Which is what
991     // is needed here.
992     Offset = read64le(Offsets + SymbolIndex * 16 + 8);
993   } else {
994     // Skip offsets.
995     uint32_t MemberCount = read32le(Buf);
996     Buf += MemberCount * 4 + 4;
997 
998     uint32_t SymbolCount = read32le(Buf);
999     if (SymbolIndex >= SymbolCount)
1000       return errorCodeToError(object_error::parse_failed);
1001 
1002     // Skip SymbolCount to get to the indices table.
1003     const char *Indices = Buf + 4;
1004 
1005     // Get the index of the offset in the file member offset table for this
1006     // symbol.
1007     uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
1008     // Subtract 1 since OffsetIndex is 1 based.
1009     --OffsetIndex;
1010 
1011     if (OffsetIndex >= MemberCount)
1012       return errorCodeToError(object_error::parse_failed);
1013 
1014     Offset = read32le(Offsets + OffsetIndex * 4);
1015   }
1016 
1017   const char *Loc = Parent->getData().begin() + Offset;
1018   Error Err = Error::success();
1019   Child C(Parent, Loc, &Err);
1020   if (Err)
1021     return std::move(Err);
1022   return C;
1023 }
1024 
1025 Archive::Symbol Archive::Symbol::getNext() const {
1026   Symbol t(*this);
1027   if (Parent->kind() == K_BSD) {
1028     // t.StringIndex is an offset from the start of the __.SYMDEF or
1029     // "__.SYMDEF SORTED" member into the string table for the ranlib
1030     // struct indexed by t.SymbolIndex .  To change t.StringIndex to the
1031     // offset in the string table for t.SymbolIndex+1 we subtract the
1032     // its offset from the start of the string table for t.SymbolIndex
1033     // and add the offset of the string table for t.SymbolIndex+1.
1034 
1035     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
1036     // which is the number of bytes of ranlib structs that follow.  The ranlib
1037     // structs are a pair of uint32_t's the first being a string table offset
1038     // and the second being the offset into the archive of the member that
1039     // define the symbol. After that the next uint32_t is the byte count of
1040     // the string table followed by the string table.
1041     const char *Buf = Parent->getSymbolTable().begin();
1042     uint32_t RanlibCount = 0;
1043     RanlibCount = read32le(Buf) / 8;
1044     // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
1045     // don't change the t.StringIndex as we don't want to reference a ranlib
1046     // past RanlibCount.
1047     if (t.SymbolIndex + 1 < RanlibCount) {
1048       const char *Ranlibs = Buf + 4;
1049       uint32_t CurRanStrx = 0;
1050       uint32_t NextRanStrx = 0;
1051       CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
1052       NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
1053       t.StringIndex -= CurRanStrx;
1054       t.StringIndex += NextRanStrx;
1055     }
1056   } else {
1057     // Go to one past next null.
1058     t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
1059   }
1060   ++t.SymbolIndex;
1061   return t;
1062 }
1063 
1064 Archive::symbol_iterator Archive::symbol_begin() const {
1065   if (!hasSymbolTable())
1066     return symbol_iterator(Symbol(this, 0, 0));
1067 
1068   const char *buf = getSymbolTable().begin();
1069   if (kind() == K_GNU) {
1070     uint32_t symbol_count = 0;
1071     symbol_count = read32be(buf);
1072     buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
1073   } else if (kind() == K_GNU64) {
1074     uint64_t symbol_count = read64be(buf);
1075     buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
1076   } else if (kind() == K_BSD) {
1077     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
1078     // which is the number of bytes of ranlib structs that follow.  The ranlib
1079     // structs are a pair of uint32_t's the first being a string table offset
1080     // and the second being the offset into the archive of the member that
1081     // define the symbol. After that the next uint32_t is the byte count of
1082     // the string table followed by the string table.
1083     uint32_t ranlib_count = 0;
1084     ranlib_count = read32le(buf) / 8;
1085     const char *ranlibs = buf + 4;
1086     uint32_t ran_strx = 0;
1087     ran_strx = read32le(ranlibs);
1088     buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
1089     // Skip the byte count of the string table.
1090     buf += sizeof(uint32_t);
1091     buf += ran_strx;
1092   } else if (kind() == K_DARWIN64) {
1093     // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t
1094     // which is the number of bytes of ranlib_64 structs that follow.  The
1095     // ranlib_64 structs are a pair of uint64_t's the first being a string
1096     // table offset and the second being the offset into the archive of the
1097     // member that define the symbol. After that the next uint64_t is the byte
1098     // count of the string table followed by the string table.
1099     uint64_t ranlib_count = 0;
1100     ranlib_count = read64le(buf) / 16;
1101     const char *ranlibs = buf + 8;
1102     uint64_t ran_strx = 0;
1103     ran_strx = read64le(ranlibs);
1104     buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t))));
1105     // Skip the byte count of the string table.
1106     buf += sizeof(uint64_t);
1107     buf += ran_strx;
1108   } else {
1109     uint32_t member_count = 0;
1110     uint32_t symbol_count = 0;
1111     member_count = read32le(buf);
1112     buf += 4 + (member_count * 4); // Skip offsets.
1113     symbol_count = read32le(buf);
1114     buf += 4 + (symbol_count * 2); // Skip indices.
1115   }
1116   uint32_t string_start_offset = buf - getSymbolTable().begin();
1117   return symbol_iterator(Symbol(this, 0, string_start_offset));
1118 }
1119 
1120 Archive::symbol_iterator Archive::symbol_end() const {
1121   return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
1122 }
1123 
1124 uint32_t Archive::getNumberOfSymbols() const {
1125   if (!hasSymbolTable())
1126     return 0;
1127   const char *buf = getSymbolTable().begin();
1128   if (kind() == K_GNU)
1129     return read32be(buf);
1130   if (kind() == K_GNU64)
1131     return read64be(buf);
1132   if (kind() == K_BSD)
1133     return read32le(buf) / 8;
1134   if (kind() == K_DARWIN64)
1135     return read64le(buf) / 16;
1136   uint32_t member_count = 0;
1137   member_count = read32le(buf);
1138   buf += 4 + (member_count * 4); // Skip offsets.
1139   return read32le(buf);
1140 }
1141 
1142 Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const {
1143   Archive::symbol_iterator bs = symbol_begin();
1144   Archive::symbol_iterator es = symbol_end();
1145 
1146   for (; bs != es; ++bs) {
1147     StringRef SymName = bs->getName();
1148     if (SymName == name) {
1149       if (auto MemberOrErr = bs->getMember())
1150         return Child(*MemberOrErr);
1151       else
1152         return MemberOrErr.takeError();
1153     }
1154   }
1155   return Optional<Child>();
1156 }
1157 
1158 // Returns true if archive file contains no member file.
1159 bool Archive::isEmpty() const {
1160   return Data.getBufferSize() == getArchiveMagicLen();
1161 }
1162 
1163 bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); }
1164 
1165 BigArchive::BigArchive(MemoryBufferRef Source, Error &Err)
1166     : Archive(Source, Err) {
1167   ErrorAsOutParameter ErrAsOutParam(&Err);
1168   StringRef Buffer = Data.getBuffer();
1169   ArFixLenHdr = reinterpret_cast<const FixLenHdr *>(Buffer.data());
1170 
1171   StringRef RawOffset = getFieldRawString(ArFixLenHdr->FirstChildOffset);
1172   if (RawOffset.getAsInteger(10, FirstChildOffset))
1173     // TODO: Out-of-line.
1174     Err = malformedError("malformed AIX big archive: first member offset \"" +
1175                          RawOffset + "\" is not a number");
1176 
1177   RawOffset = getFieldRawString(ArFixLenHdr->LastChildOffset);
1178   if (RawOffset.getAsInteger(10, LastChildOffset))
1179     // TODO: Out-of-line.
1180     Err = malformedError("malformed AIX big archive: last member offset \"" +
1181                          RawOffset + "\" is not a number");
1182 
1183   child_iterator I = child_begin(Err, false);
1184   if (Err)
1185     return;
1186   child_iterator E = child_end();
1187   if (I == E) {
1188     Err = Error::success();
1189     return;
1190   }
1191   setFirstRegular(*I);
1192   Err = Error::success();
1193 }
1194