xref: /freebsd/contrib/llvm-project/llvm/lib/Object/Archive.cpp (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 //===- Archive.cpp - ar File Format implementation ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the ArchiveObjectFile class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Object/Archive.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Object/Binary.h"
19 #include "llvm/Object/Error.h"
20 #include "llvm/Support/Chrono.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/ErrorOr.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstddef>
31 #include <cstdint>
32 #include <cstring>
33 #include <memory>
34 #include <string>
35 #include <system_error>
36 
37 using namespace llvm;
38 using namespace object;
39 using namespace llvm::support::endian;
40 
// Signatures found at the very start of an archive file. `Magic` opens a
// regular archive and `ThinMagic` opens a thin archive; both are eight
// characters long, not counting the terminating NUL.
const char Magic[] = "!<arch>\n";
const char ThinMagic[] = "!<thin>\n";
43 
44 void Archive::anchor() {}
45 
46 static Error malformedError(Twine Msg) {
47   std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")";
48   return make_error<GenericBinaryError>(std::move(StringMsg),
49                                         object_error::parse_failed);
50 }
51 
52 ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
53                                          const char *RawHeaderPtr,
54                                          uint64_t Size, Error *Err)
55     : Parent(Parent),
56       ArMemHdr(reinterpret_cast<const ArMemHdrType *>(RawHeaderPtr)) {
57   if (RawHeaderPtr == nullptr)
58     return;
59   ErrorAsOutParameter ErrAsOutParam(Err);
60 
61   if (Size < sizeof(ArMemHdrType)) {
62     if (Err) {
63       std::string Msg("remaining size of archive too small for next archive "
64                       "member header ");
65       Expected<StringRef> NameOrErr = getName(Size);
66       if (!NameOrErr) {
67         consumeError(NameOrErr.takeError());
68         uint64_t Offset = RawHeaderPtr - Parent->getData().data();
69         *Err = malformedError(Msg + "at offset " + Twine(Offset));
70       } else
71         *Err = malformedError(Msg + "for " + NameOrErr.get());
72     }
73     return;
74   }
75   if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') {
76     if (Err) {
77       std::string Buf;
78       raw_string_ostream OS(Buf);
79       OS.write_escaped(
80           StringRef(ArMemHdr->Terminator, sizeof(ArMemHdr->Terminator)));
81       OS.flush();
82       std::string Msg("terminator characters in archive member \"" + Buf +
83                       "\" not the correct \"`\\n\" values for the archive "
84                       "member header ");
85       Expected<StringRef> NameOrErr = getName(Size);
86       if (!NameOrErr) {
87         consumeError(NameOrErr.takeError());
88         uint64_t Offset = RawHeaderPtr - Parent->getData().data();
89         *Err = malformedError(Msg + "at offset " + Twine(Offset));
90       } else
91         *Err = malformedError(Msg + "for " + NameOrErr.get());
92     }
93     return;
94   }
95 }
96 
97 // This gets the raw name from the ArMemHdr->Name field and checks that it is
98 // valid for the kind of archive.  If it is not valid it returns an Error.
99 Expected<StringRef> ArchiveMemberHeader::getRawName() const {
100   char EndCond;
101   auto Kind = Parent->kind();
102   if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) {
103     if (ArMemHdr->Name[0] == ' ') {
104       uint64_t Offset =
105           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
106       return malformedError("name contains a leading space for archive member "
107                             "header at offset " +
108                             Twine(Offset));
109     }
110     EndCond = ' ';
111   } else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#')
112     EndCond = ' ';
113   else
114     EndCond = '/';
115   StringRef::size_type end =
116       StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond);
117   if (end == StringRef::npos)
118     end = sizeof(ArMemHdr->Name);
119   assert(end <= sizeof(ArMemHdr->Name) && end > 0);
120   // Don't include the EndCond if there is one.
121   return StringRef(ArMemHdr->Name, end);
122 }
123 
124 // This gets the name looking up long names. Size is the size of the archive
125 // member including the header, so the size of any name following the header
126 // is checked to make sure it does not overflow.
127 Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
128 
129   // This can be called from the ArchiveMemberHeader constructor when the
130   // archive header is truncated to produce an error message with the name.
131   // Make sure the name field is not truncated.
132   if (Size < offsetof(ArMemHdrType, Name) + sizeof(ArMemHdr->Name)) {
133     uint64_t ArchiveOffset =
134         reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
135     return malformedError("archive header truncated before the name field "
136                           "for archive member header at offset " +
137                           Twine(ArchiveOffset));
138   }
139 
140   // The raw name itself can be invalid.
141   Expected<StringRef> NameOrErr = getRawName();
142   if (!NameOrErr)
143     return NameOrErr.takeError();
144   StringRef Name = NameOrErr.get();
145 
146   // Check if it's a special name.
147   if (Name[0] == '/') {
148     if (Name.size() == 1) // Linker member.
149       return Name;
150     if (Name.size() == 2 && Name[1] == '/') // String table.
151       return Name;
152     // It's a long name.
153     // Get the string table offset.
154     std::size_t StringOffset;
155     if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) {
156       std::string Buf;
157       raw_string_ostream OS(Buf);
158       OS.write_escaped(Name.substr(1).rtrim(' '));
159       OS.flush();
160       uint64_t ArchiveOffset =
161           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
162       return malformedError("long name offset characters after the '/' are "
163                             "not all decimal numbers: '" +
164                             Buf + "' for archive member header at offset " +
165                             Twine(ArchiveOffset));
166     }
167 
168     // Verify it.
169     if (StringOffset >= Parent->getStringTable().size()) {
170       uint64_t ArchiveOffset =
171           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
172       return malformedError("long name offset " + Twine(StringOffset) +
173                             " past the end of the string table for archive "
174                             "member header at offset " +
175                             Twine(ArchiveOffset));
176     }
177 
178     // GNU long file names end with a "/\n".
179     if (Parent->kind() == Archive::K_GNU ||
180         Parent->kind() == Archive::K_GNU64) {
181       size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset);
182       if (End == StringRef::npos || End < 1 ||
183           Parent->getStringTable()[End - 1] != '/') {
184         return malformedError("string table at long name offset " +
185                               Twine(StringOffset) + "not terminated");
186       }
187       return Parent->getStringTable().slice(StringOffset, End - 1);
188     }
189     return Parent->getStringTable().begin() + StringOffset;
190   }
191 
192   if (Name.startswith("#1/")) {
193     uint64_t NameLength;
194     if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) {
195       std::string Buf;
196       raw_string_ostream OS(Buf);
197       OS.write_escaped(Name.substr(3).rtrim(' '));
198       OS.flush();
199       uint64_t ArchiveOffset =
200           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
201       return malformedError("long name length characters after the #1/ are "
202                             "not all decimal numbers: '" +
203                             Buf + "' for archive member header at offset " +
204                             Twine(ArchiveOffset));
205     }
206     if (getSizeOf() + NameLength > Size) {
207       uint64_t ArchiveOffset =
208           reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
209       return malformedError("long name length: " + Twine(NameLength) +
210                             " extends past the end of the member or archive "
211                             "for archive member header at offset " +
212                             Twine(ArchiveOffset));
213     }
214     return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(),
215                      NameLength)
216         .rtrim('\0');
217   }
218 
219   // It is not a long name so trim the blanks at the end of the name.
220   if (Name[Name.size() - 1] != '/')
221     return Name.rtrim(' ');
222 
223   // It's a simple name.
224   return Name.drop_back(1);
225 }
226 
227 Expected<uint64_t> ArchiveMemberHeader::getSize() const {
228   uint64_t Ret;
229   if (StringRef(ArMemHdr->Size, sizeof(ArMemHdr->Size))
230           .rtrim(" ")
231           .getAsInteger(10, Ret)) {
232     std::string Buf;
233     raw_string_ostream OS(Buf);
234     OS.write_escaped(
235         StringRef(ArMemHdr->Size, sizeof(ArMemHdr->Size)).rtrim(" "));
236     OS.flush();
237     uint64_t Offset =
238         reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
239     return malformedError("characters in size field in archive header are not "
240                           "all decimal numbers: '" +
241                           Buf +
242                           "' for archive "
243                           "member header at offset " +
244                           Twine(Offset));
245   }
246   return Ret;
247 }
248 
249 Expected<sys::fs::perms> ArchiveMemberHeader::getAccessMode() const {
250   unsigned Ret;
251   if (StringRef(ArMemHdr->AccessMode, sizeof(ArMemHdr->AccessMode))
252           .rtrim(' ')
253           .getAsInteger(8, Ret)) {
254     std::string Buf;
255     raw_string_ostream OS(Buf);
256     OS.write_escaped(
257         StringRef(ArMemHdr->AccessMode, sizeof(ArMemHdr->AccessMode))
258             .rtrim(" "));
259     OS.flush();
260     uint64_t Offset =
261         reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
262     return malformedError("characters in AccessMode field in archive header "
263                           "are not all decimal numbers: '" +
264                           Buf + "' for the archive member header at offset " +
265                           Twine(Offset));
266   }
267   return static_cast<sys::fs::perms>(Ret);
268 }
269 
270 Expected<sys::TimePoint<std::chrono::seconds>>
271 ArchiveMemberHeader::getLastModified() const {
272   unsigned Seconds;
273   if (StringRef(ArMemHdr->LastModified, sizeof(ArMemHdr->LastModified))
274           .rtrim(' ')
275           .getAsInteger(10, Seconds)) {
276     std::string Buf;
277     raw_string_ostream OS(Buf);
278     OS.write_escaped(
279         StringRef(ArMemHdr->LastModified, sizeof(ArMemHdr->LastModified))
280             .rtrim(" "));
281     OS.flush();
282     uint64_t Offset =
283         reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
284     return malformedError("characters in LastModified field in archive header "
285                           "are not all decimal numbers: '" +
286                           Buf + "' for the archive member header at offset " +
287                           Twine(Offset));
288   }
289 
290   return sys::toTimePoint(Seconds);
291 }
292 
293 Expected<unsigned> ArchiveMemberHeader::getUID() const {
294   unsigned Ret;
295   StringRef User = StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' ');
296   if (User.empty())
297     return 0;
298   if (User.getAsInteger(10, Ret)) {
299     std::string Buf;
300     raw_string_ostream OS(Buf);
301     OS.write_escaped(User);
302     OS.flush();
303     uint64_t Offset =
304         reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
305     return malformedError("characters in UID field in archive header "
306                           "are not all decimal numbers: '" +
307                           Buf + "' for the archive member header at offset " +
308                           Twine(Offset));
309   }
310   return Ret;
311 }
312 
313 Expected<unsigned> ArchiveMemberHeader::getGID() const {
314   unsigned Ret;
315   StringRef Group = StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' ');
316   if (Group.empty())
317     return 0;
318   if (Group.getAsInteger(10, Ret)) {
319     std::string Buf;
320     raw_string_ostream OS(Buf);
321     OS.write_escaped(Group);
322     OS.flush();
323     uint64_t Offset =
324         reinterpret_cast<const char *>(ArMemHdr) - Parent->getData().data();
325     return malformedError("characters in GID field in archive header "
326                           "are not all decimal numbers: '" +
327                           Buf + "' for the archive member header at offset " +
328                           Twine(Offset));
329   }
330   return Ret;
331 }
332 
333 Archive::Child::Child(const Archive *Parent, StringRef Data,
334                       uint16_t StartOfFile)
335     : Parent(Parent), Header(Parent, Data.data(), Data.size(), nullptr),
336       Data(Data), StartOfFile(StartOfFile) {}
337 
338 Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
339     : Parent(Parent),
340       Header(Parent, Start,
341              Parent
342                  ? Parent->getData().size() - (Start - Parent->getData().data())
343                  : 0,
344              Err) {
345   if (!Start)
346     return;
347 
348   // If we are pointed to real data, Start is not a nullptr, then there must be
349   // a non-null Err pointer available to report malformed data on.  Only in
350   // the case sentinel value is being constructed is Err is permitted to be a
351   // nullptr.
352   assert(Err && "Err can't be nullptr if Start is not a nullptr");
353 
354   ErrorAsOutParameter ErrAsOutParam(Err);
355 
356   // If there was an error in the construction of the Header
357   // then just return with the error now set.
358   if (*Err)
359     return;
360 
361   uint64_t Size = Header.getSizeOf();
362   Data = StringRef(Start, Size);
363   Expected<bool> isThinOrErr = isThinMember();
364   if (!isThinOrErr) {
365     *Err = isThinOrErr.takeError();
366     return;
367   }
368   bool isThin = isThinOrErr.get();
369   if (!isThin) {
370     Expected<uint64_t> MemberSize = getRawSize();
371     if (!MemberSize) {
372       *Err = MemberSize.takeError();
373       return;
374     }
375     Size += MemberSize.get();
376     Data = StringRef(Start, Size);
377   }
378 
379   // Setup StartOfFile and PaddingBytes.
380   StartOfFile = Header.getSizeOf();
381   // Don't include attached name.
382   Expected<StringRef> NameOrErr = getRawName();
383   if (!NameOrErr) {
384     *Err = NameOrErr.takeError();
385     return;
386   }
387   StringRef Name = NameOrErr.get();
388   if (Name.startswith("#1/")) {
389     uint64_t NameSize;
390     if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize)) {
391       std::string Buf;
392       raw_string_ostream OS(Buf);
393       OS.write_escaped(Name.substr(3).rtrim(' '));
394       OS.flush();
395       uint64_t Offset = Start - Parent->getData().data();
396       *Err = malformedError("long name length characters after the #1/ are "
397                             "not all decimal numbers: '" +
398                             Buf + "' for archive member header at offset " +
399                             Twine(Offset));
400       return;
401     }
402     StartOfFile += NameSize;
403   }
404 }
405 
406 Expected<uint64_t> Archive::Child::getSize() const {
407   if (Parent->IsThin)
408     return Header.getSize();
409   return Data.size() - StartOfFile;
410 }
411 
412 Expected<uint64_t> Archive::Child::getRawSize() const {
413   return Header.getSize();
414 }
415 
416 Expected<bool> Archive::Child::isThinMember() const {
417   Expected<StringRef> NameOrErr = Header.getRawName();
418   if (!NameOrErr)
419     return NameOrErr.takeError();
420   StringRef Name = NameOrErr.get();
421   return Parent->IsThin && Name != "/" && Name != "//";
422 }
423 
424 Expected<std::string> Archive::Child::getFullName() const {
425   Expected<bool> isThin = isThinMember();
426   if (!isThin)
427     return isThin.takeError();
428   assert(isThin.get());
429   Expected<StringRef> NameOrErr = getName();
430   if (!NameOrErr)
431     return NameOrErr.takeError();
432   StringRef Name = *NameOrErr;
433   if (sys::path::is_absolute(Name))
434     return std::string(Name);
435 
436   SmallString<128> FullName = sys::path::parent_path(
437       Parent->getMemoryBufferRef().getBufferIdentifier());
438   sys::path::append(FullName, Name);
439   return std::string(FullName.str());
440 }
441 
442 Expected<StringRef> Archive::Child::getBuffer() const {
443   Expected<bool> isThinOrErr = isThinMember();
444   if (!isThinOrErr)
445     return isThinOrErr.takeError();
446   bool isThin = isThinOrErr.get();
447   if (!isThin) {
448     Expected<uint64_t> Size = getSize();
449     if (!Size)
450       return Size.takeError();
451     return StringRef(Data.data() + StartOfFile, Size.get());
452   }
453   Expected<std::string> FullNameOrErr = getFullName();
454   if (!FullNameOrErr)
455     return FullNameOrErr.takeError();
456   const std::string &FullName = *FullNameOrErr;
457   ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
458   if (std::error_code EC = Buf.getError())
459     return errorCodeToError(EC);
460   Parent->ThinBuffers.push_back(std::move(*Buf));
461   return Parent->ThinBuffers.back()->getBuffer();
462 }
463 
464 Expected<Archive::Child> Archive::Child::getNext() const {
465   size_t SpaceToSkip = Data.size();
466   // If it's odd, add 1 to make it even.
467   if (SpaceToSkip & 1)
468     ++SpaceToSkip;
469 
470   const char *NextLoc = Data.data() + SpaceToSkip;
471 
472   // Check to see if this is at the end of the archive.
473   if (NextLoc == Parent->Data.getBufferEnd())
474     return Child(nullptr, nullptr, nullptr);
475 
476   // Check to see if this is past the end of the archive.
477   if (NextLoc > Parent->Data.getBufferEnd()) {
478     std::string Msg("offset to next archive member past the end of the archive "
479                     "after member ");
480     Expected<StringRef> NameOrErr = getName();
481     if (!NameOrErr) {
482       consumeError(NameOrErr.takeError());
483       uint64_t Offset = Data.data() - Parent->getData().data();
484       return malformedError(Msg + "at offset " + Twine(Offset));
485     } else
486       return malformedError(Msg + NameOrErr.get());
487   }
488 
489   Error Err = Error::success();
490   Child Ret(Parent, NextLoc, &Err);
491   if (Err)
492     return std::move(Err);
493   return Ret;
494 }
495 
496 uint64_t Archive::Child::getChildOffset() const {
497   const char *a = Parent->Data.getBuffer().data();
498   const char *c = Data.data();
499   uint64_t offset = c - a;
500   return offset;
501 }
502 
503 Expected<StringRef> Archive::Child::getName() const {
504   Expected<uint64_t> RawSizeOrErr = getRawSize();
505   if (!RawSizeOrErr)
506     return RawSizeOrErr.takeError();
507   uint64_t RawSize = RawSizeOrErr.get();
508   Expected<StringRef> NameOrErr = Header.getName(Header.getSizeOf() + RawSize);
509   if (!NameOrErr)
510     return NameOrErr.takeError();
511   StringRef Name = NameOrErr.get();
512   return Name;
513 }
514 
515 Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
516   Expected<StringRef> NameOrErr = getName();
517   if (!NameOrErr)
518     return NameOrErr.takeError();
519   StringRef Name = NameOrErr.get();
520   Expected<StringRef> Buf = getBuffer();
521   if (!Buf)
522     return createFileError(Name, Buf.takeError());
523   return MemoryBufferRef(*Buf, Name);
524 }
525 
526 Expected<std::unique_ptr<Binary>>
527 Archive::Child::getAsBinary(LLVMContext *Context) const {
528   Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
529   if (!BuffOrErr)
530     return BuffOrErr.takeError();
531 
532   auto BinaryOrErr = createBinary(BuffOrErr.get(), Context);
533   if (BinaryOrErr)
534     return std::move(*BinaryOrErr);
535   return BinaryOrErr.takeError();
536 }
537 
538 Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
539   Error Err = Error::success();
540   std::unique_ptr<Archive> Ret(new Archive(Source, Err));
541   if (Err)
542     return std::move(Err);
543   return std::move(Ret);
544 }
545 
546 void Archive::setFirstRegular(const Child &C) {
547   FirstRegularData = C.Data;
548   FirstRegularStartOfFile = C.StartOfFile;
549 }
550 
551 Archive::Archive(MemoryBufferRef Source, Error &Err)
552     : Binary(Binary::ID_Archive, Source) {
553   ErrorAsOutParameter ErrAsOutParam(&Err);
554   StringRef Buffer = Data.getBuffer();
555   // Check for sufficient magic.
556   if (Buffer.startswith(ThinMagic)) {
557     IsThin = true;
558   } else if (Buffer.startswith(Magic)) {
559     IsThin = false;
560   } else {
561     Err = make_error<GenericBinaryError>("file too small to be an archive",
562                                          object_error::invalid_file_type);
563     return;
564   }
565 
566   // Make sure Format is initialized before any call to
567   // ArchiveMemberHeader::getName() is made.  This could be a valid empty
568   // archive which is the same in all formats.  So claiming it to be gnu to is
569   // fine if not totally correct before we look for a string table or table of
570   // contents.
571   Format = K_GNU;
572 
573   // Get the special members.
574   child_iterator I = child_begin(Err, false);
575   if (Err)
576     return;
577   child_iterator E = child_end();
578 
579   // See if this is a valid empty archive and if so return.
580   if (I == E) {
581     Err = Error::success();
582     return;
583   }
584   const Child *C = &*I;
585 
586   auto Increment = [&]() {
587     ++I;
588     if (Err)
589       return true;
590     C = &*I;
591     return false;
592   };
593 
594   Expected<StringRef> NameOrErr = C->getRawName();
595   if (!NameOrErr) {
596     Err = NameOrErr.takeError();
597     return;
598   }
599   StringRef Name = NameOrErr.get();
600 
601   // Below is the pattern that is used to figure out the archive format
602   // GNU archive format
603   //  First member : / (may exist, if it exists, points to the symbol table )
604   //  Second member : // (may exist, if it exists, points to the string table)
605   //  Note : The string table is used if the filename exceeds 15 characters
606   // BSD archive format
607   //  First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
608   //  There is no string table, if the filename exceeds 15 characters or has a
609   //  embedded space, the filename has #1/<size>, The size represents the size
610   //  of the filename that needs to be read after the archive header
611   // COFF archive format
612   //  First member : /
613   //  Second member : / (provides a directory of symbols)
614   //  Third member : // (may exist, if it exists, contains the string table)
615   //  Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
616   //  even if the string table is empty. However, lib.exe does not in fact
617   //  seem to create the third member if there's no member whose filename
618   //  exceeds 15 characters. So the third member is optional.
619 
620   if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") {
621     if (Name == "__.SYMDEF")
622       Format = K_BSD;
623     else // Name == "__.SYMDEF_64"
624       Format = K_DARWIN64;
625     // We know that the symbol table is not an external file, but we still must
626     // check any Expected<> return value.
627     Expected<StringRef> BufOrErr = C->getBuffer();
628     if (!BufOrErr) {
629       Err = BufOrErr.takeError();
630       return;
631     }
632     SymbolTable = BufOrErr.get();
633     if (Increment())
634       return;
635     setFirstRegular(*C);
636 
637     Err = Error::success();
638     return;
639   }
640 
641   if (Name.startswith("#1/")) {
642     Format = K_BSD;
643     // We know this is BSD, so getName will work since there is no string table.
644     Expected<StringRef> NameOrErr = C->getName();
645     if (!NameOrErr) {
646       Err = NameOrErr.takeError();
647       return;
648     }
649     Name = NameOrErr.get();
650     if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
651       // We know that the symbol table is not an external file, but we still
652       // must check any Expected<> return value.
653       Expected<StringRef> BufOrErr = C->getBuffer();
654       if (!BufOrErr) {
655         Err = BufOrErr.takeError();
656         return;
657       }
658       SymbolTable = BufOrErr.get();
659       if (Increment())
660         return;
661     } else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") {
662       Format = K_DARWIN64;
663       // We know that the symbol table is not an external file, but we still
664       // must check any Expected<> return value.
665       Expected<StringRef> BufOrErr = C->getBuffer();
666       if (!BufOrErr) {
667         Err = BufOrErr.takeError();
668         return;
669       }
670       SymbolTable = BufOrErr.get();
671       if (Increment())
672         return;
673     }
674     setFirstRegular(*C);
675     return;
676   }
677 
678   // MIPS 64-bit ELF archives use a special format of a symbol table.
679   // This format is marked by `ar_name` field equals to "/SYM64/".
680   // For detailed description see page 96 in the following document:
681   // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
682 
683   bool has64SymTable = false;
684   if (Name == "/" || Name == "/SYM64/") {
685     // We know that the symbol table is not an external file, but we still
686     // must check any Expected<> return value.
687     Expected<StringRef> BufOrErr = C->getBuffer();
688     if (!BufOrErr) {
689       Err = BufOrErr.takeError();
690       return;
691     }
692     SymbolTable = BufOrErr.get();
693     if (Name == "/SYM64/")
694       has64SymTable = true;
695 
696     if (Increment())
697       return;
698     if (I == E) {
699       Err = Error::success();
700       return;
701     }
702     Expected<StringRef> NameOrErr = C->getRawName();
703     if (!NameOrErr) {
704       Err = NameOrErr.takeError();
705       return;
706     }
707     Name = NameOrErr.get();
708   }
709 
710   if (Name == "//") {
711     Format = has64SymTable ? K_GNU64 : K_GNU;
712     // The string table is never an external member, but we still
713     // must check any Expected<> return value.
714     Expected<StringRef> BufOrErr = C->getBuffer();
715     if (!BufOrErr) {
716       Err = BufOrErr.takeError();
717       return;
718     }
719     StringTable = BufOrErr.get();
720     if (Increment())
721       return;
722     setFirstRegular(*C);
723     Err = Error::success();
724     return;
725   }
726 
727   if (Name[0] != '/') {
728     Format = has64SymTable ? K_GNU64 : K_GNU;
729     setFirstRegular(*C);
730     Err = Error::success();
731     return;
732   }
733 
734   if (Name != "/") {
735     Err = errorCodeToError(object_error::parse_failed);
736     return;
737   }
738 
739   Format = K_COFF;
740   // We know that the symbol table is not an external file, but we still
741   // must check any Expected<> return value.
742   Expected<StringRef> BufOrErr = C->getBuffer();
743   if (!BufOrErr) {
744     Err = BufOrErr.takeError();
745     return;
746   }
747   SymbolTable = BufOrErr.get();
748 
749   if (Increment())
750     return;
751 
752   if (I == E) {
753     setFirstRegular(*C);
754     Err = Error::success();
755     return;
756   }
757 
758   NameOrErr = C->getRawName();
759   if (!NameOrErr) {
760     Err = NameOrErr.takeError();
761     return;
762   }
763   Name = NameOrErr.get();
764 
765   if (Name == "//") {
766     // The string table is never an external member, but we still
767     // must check any Expected<> return value.
768     Expected<StringRef> BufOrErr = C->getBuffer();
769     if (!BufOrErr) {
770       Err = BufOrErr.takeError();
771       return;
772     }
773     StringTable = BufOrErr.get();
774     if (Increment())
775       return;
776   }
777 
778   setFirstRegular(*C);
779   Err = Error::success();
780 }
781 
782 Archive::child_iterator Archive::child_begin(Error &Err,
783                                              bool SkipInternal) const {
784   if (isEmpty())
785     return child_end();
786 
787   if (SkipInternal)
788     return child_iterator::itr(
789         Child(this, FirstRegularData, FirstRegularStartOfFile), Err);
790 
791   const char *Loc = Data.getBufferStart() + strlen(Magic);
792   Child C(this, Loc, &Err);
793   if (Err)
794     return child_end();
795   return child_iterator::itr(C, Err);
796 }
797 
798 Archive::child_iterator Archive::child_end() const {
799   return child_iterator::end(Child(nullptr, nullptr, nullptr));
800 }
801 
802 StringRef Archive::Symbol::getName() const {
803   return Parent->getSymbolTable().begin() + StringIndex;
804 }
805 
806 Expected<Archive::Child> Archive::Symbol::getMember() const {
807   const char *Buf = Parent->getSymbolTable().begin();
808   const char *Offsets = Buf;
809   if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64)
810     Offsets += sizeof(uint64_t);
811   else
812     Offsets += sizeof(uint32_t);
813   uint64_t Offset = 0;
814   if (Parent->kind() == K_GNU) {
815     Offset = read32be(Offsets + SymbolIndex * 4);
816   } else if (Parent->kind() == K_GNU64) {
817     Offset = read64be(Offsets + SymbolIndex * 8);
818   } else if (Parent->kind() == K_BSD) {
819     // The SymbolIndex is an index into the ranlib structs that start at
820     // Offsets (the first uint32_t is the number of bytes of the ranlib
821     // structs).  The ranlib structs are a pair of uint32_t's the first
822     // being a string table offset and the second being the offset into
823     // the archive of the member that defines the symbol.  Which is what
824     // is needed here.
825     Offset = read32le(Offsets + SymbolIndex * 8 + 4);
826   } else if (Parent->kind() == K_DARWIN64) {
827     // The SymbolIndex is an index into the ranlib_64 structs that start at
828     // Offsets (the first uint64_t is the number of bytes of the ranlib_64
829     // structs).  The ranlib_64 structs are a pair of uint64_t's the first
830     // being a string table offset and the second being the offset into
831     // the archive of the member that defines the symbol.  Which is what
832     // is needed here.
833     Offset = read64le(Offsets + SymbolIndex * 16 + 8);
834   } else {
835     // Skip offsets.
836     uint32_t MemberCount = read32le(Buf);
837     Buf += MemberCount * 4 + 4;
838 
839     uint32_t SymbolCount = read32le(Buf);
840     if (SymbolIndex >= SymbolCount)
841       return errorCodeToError(object_error::parse_failed);
842 
843     // Skip SymbolCount to get to the indices table.
844     const char *Indices = Buf + 4;
845 
846     // Get the index of the offset in the file member offset table for this
847     // symbol.
848     uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
849     // Subtract 1 since OffsetIndex is 1 based.
850     --OffsetIndex;
851 
852     if (OffsetIndex >= MemberCount)
853       return errorCodeToError(object_error::parse_failed);
854 
855     Offset = read32le(Offsets + OffsetIndex * 4);
856   }
857 
858   const char *Loc = Parent->getData().begin() + Offset;
859   Error Err = Error::success();
860   Child C(Parent, Loc, &Err);
861   if (Err)
862     return std::move(Err);
863   return C;
864 }
865 
866 Archive::Symbol Archive::Symbol::getNext() const {
867   Symbol t(*this);
868   if (Parent->kind() == K_BSD) {
869     // t.StringIndex is an offset from the start of the __.SYMDEF or
870     // "__.SYMDEF SORTED" member into the string table for the ranlib
871     // struct indexed by t.SymbolIndex .  To change t.StringIndex to the
872     // offset in the string table for t.SymbolIndex+1 we subtract the
873     // its offset from the start of the string table for t.SymbolIndex
874     // and add the offset of the string table for t.SymbolIndex+1.
875 
876     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
877     // which is the number of bytes of ranlib structs that follow.  The ranlib
878     // structs are a pair of uint32_t's the first being a string table offset
879     // and the second being the offset into the archive of the member that
880     // define the symbol. After that the next uint32_t is the byte count of
881     // the string table followed by the string table.
882     const char *Buf = Parent->getSymbolTable().begin();
883     uint32_t RanlibCount = 0;
884     RanlibCount = read32le(Buf) / 8;
885     // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
886     // don't change the t.StringIndex as we don't want to reference a ranlib
887     // past RanlibCount.
888     if (t.SymbolIndex + 1 < RanlibCount) {
889       const char *Ranlibs = Buf + 4;
890       uint32_t CurRanStrx = 0;
891       uint32_t NextRanStrx = 0;
892       CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
893       NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
894       t.StringIndex -= CurRanStrx;
895       t.StringIndex += NextRanStrx;
896     }
897   } else {
898     // Go to one past next null.
899     t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
900   }
901   ++t.SymbolIndex;
902   return t;
903 }
904 
905 Archive::symbol_iterator Archive::symbol_begin() const {
906   if (!hasSymbolTable())
907     return symbol_iterator(Symbol(this, 0, 0));
908 
909   const char *buf = getSymbolTable().begin();
910   if (kind() == K_GNU) {
911     uint32_t symbol_count = 0;
912     symbol_count = read32be(buf);
913     buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
914   } else if (kind() == K_GNU64) {
915     uint64_t symbol_count = read64be(buf);
916     buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
917   } else if (kind() == K_BSD) {
918     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
919     // which is the number of bytes of ranlib structs that follow.  The ranlib
920     // structs are a pair of uint32_t's the first being a string table offset
921     // and the second being the offset into the archive of the member that
922     // define the symbol. After that the next uint32_t is the byte count of
923     // the string table followed by the string table.
924     uint32_t ranlib_count = 0;
925     ranlib_count = read32le(buf) / 8;
926     const char *ranlibs = buf + 4;
927     uint32_t ran_strx = 0;
928     ran_strx = read32le(ranlibs);
929     buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
930     // Skip the byte count of the string table.
931     buf += sizeof(uint32_t);
932     buf += ran_strx;
933   } else if (kind() == K_DARWIN64) {
934     // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t
935     // which is the number of bytes of ranlib_64 structs that follow.  The
936     // ranlib_64 structs are a pair of uint64_t's the first being a string
937     // table offset and the second being the offset into the archive of the
938     // member that define the symbol. After that the next uint64_t is the byte
939     // count of the string table followed by the string table.
940     uint64_t ranlib_count = 0;
941     ranlib_count = read64le(buf) / 16;
942     const char *ranlibs = buf + 8;
943     uint64_t ran_strx = 0;
944     ran_strx = read64le(ranlibs);
945     buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t))));
946     // Skip the byte count of the string table.
947     buf += sizeof(uint64_t);
948     buf += ran_strx;
949   } else {
950     uint32_t member_count = 0;
951     uint32_t symbol_count = 0;
952     member_count = read32le(buf);
953     buf += 4 + (member_count * 4); // Skip offsets.
954     symbol_count = read32le(buf);
955     buf += 4 + (symbol_count * 2); // Skip indices.
956   }
957   uint32_t string_start_offset = buf - getSymbolTable().begin();
958   return symbol_iterator(Symbol(this, 0, string_start_offset));
959 }
960 
961 Archive::symbol_iterator Archive::symbol_end() const {
962   return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
963 }
964 
965 uint32_t Archive::getNumberOfSymbols() const {
966   if (!hasSymbolTable())
967     return 0;
968   const char *buf = getSymbolTable().begin();
969   if (kind() == K_GNU)
970     return read32be(buf);
971   if (kind() == K_GNU64)
972     return read64be(buf);
973   if (kind() == K_BSD)
974     return read32le(buf) / 8;
975   if (kind() == K_DARWIN64)
976     return read64le(buf) / 16;
977   uint32_t member_count = 0;
978   member_count = read32le(buf);
979   buf += 4 + (member_count * 4); // Skip offsets.
980   return read32le(buf);
981 }
982 
983 Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const {
984   Archive::symbol_iterator bs = symbol_begin();
985   Archive::symbol_iterator es = symbol_end();
986 
987   for (; bs != es; ++bs) {
988     StringRef SymName = bs->getName();
989     if (SymName == name) {
990       if (auto MemberOrErr = bs->getMember())
991         return Child(*MemberOrErr);
992       else
993         return MemberOrErr.takeError();
994     }
995   }
996   return Optional<Child>();
997 }
998 
999 // Returns true if archive file contains no member file.
1000 bool Archive::isEmpty() const { return Data.getBufferSize() == 8; }
1001 
1002 bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); }
1003