xref: /freebsd/contrib/llvm-project/llvm/lib/Object/Archive.cpp (revision cfd6422a5217410fbd66f7a7a8a64d9d85e61229)
1 //===- Archive.cpp - ar File Format implementation ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the Archive class and its ArchiveMemberHeader helper.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Object/Archive.h"
14 #include "llvm/ADT/Optional.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/StringRef.h"
17 #include "llvm/ADT/Twine.h"
18 #include "llvm/Object/Binary.h"
19 #include "llvm/Object/Error.h"
20 #include "llvm/Support/Chrono.h"
21 #include "llvm/Support/Endian.h"
22 #include "llvm/Support/Error.h"
23 #include "llvm/Support/ErrorOr.h"
24 #include "llvm/Support/FileSystem.h"
25 #include "llvm/Support/MemoryBuffer.h"
26 #include "llvm/Support/Path.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <algorithm>
29 #include <cassert>
30 #include <cstddef>
31 #include <cstdint>
32 #include <cstring>
33 #include <memory>
34 #include <string>
35 #include <system_error>
36 
37 using namespace llvm;
38 using namespace object;
39 using namespace llvm::support::endian;
40 
// Signature expected at the very start of a regular archive; the Archive
// constructor tests the buffer for this prefix and child_begin() skips
// strlen(Magic) bytes to reach the first member header.
static const char *const Magic = "!<arch>\n";
// Signature expected at the very start of a thin archive; the Archive
// constructor sets IsThin when the buffer starts with it.
static const char *const ThinMagic = "!<thin>\n";

// Intentionally empty out-of-line definition.
// NOTE(review): presumably the usual LLVM vtable anchor for Archive -- confirm
// against the declaration in Archive.h.
void Archive::anchor() {}
45 
46 static Error
47 malformedError(Twine Msg) {
48   std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")";
49   return make_error<GenericBinaryError>(std::move(StringMsg),
50                                         object_error::parse_failed);
51 }
52 
53 ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
54                                          const char *RawHeaderPtr,
55                                          uint64_t Size, Error *Err)
56     : Parent(Parent),
57       ArMemHdr(reinterpret_cast<const ArMemHdrType *>(RawHeaderPtr)) {
58   if (RawHeaderPtr == nullptr)
59     return;
60   ErrorAsOutParameter ErrAsOutParam(Err);
61 
62   if (Size < sizeof(ArMemHdrType)) {
63     if (Err) {
64       std::string Msg("remaining size of archive too small for next archive "
65                       "member header ");
66       Expected<StringRef> NameOrErr = getName(Size);
67       if (!NameOrErr) {
68         consumeError(NameOrErr.takeError());
69         uint64_t Offset = RawHeaderPtr - Parent->getData().data();
70         *Err = malformedError(Msg + "at offset " + Twine(Offset));
71       } else
72         *Err = malformedError(Msg + "for " + NameOrErr.get());
73     }
74     return;
75   }
76   if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') {
77     if (Err) {
78       std::string Buf;
79       raw_string_ostream OS(Buf);
80       OS.write_escaped(StringRef(ArMemHdr->Terminator,
81                                  sizeof(ArMemHdr->Terminator)));
82       OS.flush();
83       std::string Msg("terminator characters in archive member \"" + Buf +
84                       "\" not the correct \"`\\n\" values for the archive "
85                       "member header ");
86       Expected<StringRef> NameOrErr = getName(Size);
87       if (!NameOrErr) {
88         consumeError(NameOrErr.takeError());
89         uint64_t Offset = RawHeaderPtr - Parent->getData().data();
90         *Err = malformedError(Msg + "at offset " + Twine(Offset));
91       } else
92         *Err = malformedError(Msg + "for " + NameOrErr.get());
93     }
94     return;
95   }
96 }
97 
98 // This gets the raw name from the ArMemHdr->Name field and checks that it is
99 // valid for the kind of archive.  If it is not valid it returns an Error.
100 Expected<StringRef> ArchiveMemberHeader::getRawName() const {
101   char EndCond;
102   auto Kind = Parent->kind();
103   if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) {
104     if (ArMemHdr->Name[0] == ' ') {
105       uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
106                         Parent->getData().data();
107       return malformedError("name contains a leading space for archive member "
108                             "header at offset " + Twine(Offset));
109     }
110     EndCond = ' ';
111   }
112   else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#')
113     EndCond = ' ';
114   else
115     EndCond = '/';
116   StringRef::size_type end =
117       StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond);
118   if (end == StringRef::npos)
119     end = sizeof(ArMemHdr->Name);
120   assert(end <= sizeof(ArMemHdr->Name) && end > 0);
121   // Don't include the EndCond if there is one.
122   return StringRef(ArMemHdr->Name, end);
123 }
124 
125 // This gets the name looking up long names. Size is the size of the archive
126 // member including the header, so the size of any name following the header
127 // is checked to make sure it does not overflow.
128 Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
129 
130   // This can be called from the ArchiveMemberHeader constructor when the
131   // archive header is truncated to produce an error message with the name.
132   // Make sure the name field is not truncated.
133   if (Size < offsetof(ArMemHdrType, Name) + sizeof(ArMemHdr->Name)) {
134     uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
135                       Parent->getData().data();
136     return malformedError("archive header truncated before the name field "
137                           "for archive member header at offset " +
138                           Twine(ArchiveOffset));
139   }
140 
141   // The raw name itself can be invalid.
142   Expected<StringRef> NameOrErr = getRawName();
143   if (!NameOrErr)
144     return NameOrErr.takeError();
145   StringRef Name = NameOrErr.get();
146 
147   // Check if it's a special name.
148   if (Name[0] == '/') {
149     if (Name.size() == 1) // Linker member.
150       return Name;
151     if (Name.size() == 2 && Name[1] == '/') // String table.
152       return Name;
153     // It's a long name.
154     // Get the string table offset.
155     std::size_t StringOffset;
156     if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) {
157       std::string Buf;
158       raw_string_ostream OS(Buf);
159       OS.write_escaped(Name.substr(1).rtrim(' '));
160       OS.flush();
161       uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
162                                Parent->getData().data();
163       return malformedError("long name offset characters after the '/' are "
164                             "not all decimal numbers: '" + Buf + "' for "
165                             "archive member header at offset " +
166                             Twine(ArchiveOffset));
167     }
168 
169     // Verify it.
170     if (StringOffset >= Parent->getStringTable().size()) {
171       uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
172                                Parent->getData().data();
173       return malformedError("long name offset " + Twine(StringOffset) + " past "
174                             "the end of the string table for archive member "
175                             "header at offset " + Twine(ArchiveOffset));
176     }
177 
178     // GNU long file names end with a "/\n".
179     if (Parent->kind() == Archive::K_GNU ||
180         Parent->kind() == Archive::K_GNU64) {
181       size_t End = Parent->getStringTable().find('\n', /*From=*/StringOffset);
182       if (End == StringRef::npos || End < 1 ||
183           Parent->getStringTable()[End - 1] != '/') {
184         return malformedError("string table at long name offset " +
185                               Twine(StringOffset) + "not terminated");
186       }
187       return Parent->getStringTable().slice(StringOffset, End - 1);
188     }
189     return Parent->getStringTable().begin() + StringOffset;
190   }
191 
192   if (Name.startswith("#1/")) {
193     uint64_t NameLength;
194     if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) {
195       std::string Buf;
196       raw_string_ostream OS(Buf);
197       OS.write_escaped(Name.substr(3).rtrim(' '));
198       OS.flush();
199       uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
200                         Parent->getData().data();
201       return malformedError("long name length characters after the #1/ are "
202                             "not all decimal numbers: '" + Buf + "' for "
203                             "archive member header at offset " +
204                             Twine(ArchiveOffset));
205     }
206     if (getSizeOf() + NameLength > Size) {
207       uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
208                         Parent->getData().data();
209       return malformedError("long name length: " + Twine(NameLength) +
210                             " extends past the end of the member or archive "
211                             "for archive member header at offset " +
212                             Twine(ArchiveOffset));
213     }
214     return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(),
215                      NameLength).rtrim('\0');
216   }
217 
218   // It is not a long name so trim the blanks at the end of the name.
219   if (Name[Name.size() - 1] != '/')
220     return Name.rtrim(' ');
221 
222   // It's a simple name.
223   return Name.drop_back(1);
224 }
225 
226 Expected<uint64_t> ArchiveMemberHeader::getSize() const {
227   uint64_t Ret;
228   if (StringRef(ArMemHdr->Size,
229                 sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) {
230     std::string Buf;
231     raw_string_ostream OS(Buf);
232     OS.write_escaped(StringRef(ArMemHdr->Size,
233                                sizeof(ArMemHdr->Size)).rtrim(" "));
234     OS.flush();
235     uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
236                       Parent->getData().data();
237     return malformedError("characters in size field in archive header are not "
238                           "all decimal numbers: '" + Buf + "' for archive "
239                           "member header at offset " + Twine(Offset));
240   }
241   return Ret;
242 }
243 
244 Expected<sys::fs::perms> ArchiveMemberHeader::getAccessMode() const {
245   unsigned Ret;
246   if (StringRef(ArMemHdr->AccessMode,
247                 sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret)) {
248     std::string Buf;
249     raw_string_ostream OS(Buf);
250     OS.write_escaped(StringRef(ArMemHdr->AccessMode,
251                                sizeof(ArMemHdr->AccessMode)).rtrim(" "));
252     OS.flush();
253     uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
254                       Parent->getData().data();
255     return malformedError("characters in AccessMode field in archive header "
256                           "are not all decimal numbers: '" + Buf + "' for the "
257                           "archive member header at offset " + Twine(Offset));
258   }
259   return static_cast<sys::fs::perms>(Ret);
260 }
261 
262 Expected<sys::TimePoint<std::chrono::seconds>>
263 ArchiveMemberHeader::getLastModified() const {
264   unsigned Seconds;
265   if (StringRef(ArMemHdr->LastModified,
266                 sizeof(ArMemHdr->LastModified)).rtrim(' ')
267           .getAsInteger(10, Seconds)) {
268     std::string Buf;
269     raw_string_ostream OS(Buf);
270     OS.write_escaped(StringRef(ArMemHdr->LastModified,
271                                sizeof(ArMemHdr->LastModified)).rtrim(" "));
272     OS.flush();
273     uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
274                       Parent->getData().data();
275     return malformedError("characters in LastModified field in archive header "
276                           "are not all decimal numbers: '" + Buf + "' for the "
277                           "archive member header at offset " + Twine(Offset));
278   }
279 
280   return sys::toTimePoint(Seconds);
281 }
282 
283 Expected<unsigned> ArchiveMemberHeader::getUID() const {
284   unsigned Ret;
285   StringRef User = StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' ');
286   if (User.empty())
287     return 0;
288   if (User.getAsInteger(10, Ret)) {
289     std::string Buf;
290     raw_string_ostream OS(Buf);
291     OS.write_escaped(User);
292     OS.flush();
293     uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
294                       Parent->getData().data();
295     return malformedError("characters in UID field in archive header "
296                           "are not all decimal numbers: '" + Buf + "' for the "
297                           "archive member header at offset " + Twine(Offset));
298   }
299   return Ret;
300 }
301 
302 Expected<unsigned> ArchiveMemberHeader::getGID() const {
303   unsigned Ret;
304   StringRef Group = StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' ');
305   if (Group.empty())
306     return 0;
307   if (Group.getAsInteger(10, Ret)) {
308     std::string Buf;
309     raw_string_ostream OS(Buf);
310     OS.write_escaped(Group);
311     OS.flush();
312     uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
313                       Parent->getData().data();
314     return malformedError("characters in GID field in archive header "
315                           "are not all decimal numbers: '" + Buf + "' for the "
316                           "archive member header at offset " + Twine(Offset));
317   }
318   return Ret;
319 }
320 
321 Archive::Child::Child(const Archive *Parent, StringRef Data,
322                       uint16_t StartOfFile)
323     : Parent(Parent), Header(Parent, Data.data(), Data.size(), nullptr),
324       Data(Data), StartOfFile(StartOfFile) {
325 }
326 
327 Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
328     : Parent(Parent),
329       Header(Parent, Start,
330              Parent
331                ? Parent->getData().size() - (Start - Parent->getData().data())
332                : 0, Err) {
333   if (!Start)
334     return;
335 
336   // If we are pointed to real data, Start is not a nullptr, then there must be
337   // a non-null Err pointer available to report malformed data on.  Only in
338   // the case sentinel value is being constructed is Err is permitted to be a
339   // nullptr.
340   assert(Err && "Err can't be nullptr if Start is not a nullptr");
341 
342   ErrorAsOutParameter ErrAsOutParam(Err);
343 
344   // If there was an error in the construction of the Header
345   // then just return with the error now set.
346   if (*Err)
347     return;
348 
349   uint64_t Size = Header.getSizeOf();
350   Data = StringRef(Start, Size);
351   Expected<bool> isThinOrErr = isThinMember();
352   if (!isThinOrErr) {
353     *Err = isThinOrErr.takeError();
354     return;
355   }
356   bool isThin = isThinOrErr.get();
357   if (!isThin) {
358     Expected<uint64_t> MemberSize = getRawSize();
359     if (!MemberSize) {
360       *Err = MemberSize.takeError();
361       return;
362     }
363     Size += MemberSize.get();
364     Data = StringRef(Start, Size);
365   }
366 
367   // Setup StartOfFile and PaddingBytes.
368   StartOfFile = Header.getSizeOf();
369   // Don't include attached name.
370   Expected<StringRef> NameOrErr = getRawName();
371   if (!NameOrErr){
372     *Err = NameOrErr.takeError();
373     return;
374   }
375   StringRef Name = NameOrErr.get();
376   if (Name.startswith("#1/")) {
377     uint64_t NameSize;
378     if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize)) {
379       std::string Buf;
380       raw_string_ostream OS(Buf);
381       OS.write_escaped(Name.substr(3).rtrim(' '));
382       OS.flush();
383       uint64_t Offset = Start - Parent->getData().data();
384       *Err = malformedError("long name length characters after the #1/ are "
385                             "not all decimal numbers: '" + Buf + "' for "
386                             "archive member header at offset " +
387                             Twine(Offset));
388       return;
389     }
390     StartOfFile += NameSize;
391   }
392 }
393 
394 Expected<uint64_t> Archive::Child::getSize() const {
395   if (Parent->IsThin)
396     return Header.getSize();
397   return Data.size() - StartOfFile;
398 }
399 
400 Expected<uint64_t> Archive::Child::getRawSize() const {
401   return Header.getSize();
402 }
403 
404 Expected<bool> Archive::Child::isThinMember() const {
405   Expected<StringRef> NameOrErr = Header.getRawName();
406   if (!NameOrErr)
407     return NameOrErr.takeError();
408   StringRef Name = NameOrErr.get();
409   return Parent->IsThin && Name != "/" && Name != "//";
410 }
411 
412 Expected<std::string> Archive::Child::getFullName() const {
413   Expected<bool> isThin = isThinMember();
414   if (!isThin)
415     return isThin.takeError();
416   assert(isThin.get());
417   Expected<StringRef> NameOrErr = getName();
418   if (!NameOrErr)
419     return NameOrErr.takeError();
420   StringRef Name = *NameOrErr;
421   if (sys::path::is_absolute(Name))
422     return std::string(Name);
423 
424   SmallString<128> FullName = sys::path::parent_path(
425       Parent->getMemoryBufferRef().getBufferIdentifier());
426   sys::path::append(FullName, Name);
427   return std::string(FullName.str());
428 }
429 
430 Expected<StringRef> Archive::Child::getBuffer() const {
431   Expected<bool> isThinOrErr = isThinMember();
432   if (!isThinOrErr)
433     return isThinOrErr.takeError();
434   bool isThin = isThinOrErr.get();
435   if (!isThin) {
436     Expected<uint64_t> Size = getSize();
437     if (!Size)
438       return Size.takeError();
439     return StringRef(Data.data() + StartOfFile, Size.get());
440   }
441   Expected<std::string> FullNameOrErr = getFullName();
442   if (!FullNameOrErr)
443     return FullNameOrErr.takeError();
444   const std::string &FullName = *FullNameOrErr;
445   ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
446   if (std::error_code EC = Buf.getError())
447     return errorCodeToError(EC);
448   Parent->ThinBuffers.push_back(std::move(*Buf));
449   return Parent->ThinBuffers.back()->getBuffer();
450 }
451 
452 Expected<Archive::Child> Archive::Child::getNext() const {
453   size_t SpaceToSkip = Data.size();
454   // If it's odd, add 1 to make it even.
455   if (SpaceToSkip & 1)
456     ++SpaceToSkip;
457 
458   const char *NextLoc = Data.data() + SpaceToSkip;
459 
460   // Check to see if this is at the end of the archive.
461   if (NextLoc == Parent->Data.getBufferEnd())
462     return Child(nullptr, nullptr, nullptr);
463 
464   // Check to see if this is past the end of the archive.
465   if (NextLoc > Parent->Data.getBufferEnd()) {
466     std::string Msg("offset to next archive member past the end of the archive "
467                     "after member ");
468     Expected<StringRef> NameOrErr = getName();
469     if (!NameOrErr) {
470       consumeError(NameOrErr.takeError());
471       uint64_t Offset = Data.data() - Parent->getData().data();
472       return malformedError(Msg + "at offset " + Twine(Offset));
473     } else
474       return malformedError(Msg + NameOrErr.get());
475   }
476 
477   Error Err = Error::success();
478   Child Ret(Parent, NextLoc, &Err);
479   if (Err)
480     return std::move(Err);
481   return Ret;
482 }
483 
484 uint64_t Archive::Child::getChildOffset() const {
485   const char *a = Parent->Data.getBuffer().data();
486   const char *c = Data.data();
487   uint64_t offset = c - a;
488   return offset;
489 }
490 
491 Expected<StringRef> Archive::Child::getName() const {
492   Expected<uint64_t> RawSizeOrErr = getRawSize();
493   if (!RawSizeOrErr)
494     return RawSizeOrErr.takeError();
495   uint64_t RawSize = RawSizeOrErr.get();
496   Expected<StringRef> NameOrErr = Header.getName(Header.getSizeOf() + RawSize);
497   if (!NameOrErr)
498     return NameOrErr.takeError();
499   StringRef Name = NameOrErr.get();
500   return Name;
501 }
502 
503 Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
504   Expected<StringRef> NameOrErr = getName();
505   if (!NameOrErr)
506     return NameOrErr.takeError();
507   StringRef Name = NameOrErr.get();
508   Expected<StringRef> Buf = getBuffer();
509   if (!Buf)
510     return createFileError(Name, Buf.takeError());
511   return MemoryBufferRef(*Buf, Name);
512 }
513 
514 Expected<std::unique_ptr<Binary>>
515 Archive::Child::getAsBinary(LLVMContext *Context) const {
516   Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
517   if (!BuffOrErr)
518     return BuffOrErr.takeError();
519 
520   auto BinaryOrErr = createBinary(BuffOrErr.get(), Context);
521   if (BinaryOrErr)
522     return std::move(*BinaryOrErr);
523   return BinaryOrErr.takeError();
524 }
525 
526 Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
527   Error Err = Error::success();
528   std::unique_ptr<Archive> Ret(new Archive(Source, Err));
529   if (Err)
530     return std::move(Err);
531   return std::move(Ret);
532 }
533 
534 void Archive::setFirstRegular(const Child &C) {
535   FirstRegularData = C.Data;
536   FirstRegularStartOfFile = C.StartOfFile;
537 }
538 
// Parses the archive in Source far enough to classify it and locate its
// special members.
//
// On return either Err holds the failure, or:
//  * IsThin reflects which magic string the buffer starts with,
//  * Format is one of K_GNU, K_GNU64, K_BSD, K_DARWIN64 or K_COFF,
//  * SymbolTable / StringTable reference the corresponding special members
//    when they were found, and
//  * the first regular member has been recorded via setFirstRegular() on the
//    paths that reach such a call.
//
// The order of the checks below matters: each step looks at the raw name of
// the current child (C) and may advance the iterator (I) before the next
// check.
Archive::Archive(MemoryBufferRef Source, Error &Err)
    : Binary(Binary::ID_Archive, Source) {
  ErrorAsOutParameter ErrAsOutParam(&Err);
  StringRef Buffer = Data.getBuffer();
  // Check for sufficient magic.
  if (Buffer.startswith(ThinMagic)) {
    IsThin = true;
  } else if (Buffer.startswith(Magic)) {
    IsThin = false;
  } else {
    // NOTE(review): this branch is also taken when the buffer is long enough
    // but simply starts with neither magic string, so the message text can be
    // misleading.
    Err = make_error<GenericBinaryError>("file too small to be an archive",
                                         object_error::invalid_file_type);
    return;
  }

  // Make sure Format is initialized before any call to
  // ArchiveMemberHeader::getName() is made.  This could be a valid empty
  // archive which is the same in all formats.  So claiming it to be gnu to is
  // fine if not totally correct before we look for a string table or table of
  // contents.
  Format = K_GNU;

  // Get the special members.
  child_iterator I = child_begin(Err, false);
  if (Err)
    return;
  child_iterator E = child_end();

  // See if this is a valid empty archive and if so return.
  if (I == E) {
    Err = Error::success();
    return;
  }
  const Child *C = &*I;

  // Advances I to the next member. Returns true when advancing recorded an
  // error in Err (the caller must bail out); otherwise refreshes C to the new
  // current child and returns false.
  auto Increment = [&]() {
    ++I;
    if (Err)
      return true;
    C = &*I;
    return false;
  };

  Expected<StringRef> NameOrErr = C->getRawName();
  if (!NameOrErr) {
    Err = NameOrErr.takeError();
    return;
  }
  StringRef Name = NameOrErr.get();

  // Below is the pattern that is used to figure out the archive format
  // GNU archive format
  //  First member : / (may exist, if it exists, points to the symbol table )
  //  Second member : // (may exist, if it exists, points to the string table)
  //  Note : The string table is used if the filename exceeds 15 characters
  // BSD archive format
  //  First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
  //  There is no string table, if the filename exceeds 15 characters or has a
  //  embedded space, the filename has #1/<size>, The size represents the size
  //  of the filename that needs to be read after the archive header
  // COFF archive format
  //  First member : /
  //  Second member : / (provides a directory of symbols)
  //  Third member : // (may exist, if it exists, contains the string table)
  //  Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
  //  even if the string table is empty. However, lib.exe does not in fact
  //  seem to create the third member if there's no member whose filename
  //  exceeds 15 characters. So the third member is optional.

  // Short-name BSD / Darwin64 symbol table as the first member.
  if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") {
    if (Name == "__.SYMDEF")
      Format = K_BSD;
    else // Name == "__.SYMDEF_64"
      Format = K_DARWIN64;
    // We know that the symbol table is not an external file, but we still must
    // check any Expected<> return value.
    Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return;
    }
    SymbolTable = BufOrErr.get();
    if (Increment())
      return;
    setFirstRegular(*C);

    Err = Error::success();
    return;
  }

  // First member uses the BSD "#1/<size>" long-name form: resolve the real
  // name to see whether it is one of the symbol table members.
  if (Name.startswith("#1/")) {
    Format = K_BSD;
    // We know this is BSD, so getName will work since there is no string table.
    Expected<StringRef> NameOrErr = C->getName();
    if (!NameOrErr) {
      Err = NameOrErr.takeError();
      return;
    }
    Name = NameOrErr.get();
    if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
      // We know that the symbol table is not an external file, but we still
      // must check any Expected<> return value.
      Expected<StringRef> BufOrErr = C->getBuffer();
      if (!BufOrErr) {
        Err = BufOrErr.takeError();
        return;
      }
      SymbolTable = BufOrErr.get();
      if (Increment())
        return;
    }
    else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") {
      Format = K_DARWIN64;
      // We know that the symbol table is not an external file, but we still
      // must check any Expected<> return value.
      Expected<StringRef> BufOrErr = C->getBuffer();
      if (!BufOrErr) {
        Err = BufOrErr.takeError();
        return;
      }
      SymbolTable = BufOrErr.get();
      if (Increment())
        return;
    }
    // Either the member after the symbol table, or this member itself when it
    // was not a symbol table.
    setFirstRegular(*C);
    return;
  }

  // MIPS 64-bit ELF archives use a special format of a symbol table.
  // This format is marked by `ar_name` field equals to "/SYM64/".
  // For detailed description see page 96 in the following document:
  // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf

  bool has64SymTable = false;
  if (Name == "/" || Name == "/SYM64/") {
    // We know that the symbol table is not an external file, but we still
    // must check any Expected<> return value.
    Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return;
    }
    SymbolTable = BufOrErr.get();
    if (Name == "/SYM64/")
      has64SymTable = true;

    if (Increment())
      return;
    // The symbol table was the only member.
    if (I == E) {
      Err = Error::success();
      return;
    }
    // Re-read the name: the checks below apply to the member that follows
    // the symbol table.
    Expected<StringRef> NameOrErr = C->getRawName();
    if (!NameOrErr) {
      Err = NameOrErr.takeError();
      return;
    }
    Name = NameOrErr.get();
  }

  // GNU string table.
  if (Name == "//") {
    Format = has64SymTable ? K_GNU64 : K_GNU;
    // The string table is never an external member, but we still
    // must check any Expected<> return value.
    Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return;
    }
    StringTable = BufOrErr.get();
    if (Increment())
      return;
    setFirstRegular(*C);
    Err = Error::success();
    return;
  }

  // A name that does not start with '/' is an ordinary member: GNU layout
  // without a string table.
  if (Name[0] != '/') {
    Format = has64SymTable ? K_GNU64 : K_GNU;
    setFirstRegular(*C);
    Err = Error::success();
    return;
  }

  // Any other '/'-prefixed name than a second "/" member is not accepted
  // here.
  if (Name != "/") {
    Err = errorCodeToError(object_error::parse_failed);
    return;
  }

  // A second "/" member: COFF layout. Its contents replace the symbol table
  // recorded from the first "/" member.
  Format = K_COFF;
  // We know that the symbol table is not an external file, but we still
  // must check any Expected<> return value.
  Expected<StringRef> BufOrErr = C->getBuffer();
  if (!BufOrErr) {
    Err = BufOrErr.takeError();
    return;
  }
  SymbolTable = BufOrErr.get();

  if (Increment())
    return;

  if (I == E) {
    setFirstRegular(*C);
    Err = Error::success();
    return;
  }

  NameOrErr = C->getRawName();
  if (!NameOrErr) {
    Err = NameOrErr.takeError();
    return;
  }
  Name = NameOrErr.get();

  // Optional COFF string table.
  if (Name == "//") {
    // The string table is never an external member, but we still
    // must check any Expected<> return value.
    Expected<StringRef> BufOrErr = C->getBuffer();
    if (!BufOrErr) {
      Err = BufOrErr.takeError();
      return;
    }
    StringTable = BufOrErr.get();
    if (Increment())
      return;
  }

  setFirstRegular(*C);
  Err = Error::success();
}
770 
771 Archive::child_iterator Archive::child_begin(Error &Err,
772                                              bool SkipInternal) const {
773   if (isEmpty())
774     return child_end();
775 
776   if (SkipInternal)
777     return child_iterator::itr(
778         Child(this, FirstRegularData, FirstRegularStartOfFile), Err);
779 
780   const char *Loc = Data.getBufferStart() + strlen(Magic);
781   Child C(this, Loc, &Err);
782   if (Err)
783     return child_end();
784   return child_iterator::itr(C, Err);
785 }
786 
787 Archive::child_iterator Archive::child_end() const {
788   return child_iterator::end(Child(nullptr, nullptr, nullptr));
789 }
790 
791 StringRef Archive::Symbol::getName() const {
792   return Parent->getSymbolTable().begin() + StringIndex;
793 }
794 
795 Expected<Archive::Child> Archive::Symbol::getMember() const {
796   const char *Buf = Parent->getSymbolTable().begin();
797   const char *Offsets = Buf;
798   if (Parent->kind() == K_GNU64 || Parent->kind() == K_DARWIN64)
799     Offsets += sizeof(uint64_t);
800   else
801     Offsets += sizeof(uint32_t);
802   uint64_t Offset = 0;
803   if (Parent->kind() == K_GNU) {
804     Offset = read32be(Offsets + SymbolIndex * 4);
805   } else if (Parent->kind() == K_GNU64) {
806     Offset = read64be(Offsets + SymbolIndex * 8);
807   } else if (Parent->kind() == K_BSD) {
808     // The SymbolIndex is an index into the ranlib structs that start at
809     // Offsets (the first uint32_t is the number of bytes of the ranlib
810     // structs).  The ranlib structs are a pair of uint32_t's the first
811     // being a string table offset and the second being the offset into
812     // the archive of the member that defines the symbol.  Which is what
813     // is needed here.
814     Offset = read32le(Offsets + SymbolIndex * 8 + 4);
815   } else if (Parent->kind() == K_DARWIN64) {
816     // The SymbolIndex is an index into the ranlib_64 structs that start at
817     // Offsets (the first uint64_t is the number of bytes of the ranlib_64
818     // structs).  The ranlib_64 structs are a pair of uint64_t's the first
819     // being a string table offset and the second being the offset into
820     // the archive of the member that defines the symbol.  Which is what
821     // is needed here.
822     Offset = read64le(Offsets + SymbolIndex * 16 + 8);
823   } else {
824     // Skip offsets.
825     uint32_t MemberCount = read32le(Buf);
826     Buf += MemberCount * 4 + 4;
827 
828     uint32_t SymbolCount = read32le(Buf);
829     if (SymbolIndex >= SymbolCount)
830       return errorCodeToError(object_error::parse_failed);
831 
832     // Skip SymbolCount to get to the indices table.
833     const char *Indices = Buf + 4;
834 
835     // Get the index of the offset in the file member offset table for this
836     // symbol.
837     uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
838     // Subtract 1 since OffsetIndex is 1 based.
839     --OffsetIndex;
840 
841     if (OffsetIndex >= MemberCount)
842       return errorCodeToError(object_error::parse_failed);
843 
844     Offset = read32le(Offsets + OffsetIndex * 4);
845   }
846 
847   const char *Loc = Parent->getData().begin() + Offset;
848   Error Err = Error::success();
849   Child C(Parent, Loc, &Err);
850   if (Err)
851     return std::move(Err);
852   return C;
853 }
854 
855 Archive::Symbol Archive::Symbol::getNext() const {
856   Symbol t(*this);
857   if (Parent->kind() == K_BSD) {
858     // t.StringIndex is an offset from the start of the __.SYMDEF or
859     // "__.SYMDEF SORTED" member into the string table for the ranlib
860     // struct indexed by t.SymbolIndex .  To change t.StringIndex to the
861     // offset in the string table for t.SymbolIndex+1 we subtract the
862     // its offset from the start of the string table for t.SymbolIndex
863     // and add the offset of the string table for t.SymbolIndex+1.
864 
865     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
866     // which is the number of bytes of ranlib structs that follow.  The ranlib
867     // structs are a pair of uint32_t's the first being a string table offset
868     // and the second being the offset into the archive of the member that
869     // define the symbol. After that the next uint32_t is the byte count of
870     // the string table followed by the string table.
871     const char *Buf = Parent->getSymbolTable().begin();
872     uint32_t RanlibCount = 0;
873     RanlibCount = read32le(Buf) / 8;
874     // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
875     // don't change the t.StringIndex as we don't want to reference a ranlib
876     // past RanlibCount.
877     if (t.SymbolIndex + 1 < RanlibCount) {
878       const char *Ranlibs = Buf + 4;
879       uint32_t CurRanStrx = 0;
880       uint32_t NextRanStrx = 0;
881       CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
882       NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
883       t.StringIndex -= CurRanStrx;
884       t.StringIndex += NextRanStrx;
885     }
886   } else {
887     // Go to one past next null.
888     t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
889   }
890   ++t.SymbolIndex;
891   return t;
892 }
893 
894 Archive::symbol_iterator Archive::symbol_begin() const {
895   if (!hasSymbolTable())
896     return symbol_iterator(Symbol(this, 0, 0));
897 
898   const char *buf = getSymbolTable().begin();
899   if (kind() == K_GNU) {
900     uint32_t symbol_count = 0;
901     symbol_count = read32be(buf);
902     buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
903   } else if (kind() == K_GNU64) {
904     uint64_t symbol_count = read64be(buf);
905     buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
906   } else if (kind() == K_BSD) {
907     // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
908     // which is the number of bytes of ranlib structs that follow.  The ranlib
909     // structs are a pair of uint32_t's the first being a string table offset
910     // and the second being the offset into the archive of the member that
911     // define the symbol. After that the next uint32_t is the byte count of
912     // the string table followed by the string table.
913     uint32_t ranlib_count = 0;
914     ranlib_count = read32le(buf) / 8;
915     const char *ranlibs = buf + 4;
916     uint32_t ran_strx = 0;
917     ran_strx = read32le(ranlibs);
918     buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
919     // Skip the byte count of the string table.
920     buf += sizeof(uint32_t);
921     buf += ran_strx;
922   } else if (kind() == K_DARWIN64) {
923     // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t
924     // which is the number of bytes of ranlib_64 structs that follow.  The
925     // ranlib_64 structs are a pair of uint64_t's the first being a string
926     // table offset and the second being the offset into the archive of the
927     // member that define the symbol. After that the next uint64_t is the byte
928     // count of the string table followed by the string table.
929     uint64_t ranlib_count = 0;
930     ranlib_count = read64le(buf) / 16;
931     const char *ranlibs = buf + 8;
932     uint64_t ran_strx = 0;
933     ran_strx = read64le(ranlibs);
934     buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t))));
935     // Skip the byte count of the string table.
936     buf += sizeof(uint64_t);
937     buf += ran_strx;
938   } else {
939     uint32_t member_count = 0;
940     uint32_t symbol_count = 0;
941     member_count = read32le(buf);
942     buf += 4 + (member_count * 4); // Skip offsets.
943     symbol_count = read32le(buf);
944     buf += 4 + (symbol_count * 2); // Skip indices.
945   }
946   uint32_t string_start_offset = buf - getSymbolTable().begin();
947   return symbol_iterator(Symbol(this, 0, string_start_offset));
948 }
949 
950 Archive::symbol_iterator Archive::symbol_end() const {
951   return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
952 }
953 
954 uint32_t Archive::getNumberOfSymbols() const {
955   if (!hasSymbolTable())
956     return 0;
957   const char *buf = getSymbolTable().begin();
958   if (kind() == K_GNU)
959     return read32be(buf);
960   if (kind() == K_GNU64)
961     return read64be(buf);
962   if (kind() == K_BSD)
963     return read32le(buf) / 8;
964   if (kind() == K_DARWIN64)
965     return read64le(buf) / 16;
966   uint32_t member_count = 0;
967   member_count = read32le(buf);
968   buf += 4 + (member_count * 4); // Skip offsets.
969   return read32le(buf);
970 }
971 
972 Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const {
973   Archive::symbol_iterator bs = symbol_begin();
974   Archive::symbol_iterator es = symbol_end();
975 
976   for (; bs != es; ++bs) {
977     StringRef SymName = bs->getName();
978     if (SymName == name) {
979       if (auto MemberOrErr = bs->getMember())
980         return Child(*MemberOrErr);
981       else
982         return MemberOrErr.takeError();
983     }
984   }
985   return Optional<Child>();
986 }
987 
988 // Returns true if archive file contains no member file.
989 bool Archive::isEmpty() const { return Data.getBufferSize() == 8; }
990 
991 bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); }
992