xref: /freebsd/contrib/llvm-project/llvm/lib/BinaryFormat/Magic.cpp (revision a977168c48d45085cdf0c40f9b9bde3850b1f3ea)
1  //===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
2  //
3  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  // See https://llvm.org/LICENSE.txt for license information.
5  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  //
7  //===----------------------------------------------------------------------===//
8  
9  #include "llvm/BinaryFormat/Magic.h"
10  #include "llvm/ADT/StringRef.h"
11  #include "llvm/ADT/Twine.h"
12  #include "llvm/BinaryFormat/COFF.h"
13  #include "llvm/BinaryFormat/MachO.h"
14  #include "llvm/Support/Endian.h"
15  #include "llvm/Support/MemoryBuffer.h"
16  
17  #if !defined(_MSC_VER) && !defined(__MINGW32__)
18  #include <unistd.h>
19  #else
20  #include <io.h>
21  #endif
22  
23  using namespace llvm;
24  using namespace llvm::support::endian;
25  using namespace llvm::sys::fs;
26  
27  template <size_t N>
28  static bool startswith(StringRef Magic, const char (&S)[N]) {
29    return Magic.startswith(StringRef(S, N - 1));
30  }
31  
32  /// Identify the magic in magic.
33  file_magic llvm::identify_magic(StringRef Magic) {
34    if (Magic.size() < 4)
35      return file_magic::unknown;
36    switch ((unsigned char)Magic[0]) {
37    case 0x00: {
38      // COFF bigobj, CL.exe's LTO object file, or short import library file
39      if (startswith(Magic, "\0\0\xFF\xFF")) {
40        size_t MinSize =
41            offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic);
42        if (Magic.size() < MinSize)
43          return file_magic::coff_import_library;
44  
45        const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
46        if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
47          return file_magic::coff_object;
48        if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
49          return file_magic::coff_cl_gl_object;
50        return file_magic::coff_import_library;
51      }
52      // Windows resource file
53      if (Magic.size() >= sizeof(COFF::WinResMagic) &&
54          memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0)
55        return file_magic::windows_resource;
56      // 0x0000 = COFF unknown machine type
57      if (Magic[1] == 0)
58        return file_magic::coff_object;
59      if (startswith(Magic, "\0asm"))
60        return file_magic::wasm_object;
61      break;
62    }
63  
64    case 0x01:
65      // XCOFF format
66      if (startswith(Magic, "\x01\xDF"))
67        return file_magic::xcoff_object_32;
68      if (startswith(Magic, "\x01\xF7"))
69        return file_magic::xcoff_object_64;
70      break;
71  
72    case 0x03:
73      if (startswith(Magic, "\x03\xF0\x00"))
74        return file_magic::goff_object;
75      break;
76  
77    case 0xDE: // 0x0B17C0DE = BC wraper
78      if (startswith(Magic, "\xDE\xC0\x17\x0B"))
79        return file_magic::bitcode;
80      break;
81    case 'B':
82      if (startswith(Magic, "BC\xC0\xDE"))
83        return file_magic::bitcode;
84      break;
85    case '!':
86      if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
87        return file_magic::archive;
88      break;
89    case '<':
90      if (startswith(Magic, "<bigaf>\n"))
91        return file_magic::archive;
92      break;
93    case '\177':
94      if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
95        bool Data2MSB = Magic[5] == 2;
96        unsigned high = Data2MSB ? 16 : 17;
97        unsigned low = Data2MSB ? 17 : 16;
98        if (Magic[high] == 0) {
99          switch (Magic[low]) {
100          default:
101            return file_magic::elf;
102          case 1:
103            return file_magic::elf_relocatable;
104          case 2:
105            return file_magic::elf_executable;
106          case 3:
107            return file_magic::elf_shared_object;
108          case 4:
109            return file_magic::elf_core;
110          }
111        }
112        // It's still some type of ELF file.
113        return file_magic::elf;
114      }
115      break;
116  
117    case 0xCA:
118      if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
119          startswith(Magic, "\xCA\xFE\xBA\xBF")) {
120        // This is complicated by an overlap with Java class files.
121        // See the Mach-O section in /usr/share/file/magic for details.
122        if (Magic.size() >= 8 && Magic[7] < 43)
123          return file_magic::macho_universal_binary;
124      }
125      break;
126  
127    // The two magic numbers for mach-o are:
128    // 0xfeedface - 32-bit mach-o
129    // 0xfeedfacf - 64-bit mach-o
130    case 0xFE:
131    case 0xCE:
132    case 0xCF: {
133      uint16_t type = 0;
134      if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
135          startswith(Magic, "\xFE\xED\xFA\xCF")) {
136        /* Native endian */
137        size_t MinSize;
138        if (Magic[3] == char(0xCE))
139          MinSize = sizeof(MachO::mach_header);
140        else
141          MinSize = sizeof(MachO::mach_header_64);
142        if (Magic.size() >= MinSize)
143          type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15];
144      } else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
145                 startswith(Magic, "\xCF\xFA\xED\xFE")) {
146        /* Reverse endian */
147        size_t MinSize;
148        if (Magic[0] == char(0xCE))
149          MinSize = sizeof(MachO::mach_header);
150        else
151          MinSize = sizeof(MachO::mach_header_64);
152        if (Magic.size() >= MinSize)
153          type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12];
154      }
155      switch (type) {
156      default:
157        break;
158      case 1:
159        return file_magic::macho_object;
160      case 2:
161        return file_magic::macho_executable;
162      case 3:
163        return file_magic::macho_fixed_virtual_memory_shared_lib;
164      case 4:
165        return file_magic::macho_core;
166      case 5:
167        return file_magic::macho_preload_executable;
168      case 6:
169        return file_magic::macho_dynamically_linked_shared_lib;
170      case 7:
171        return file_magic::macho_dynamic_linker;
172      case 8:
173        return file_magic::macho_bundle;
174      case 9:
175        return file_magic::macho_dynamically_linked_shared_lib_stub;
176      case 10:
177        return file_magic::macho_dsym_companion;
178      case 11:
179        return file_magic::macho_kext_bundle;
180      }
181      break;
182    }
183    case 0xF0: // PowerPC Windows
184    case 0x83: // Alpha 32-bit
185    case 0x84: // Alpha 64-bit
186    case 0x66: // MPS R4000 Windows
187    case 0x50: // mc68K
188    case 0x4c: // 80386 Windows
189    case 0xc4: // ARMNT Windows
190      if (Magic[1] == 0x01)
191        return file_magic::coff_object;
192      LLVM_FALLTHROUGH;
193  
194    case 0x90: // PA-RISC Windows
195    case 0x68: // mc68K Windows
196      if (Magic[1] == 0x02)
197        return file_magic::coff_object;
198      break;
199  
200    case 'M': // Possible MS-DOS stub on Windows PE file, MSF/PDB file or a
201              // Minidump file.
202      if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
203        uint32_t off = read32le(Magic.data() + 0x3c);
204        // PE/COFF file, either EXE or DLL.
205        if (Magic.substr(off).startswith(
206                StringRef(COFF::PEMagic, sizeof(COFF::PEMagic))))
207          return file_magic::pecoff_executable;
208      }
209      if (Magic.startswith("Microsoft C/C++ MSF 7.00\r\n"))
210        return file_magic::pdb;
211      if (startswith(Magic, "MDMP"))
212        return file_magic::minidump;
213      break;
214  
215    case 0x64: // x86-64 or ARM64 Windows.
216      if (Magic[1] == char(0x86) || Magic[1] == char(0xaa))
217        return file_magic::coff_object;
218      break;
219  
220    case 0x2d: // YAML '-'
221      if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:"))
222        return file_magic::tapi_file;
223      break;
224  
225    default:
226      break;
227    }
228    return file_magic::unknown;
229  }
230  
231  std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
232    auto FileOrError = MemoryBuffer::getFile(Path, /*IsText=*/false,
233                                             /*RequiresNullTerminator=*/false);
234    if (!FileOrError)
235      return FileOrError.getError();
236  
237    std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
238    Result = identify_magic(FileBuffer->getBuffer());
239  
240    return std::error_code();
241  }
242