xref: /freebsd/contrib/llvm-project/llvm/lib/BinaryFormat/Magic.cpp (revision 7a6dacaca14b62ca4b74406814becb87a3fefac0)
10b57cec5SDimitry Andric //===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric 
90b57cec5SDimitry Andric #include "llvm/BinaryFormat/Magic.h"
105ffd83dbSDimitry Andric #include "llvm/ADT/StringRef.h"
115ffd83dbSDimitry Andric #include "llvm/ADT/Twine.h"
120b57cec5SDimitry Andric #include "llvm/BinaryFormat/COFF.h"
130b57cec5SDimitry Andric #include "llvm/BinaryFormat/MachO.h"
140b57cec5SDimitry Andric #include "llvm/Support/Endian.h"
150b57cec5SDimitry Andric #include "llvm/Support/MemoryBuffer.h"
160b57cec5SDimitry Andric 
170b57cec5SDimitry Andric #if !defined(_MSC_VER) && !defined(__MINGW32__)
180b57cec5SDimitry Andric #include <unistd.h>
190b57cec5SDimitry Andric #else
200b57cec5SDimitry Andric #include <io.h>
210b57cec5SDimitry Andric #endif
220b57cec5SDimitry Andric 
230b57cec5SDimitry Andric using namespace llvm;
240b57cec5SDimitry Andric using namespace llvm::support::endian;
250b57cec5SDimitry Andric using namespace llvm::sys::fs;
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric template <size_t N>
280b57cec5SDimitry Andric static bool startswith(StringRef Magic, const char (&S)[N]) {
295f757f3fSDimitry Andric   return Magic.starts_with(StringRef(S, N - 1));
300b57cec5SDimitry Andric }
310b57cec5SDimitry Andric 
320b57cec5SDimitry Andric /// Identify the magic in magic.
330b57cec5SDimitry Andric file_magic llvm::identify_magic(StringRef Magic) {
340b57cec5SDimitry Andric   if (Magic.size() < 4)
350b57cec5SDimitry Andric     return file_magic::unknown;
360b57cec5SDimitry Andric   switch ((unsigned char)Magic[0]) {
370b57cec5SDimitry Andric   case 0x00: {
380b57cec5SDimitry Andric     // COFF bigobj, CL.exe's LTO object file, or short import library file
390b57cec5SDimitry Andric     if (startswith(Magic, "\0\0\xFF\xFF")) {
400b57cec5SDimitry Andric       size_t MinSize =
410b57cec5SDimitry Andric           offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic);
420b57cec5SDimitry Andric       if (Magic.size() < MinSize)
430b57cec5SDimitry Andric         return file_magic::coff_import_library;
440b57cec5SDimitry Andric 
450b57cec5SDimitry Andric       const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
460b57cec5SDimitry Andric       if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
470b57cec5SDimitry Andric         return file_magic::coff_object;
480b57cec5SDimitry Andric       if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
490b57cec5SDimitry Andric         return file_magic::coff_cl_gl_object;
500b57cec5SDimitry Andric       return file_magic::coff_import_library;
510b57cec5SDimitry Andric     }
520b57cec5SDimitry Andric     // Windows resource file
530b57cec5SDimitry Andric     if (Magic.size() >= sizeof(COFF::WinResMagic) &&
540b57cec5SDimitry Andric         memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0)
550b57cec5SDimitry Andric       return file_magic::windows_resource;
560b57cec5SDimitry Andric     // 0x0000 = COFF unknown machine type
570b57cec5SDimitry Andric     if (Magic[1] == 0)
580b57cec5SDimitry Andric       return file_magic::coff_object;
590b57cec5SDimitry Andric     if (startswith(Magic, "\0asm"))
600b57cec5SDimitry Andric       return file_magic::wasm_object;
610b57cec5SDimitry Andric     break;
620b57cec5SDimitry Andric   }
630b57cec5SDimitry Andric 
640b57cec5SDimitry Andric   case 0x01:
650b57cec5SDimitry Andric     // XCOFF format
660b57cec5SDimitry Andric     if (startswith(Magic, "\x01\xDF"))
670b57cec5SDimitry Andric       return file_magic::xcoff_object_32;
680b57cec5SDimitry Andric     if (startswith(Magic, "\x01\xF7"))
690b57cec5SDimitry Andric       return file_magic::xcoff_object_64;
700b57cec5SDimitry Andric     break;
710b57cec5SDimitry Andric 
72fe6060f1SDimitry Andric   case 0x03:
73fe6060f1SDimitry Andric     if (startswith(Magic, "\x03\xF0\x00"))
74fe6060f1SDimitry Andric       return file_magic::goff_object;
755f757f3fSDimitry Andric     // SPIR-V format in little-endian mode.
765f757f3fSDimitry Andric     if (startswith(Magic, "\x03\x02\x23\x07"))
775f757f3fSDimitry Andric       return file_magic::spirv_object;
785f757f3fSDimitry Andric     break;
795f757f3fSDimitry Andric 
805f757f3fSDimitry Andric   case 0x07: // SPIR-V format in big-endian mode.
815f757f3fSDimitry Andric     if (startswith(Magic, "\x07\x23\x02\x03"))
825f757f3fSDimitry Andric       return file_magic::spirv_object;
83fe6060f1SDimitry Andric     break;
84fe6060f1SDimitry Andric 
8581ad6265SDimitry Andric   case 0x10:
8681ad6265SDimitry Andric     if (startswith(Magic, "\x10\xFF\x10\xAD"))
8781ad6265SDimitry Andric       return file_magic::offload_binary;
8881ad6265SDimitry Andric     break;
8981ad6265SDimitry Andric 
900b57cec5SDimitry Andric   case 0xDE: // 0x0B17C0DE = BC wraper
910b57cec5SDimitry Andric     if (startswith(Magic, "\xDE\xC0\x17\x0B"))
920b57cec5SDimitry Andric       return file_magic::bitcode;
930b57cec5SDimitry Andric     break;
940b57cec5SDimitry Andric   case 'B':
950b57cec5SDimitry Andric     if (startswith(Magic, "BC\xC0\xDE"))
960b57cec5SDimitry Andric       return file_magic::bitcode;
970b57cec5SDimitry Andric     break;
985f757f3fSDimitry Andric   case 'C':
995f757f3fSDimitry Andric     if (startswith(Magic, "CCOB"))
1005f757f3fSDimitry Andric       return file_magic::offload_bundle_compressed;
101*7a6dacacSDimitry Andric     if (startswith(Magic, "CPCH"))
102*7a6dacacSDimitry Andric       return file_magic::clang_ast;
1035f757f3fSDimitry Andric     break;
1040b57cec5SDimitry Andric   case '!':
1050b57cec5SDimitry Andric     if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
1060b57cec5SDimitry Andric       return file_magic::archive;
1070b57cec5SDimitry Andric     break;
10804eeddc0SDimitry Andric   case '<':
10904eeddc0SDimitry Andric     if (startswith(Magic, "<bigaf>\n"))
11004eeddc0SDimitry Andric       return file_magic::archive;
11104eeddc0SDimitry Andric     break;
1120b57cec5SDimitry Andric   case '\177':
1130b57cec5SDimitry Andric     if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
1140b57cec5SDimitry Andric       bool Data2MSB = Magic[5] == 2;
1150b57cec5SDimitry Andric       unsigned high = Data2MSB ? 16 : 17;
1160b57cec5SDimitry Andric       unsigned low = Data2MSB ? 17 : 16;
1170b57cec5SDimitry Andric       if (Magic[high] == 0) {
1180b57cec5SDimitry Andric         switch (Magic[low]) {
1190b57cec5SDimitry Andric         default:
1200b57cec5SDimitry Andric           return file_magic::elf;
1210b57cec5SDimitry Andric         case 1:
1220b57cec5SDimitry Andric           return file_magic::elf_relocatable;
1230b57cec5SDimitry Andric         case 2:
1240b57cec5SDimitry Andric           return file_magic::elf_executable;
1250b57cec5SDimitry Andric         case 3:
1260b57cec5SDimitry Andric           return file_magic::elf_shared_object;
1270b57cec5SDimitry Andric         case 4:
1280b57cec5SDimitry Andric           return file_magic::elf_core;
1290b57cec5SDimitry Andric         }
1300b57cec5SDimitry Andric       }
1310b57cec5SDimitry Andric       // It's still some type of ELF file.
1320b57cec5SDimitry Andric       return file_magic::elf;
1330b57cec5SDimitry Andric     }
1340b57cec5SDimitry Andric     break;
1350b57cec5SDimitry Andric 
1360b57cec5SDimitry Andric   case 0xCA:
1370b57cec5SDimitry Andric     if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
1380b57cec5SDimitry Andric         startswith(Magic, "\xCA\xFE\xBA\xBF")) {
1390b57cec5SDimitry Andric       // This is complicated by an overlap with Java class files.
1400b57cec5SDimitry Andric       // See the Mach-O section in /usr/share/file/magic for details.
1410b57cec5SDimitry Andric       if (Magic.size() >= 8 && Magic[7] < 43)
1420b57cec5SDimitry Andric         return file_magic::macho_universal_binary;
1430b57cec5SDimitry Andric     }
1440b57cec5SDimitry Andric     break;
1450b57cec5SDimitry Andric 
1460b57cec5SDimitry Andric   // The two magic numbers for mach-o are:
1470b57cec5SDimitry Andric   // 0xfeedface - 32-bit mach-o
1480b57cec5SDimitry Andric   // 0xfeedfacf - 64-bit mach-o
1490b57cec5SDimitry Andric   case 0xFE:
1500b57cec5SDimitry Andric   case 0xCE:
1510b57cec5SDimitry Andric   case 0xCF: {
1520b57cec5SDimitry Andric     uint16_t type = 0;
1530b57cec5SDimitry Andric     if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
1540b57cec5SDimitry Andric         startswith(Magic, "\xFE\xED\xFA\xCF")) {
1550b57cec5SDimitry Andric       /* Native endian */
1560b57cec5SDimitry Andric       size_t MinSize;
1570b57cec5SDimitry Andric       if (Magic[3] == char(0xCE))
1580b57cec5SDimitry Andric         MinSize = sizeof(MachO::mach_header);
1590b57cec5SDimitry Andric       else
1600b57cec5SDimitry Andric         MinSize = sizeof(MachO::mach_header_64);
1610b57cec5SDimitry Andric       if (Magic.size() >= MinSize)
1620b57cec5SDimitry Andric         type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15];
1630b57cec5SDimitry Andric     } else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
1640b57cec5SDimitry Andric                startswith(Magic, "\xCF\xFA\xED\xFE")) {
1650b57cec5SDimitry Andric       /* Reverse endian */
1660b57cec5SDimitry Andric       size_t MinSize;
1670b57cec5SDimitry Andric       if (Magic[0] == char(0xCE))
1680b57cec5SDimitry Andric         MinSize = sizeof(MachO::mach_header);
1690b57cec5SDimitry Andric       else
1700b57cec5SDimitry Andric         MinSize = sizeof(MachO::mach_header_64);
1710b57cec5SDimitry Andric       if (Magic.size() >= MinSize)
1720b57cec5SDimitry Andric         type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12];
1730b57cec5SDimitry Andric     }
1740b57cec5SDimitry Andric     switch (type) {
1750b57cec5SDimitry Andric     default:
1760b57cec5SDimitry Andric       break;
1770b57cec5SDimitry Andric     case 1:
1780b57cec5SDimitry Andric       return file_magic::macho_object;
1790b57cec5SDimitry Andric     case 2:
1800b57cec5SDimitry Andric       return file_magic::macho_executable;
1810b57cec5SDimitry Andric     case 3:
1820b57cec5SDimitry Andric       return file_magic::macho_fixed_virtual_memory_shared_lib;
1830b57cec5SDimitry Andric     case 4:
1840b57cec5SDimitry Andric       return file_magic::macho_core;
1850b57cec5SDimitry Andric     case 5:
1860b57cec5SDimitry Andric       return file_magic::macho_preload_executable;
1870b57cec5SDimitry Andric     case 6:
1880b57cec5SDimitry Andric       return file_magic::macho_dynamically_linked_shared_lib;
1890b57cec5SDimitry Andric     case 7:
1900b57cec5SDimitry Andric       return file_magic::macho_dynamic_linker;
1910b57cec5SDimitry Andric     case 8:
1920b57cec5SDimitry Andric       return file_magic::macho_bundle;
1930b57cec5SDimitry Andric     case 9:
1940b57cec5SDimitry Andric       return file_magic::macho_dynamically_linked_shared_lib_stub;
1950b57cec5SDimitry Andric     case 10:
1960b57cec5SDimitry Andric       return file_magic::macho_dsym_companion;
1970b57cec5SDimitry Andric     case 11:
1980b57cec5SDimitry Andric       return file_magic::macho_kext_bundle;
199bdd1243dSDimitry Andric     case 12:
200bdd1243dSDimitry Andric       return file_magic::macho_file_set;
2010b57cec5SDimitry Andric     }
2020b57cec5SDimitry Andric     break;
2030b57cec5SDimitry Andric   }
2040b57cec5SDimitry Andric   case 0xF0: // PowerPC Windows
2050b57cec5SDimitry Andric   case 0x83: // Alpha 32-bit
2060b57cec5SDimitry Andric   case 0x84: // Alpha 64-bit
2070b57cec5SDimitry Andric   case 0x66: // MPS R4000 Windows
2080b57cec5SDimitry Andric   case 0x50: // mc68K
20981ad6265SDimitry Andric     if (startswith(Magic, "\x50\xed\x55\xba"))
21081ad6265SDimitry Andric       return file_magic::cuda_fatbinary;
211bdd1243dSDimitry Andric     [[fallthrough]];
21281ad6265SDimitry Andric 
2130b57cec5SDimitry Andric   case 0x4c: // 80386 Windows
2140b57cec5SDimitry Andric   case 0xc4: // ARMNT Windows
2150b57cec5SDimitry Andric     if (Magic[1] == 0x01)
2160b57cec5SDimitry Andric       return file_magic::coff_object;
217bdd1243dSDimitry Andric     [[fallthrough]];
2180b57cec5SDimitry Andric 
2190b57cec5SDimitry Andric   case 0x90: // PA-RISC Windows
2200b57cec5SDimitry Andric   case 0x68: // mc68K Windows
2210b57cec5SDimitry Andric     if (Magic[1] == 0x02)
2220b57cec5SDimitry Andric       return file_magic::coff_object;
2230b57cec5SDimitry Andric     break;
2240b57cec5SDimitry Andric 
2250b57cec5SDimitry Andric   case 'M': // Possible MS-DOS stub on Windows PE file, MSF/PDB file or a
2260b57cec5SDimitry Andric             // Minidump file.
2270b57cec5SDimitry Andric     if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
2280b57cec5SDimitry Andric       uint32_t off = read32le(Magic.data() + 0x3c);
2290b57cec5SDimitry Andric       // PE/COFF file, either EXE or DLL.
2305f757f3fSDimitry Andric       if (Magic.substr(off).starts_with(
2310b57cec5SDimitry Andric               StringRef(COFF::PEMagic, sizeof(COFF::PEMagic))))
2320b57cec5SDimitry Andric         return file_magic::pecoff_executable;
2330b57cec5SDimitry Andric     }
2345f757f3fSDimitry Andric     if (Magic.starts_with("Microsoft C/C++ MSF 7.00\r\n"))
2350b57cec5SDimitry Andric       return file_magic::pdb;
2360b57cec5SDimitry Andric     if (startswith(Magic, "MDMP"))
2370b57cec5SDimitry Andric       return file_magic::minidump;
2380b57cec5SDimitry Andric     break;
2390b57cec5SDimitry Andric 
2400b57cec5SDimitry Andric   case 0x64: // x86-64 or ARM64 Windows.
2410b57cec5SDimitry Andric     if (Magic[1] == char(0x86) || Magic[1] == char(0xaa))
2420b57cec5SDimitry Andric       return file_magic::coff_object;
2430b57cec5SDimitry Andric     break;
2440b57cec5SDimitry Andric 
24506c3fb27SDimitry Andric   case 0x2d: // YAML '-' MachO TBD.
2468bcb0991SDimitry Andric     if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:"))
2478bcb0991SDimitry Andric       return file_magic::tapi_file;
2488bcb0991SDimitry Andric     break;
24906c3fb27SDimitry Andric   case 0x7b: // JSON '{' MachO TBD.
25006c3fb27SDimitry Andric     return file_magic::tapi_file;
25106c3fb27SDimitry Andric     break;
2528bcb0991SDimitry Andric 
25381ad6265SDimitry Andric   case 'D': // DirectX container file - DXBC
25481ad6265SDimitry Andric     if (startswith(Magic, "DXBC"))
25581ad6265SDimitry Andric       return file_magic::dxcontainer_object;
25681ad6265SDimitry Andric     break;
25781ad6265SDimitry Andric 
258bdd1243dSDimitry Andric   case 0x41: // ARM64EC windows
259bdd1243dSDimitry Andric     if (Magic[1] == char(0xA6))
260bdd1243dSDimitry Andric       return file_magic::coff_object;
261bdd1243dSDimitry Andric     break;
262bdd1243dSDimitry Andric 
26306c3fb27SDimitry Andric   case 0x4e: // ARM64X windows
26406c3fb27SDimitry Andric     if (Magic[1] == char(0xA6))
26506c3fb27SDimitry Andric       return file_magic::coff_object;
26606c3fb27SDimitry Andric     break;
26706c3fb27SDimitry Andric 
2685f757f3fSDimitry Andric   case '_': {
2695f757f3fSDimitry Andric     const char OBMagic[] = "__CLANG_OFFLOAD_BUNDLE__";
2705f757f3fSDimitry Andric     if (Magic.size() >= sizeof(OBMagic) && startswith(Magic, OBMagic))
2715f757f3fSDimitry Andric       return file_magic::offload_bundle;
2725f757f3fSDimitry Andric     break;
2735f757f3fSDimitry Andric   }
2745f757f3fSDimitry Andric 
2750b57cec5SDimitry Andric   default:
2760b57cec5SDimitry Andric     break;
2770b57cec5SDimitry Andric   }
2780b57cec5SDimitry Andric   return file_magic::unknown;
2790b57cec5SDimitry Andric }
2800b57cec5SDimitry Andric 
2810b57cec5SDimitry Andric std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
282fe6060f1SDimitry Andric   auto FileOrError = MemoryBuffer::getFile(Path, /*IsText=*/false,
283fe6060f1SDimitry Andric                                            /*RequiresNullTerminator=*/false);
2840b57cec5SDimitry Andric   if (!FileOrError)
2850b57cec5SDimitry Andric     return FileOrError.getError();
2860b57cec5SDimitry Andric 
2870b57cec5SDimitry Andric   std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
2880b57cec5SDimitry Andric   Result = identify_magic(FileBuffer->getBuffer());
2890b57cec5SDimitry Andric 
2900b57cec5SDimitry Andric   return std::error_code();
2910b57cec5SDimitry Andric }
292