10b57cec5SDimitry Andric //===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric 90b57cec5SDimitry Andric #include "llvm/BinaryFormat/Magic.h" 105ffd83dbSDimitry Andric #include "llvm/ADT/StringRef.h" 115ffd83dbSDimitry Andric #include "llvm/ADT/Twine.h" 120b57cec5SDimitry Andric #include "llvm/BinaryFormat/COFF.h" 130b57cec5SDimitry Andric #include "llvm/BinaryFormat/MachO.h" 140b57cec5SDimitry Andric #include "llvm/Support/Endian.h" 150b57cec5SDimitry Andric #include "llvm/Support/MemoryBuffer.h" 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric #if !defined(_MSC_VER) && !defined(__MINGW32__) 180b57cec5SDimitry Andric #include <unistd.h> 190b57cec5SDimitry Andric #else 200b57cec5SDimitry Andric #include <io.h> 210b57cec5SDimitry Andric #endif 220b57cec5SDimitry Andric 230b57cec5SDimitry Andric using namespace llvm; 240b57cec5SDimitry Andric using namespace llvm::support::endian; 250b57cec5SDimitry Andric using namespace llvm::sys::fs; 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric template <size_t N> 280b57cec5SDimitry Andric static bool startswith(StringRef Magic, const char (&S)[N]) { 295f757f3fSDimitry Andric return Magic.starts_with(StringRef(S, N - 1)); 300b57cec5SDimitry Andric } 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric /// Identify the magic in magic. 330b57cec5SDimitry Andric file_magic llvm::identify_magic(StringRef Magic) { 340b57cec5SDimitry Andric if (Magic.size() < 4) 350b57cec5SDimitry Andric return file_magic::unknown; 360b57cec5SDimitry Andric switch ((unsigned char)Magic[0]) { 370b57cec5SDimitry Andric case 0x00: { 380b57cec5SDimitry Andric // COFF bigobj, CL.exe's LTO object file, or short import library file 390b57cec5SDimitry Andric if (startswith(Magic, "\0\0\xFF\xFF")) { 400b57cec5SDimitry Andric size_t MinSize = 410b57cec5SDimitry Andric offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic); 420b57cec5SDimitry Andric if (Magic.size() < MinSize) 430b57cec5SDimitry Andric return file_magic::coff_import_library; 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID); 460b57cec5SDimitry Andric if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0) 470b57cec5SDimitry Andric return file_magic::coff_object; 480b57cec5SDimitry Andric if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0) 490b57cec5SDimitry Andric return file_magic::coff_cl_gl_object; 500b57cec5SDimitry Andric return file_magic::coff_import_library; 510b57cec5SDimitry Andric } 520b57cec5SDimitry Andric // Windows resource file 530b57cec5SDimitry Andric if (Magic.size() >= sizeof(COFF::WinResMagic) && 540b57cec5SDimitry Andric memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0) 550b57cec5SDimitry Andric return file_magic::windows_resource; 560b57cec5SDimitry Andric // 0x0000 = COFF unknown machine type 570b57cec5SDimitry Andric if (Magic[1] == 0) 580b57cec5SDimitry Andric return file_magic::coff_object; 590b57cec5SDimitry Andric if (startswith(Magic, "\0asm")) 600b57cec5SDimitry Andric return file_magic::wasm_object; 610b57cec5SDimitry Andric break; 620b57cec5SDimitry Andric } 630b57cec5SDimitry Andric 640b57cec5SDimitry Andric case 0x01: 650b57cec5SDimitry Andric // XCOFF format 660b57cec5SDimitry Andric if (startswith(Magic, "\x01\xDF")) 670b57cec5SDimitry Andric return file_magic::xcoff_object_32; 680b57cec5SDimitry Andric if (startswith(Magic, "\x01\xF7")) 690b57cec5SDimitry Andric return file_magic::xcoff_object_64; 700b57cec5SDimitry Andric break; 710b57cec5SDimitry Andric 72fe6060f1SDimitry Andric case 0x03: 73fe6060f1SDimitry Andric if (startswith(Magic, "\x03\xF0\x00")) 74fe6060f1SDimitry Andric return file_magic::goff_object; 755f757f3fSDimitry Andric // SPIR-V format in little-endian mode. 765f757f3fSDimitry Andric if (startswith(Magic, "\x03\x02\x23\x07")) 775f757f3fSDimitry Andric return file_magic::spirv_object; 785f757f3fSDimitry Andric break; 795f757f3fSDimitry Andric 805f757f3fSDimitry Andric case 0x07: // SPIR-V format in big-endian mode. 815f757f3fSDimitry Andric if (startswith(Magic, "\x07\x23\x02\x03")) 825f757f3fSDimitry Andric return file_magic::spirv_object; 83fe6060f1SDimitry Andric break; 84fe6060f1SDimitry Andric 8581ad6265SDimitry Andric case 0x10: 8681ad6265SDimitry Andric if (startswith(Magic, "\x10\xFF\x10\xAD")) 8781ad6265SDimitry Andric return file_magic::offload_binary; 8881ad6265SDimitry Andric break; 8981ad6265SDimitry Andric 900b57cec5SDimitry Andric case 0xDE: // 0x0B17C0DE = BC wraper 910b57cec5SDimitry Andric if (startswith(Magic, "\xDE\xC0\x17\x0B")) 920b57cec5SDimitry Andric return file_magic::bitcode; 930b57cec5SDimitry Andric break; 940b57cec5SDimitry Andric case 'B': 950b57cec5SDimitry Andric if (startswith(Magic, "BC\xC0\xDE")) 960b57cec5SDimitry Andric return file_magic::bitcode; 970b57cec5SDimitry Andric break; 985f757f3fSDimitry Andric case 'C': 995f757f3fSDimitry Andric if (startswith(Magic, "CCOB")) 1005f757f3fSDimitry Andric return file_magic::offload_bundle_compressed; 101*7a6dacacSDimitry Andric if (startswith(Magic, "CPCH")) 102*7a6dacacSDimitry Andric return file_magic::clang_ast; 1035f757f3fSDimitry Andric break; 1040b57cec5SDimitry Andric case '!': 1050b57cec5SDimitry Andric if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n")) 1060b57cec5SDimitry Andric return file_magic::archive; 1070b57cec5SDimitry Andric break; 10804eeddc0SDimitry Andric case '<': 10904eeddc0SDimitry Andric if (startswith(Magic, "<bigaf>\n")) 11004eeddc0SDimitry Andric return file_magic::archive; 11104eeddc0SDimitry Andric break; 1120b57cec5SDimitry Andric case '\177': 1130b57cec5SDimitry Andric if (startswith(Magic, "\177ELF") && Magic.size() >= 18) { 1140b57cec5SDimitry Andric bool Data2MSB = Magic[5] == 2; 1150b57cec5SDimitry Andric unsigned high = Data2MSB ? 16 : 17; 1160b57cec5SDimitry Andric unsigned low = Data2MSB ? 17 : 16; 1170b57cec5SDimitry Andric if (Magic[high] == 0) { 1180b57cec5SDimitry Andric switch (Magic[low]) { 1190b57cec5SDimitry Andric default: 1200b57cec5SDimitry Andric return file_magic::elf; 1210b57cec5SDimitry Andric case 1: 1220b57cec5SDimitry Andric return file_magic::elf_relocatable; 1230b57cec5SDimitry Andric case 2: 1240b57cec5SDimitry Andric return file_magic::elf_executable; 1250b57cec5SDimitry Andric case 3: 1260b57cec5SDimitry Andric return file_magic::elf_shared_object; 1270b57cec5SDimitry Andric case 4: 1280b57cec5SDimitry Andric return file_magic::elf_core; 1290b57cec5SDimitry Andric } 1300b57cec5SDimitry Andric } 1310b57cec5SDimitry Andric // It's still some type of ELF file. 1320b57cec5SDimitry Andric return file_magic::elf; 1330b57cec5SDimitry Andric } 1340b57cec5SDimitry Andric break; 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric case 0xCA: 1370b57cec5SDimitry Andric if (startswith(Magic, "\xCA\xFE\xBA\xBE") || 1380b57cec5SDimitry Andric startswith(Magic, "\xCA\xFE\xBA\xBF")) { 1390b57cec5SDimitry Andric // This is complicated by an overlap with Java class files. 1400b57cec5SDimitry Andric // See the Mach-O section in /usr/share/file/magic for details. 1410b57cec5SDimitry Andric if (Magic.size() >= 8 && Magic[7] < 43) 1420b57cec5SDimitry Andric return file_magic::macho_universal_binary; 1430b57cec5SDimitry Andric } 1440b57cec5SDimitry Andric break; 1450b57cec5SDimitry Andric 1460b57cec5SDimitry Andric // The two magic numbers for mach-o are: 1470b57cec5SDimitry Andric // 0xfeedface - 32-bit mach-o 1480b57cec5SDimitry Andric // 0xfeedfacf - 64-bit mach-o 1490b57cec5SDimitry Andric case 0xFE: 1500b57cec5SDimitry Andric case 0xCE: 1510b57cec5SDimitry Andric case 0xCF: { 1520b57cec5SDimitry Andric uint16_t type = 0; 1530b57cec5SDimitry Andric if (startswith(Magic, "\xFE\xED\xFA\xCE") || 1540b57cec5SDimitry Andric startswith(Magic, "\xFE\xED\xFA\xCF")) { 1550b57cec5SDimitry Andric /* Native endian */ 1560b57cec5SDimitry Andric size_t MinSize; 1570b57cec5SDimitry Andric if (Magic[3] == char(0xCE)) 1580b57cec5SDimitry Andric MinSize = sizeof(MachO::mach_header); 1590b57cec5SDimitry Andric else 1600b57cec5SDimitry Andric MinSize = sizeof(MachO::mach_header_64); 1610b57cec5SDimitry Andric if (Magic.size() >= MinSize) 1620b57cec5SDimitry Andric type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15]; 1630b57cec5SDimitry Andric } else if (startswith(Magic, "\xCE\xFA\xED\xFE") || 1640b57cec5SDimitry Andric startswith(Magic, "\xCF\xFA\xED\xFE")) { 1650b57cec5SDimitry Andric /* Reverse endian */ 1660b57cec5SDimitry Andric size_t MinSize; 1670b57cec5SDimitry Andric if (Magic[0] == char(0xCE)) 1680b57cec5SDimitry Andric MinSize = sizeof(MachO::mach_header); 1690b57cec5SDimitry Andric else 1700b57cec5SDimitry Andric MinSize = sizeof(MachO::mach_header_64); 1710b57cec5SDimitry Andric if (Magic.size() >= MinSize) 1720b57cec5SDimitry Andric type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12]; 1730b57cec5SDimitry Andric } 1740b57cec5SDimitry Andric switch (type) { 1750b57cec5SDimitry Andric default: 1760b57cec5SDimitry Andric break; 1770b57cec5SDimitry Andric case 1: 1780b57cec5SDimitry Andric return file_magic::macho_object; 1790b57cec5SDimitry Andric case 2: 1800b57cec5SDimitry Andric return file_magic::macho_executable; 1810b57cec5SDimitry Andric case 3: 1820b57cec5SDimitry Andric return file_magic::macho_fixed_virtual_memory_shared_lib; 1830b57cec5SDimitry Andric case 4: 1840b57cec5SDimitry Andric return file_magic::macho_core; 1850b57cec5SDimitry Andric case 5: 1860b57cec5SDimitry Andric return file_magic::macho_preload_executable; 1870b57cec5SDimitry Andric case 6: 1880b57cec5SDimitry Andric return file_magic::macho_dynamically_linked_shared_lib; 1890b57cec5SDimitry Andric case 7: 1900b57cec5SDimitry Andric return file_magic::macho_dynamic_linker; 1910b57cec5SDimitry Andric case 8: 1920b57cec5SDimitry Andric return file_magic::macho_bundle; 1930b57cec5SDimitry Andric case 9: 1940b57cec5SDimitry Andric return file_magic::macho_dynamically_linked_shared_lib_stub; 1950b57cec5SDimitry Andric case 10: 1960b57cec5SDimitry Andric return file_magic::macho_dsym_companion; 1970b57cec5SDimitry Andric case 11: 1980b57cec5SDimitry Andric return file_magic::macho_kext_bundle; 199bdd1243dSDimitry Andric case 12: 200bdd1243dSDimitry Andric return file_magic::macho_file_set; 2010b57cec5SDimitry Andric } 2020b57cec5SDimitry Andric break; 2030b57cec5SDimitry Andric } 2040b57cec5SDimitry Andric case 0xF0: // PowerPC Windows 2050b57cec5SDimitry Andric case 0x83: // Alpha 32-bit 2060b57cec5SDimitry Andric case 0x84: // Alpha 64-bit 2070b57cec5SDimitry Andric case 0x66: // MPS R4000 Windows 2080b57cec5SDimitry Andric case 0x50: // mc68K 20981ad6265SDimitry Andric if (startswith(Magic, "\x50\xed\x55\xba")) 21081ad6265SDimitry Andric return file_magic::cuda_fatbinary; 211bdd1243dSDimitry Andric [[fallthrough]]; 21281ad6265SDimitry Andric 2130b57cec5SDimitry Andric case 0x4c: // 80386 Windows 2140b57cec5SDimitry Andric case 0xc4: // ARMNT Windows 2150b57cec5SDimitry Andric if (Magic[1] == 0x01) 2160b57cec5SDimitry Andric return file_magic::coff_object; 217bdd1243dSDimitry Andric [[fallthrough]]; 2180b57cec5SDimitry Andric 2190b57cec5SDimitry Andric case 0x90: // PA-RISC Windows 2200b57cec5SDimitry Andric case 0x68: // mc68K Windows 2210b57cec5SDimitry Andric if (Magic[1] == 0x02) 2220b57cec5SDimitry Andric return file_magic::coff_object; 2230b57cec5SDimitry Andric break; 2240b57cec5SDimitry Andric 2250b57cec5SDimitry Andric case 'M': // Possible MS-DOS stub on Windows PE file, MSF/PDB file or a 2260b57cec5SDimitry Andric // Minidump file. 2270b57cec5SDimitry Andric if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) { 2280b57cec5SDimitry Andric uint32_t off = read32le(Magic.data() + 0x3c); 2290b57cec5SDimitry Andric // PE/COFF file, either EXE or DLL. 2305f757f3fSDimitry Andric if (Magic.substr(off).starts_with( 2310b57cec5SDimitry Andric StringRef(COFF::PEMagic, sizeof(COFF::PEMagic)))) 2320b57cec5SDimitry Andric return file_magic::pecoff_executable; 2330b57cec5SDimitry Andric } 2345f757f3fSDimitry Andric if (Magic.starts_with("Microsoft C/C++ MSF 7.00\r\n")) 2350b57cec5SDimitry Andric return file_magic::pdb; 2360b57cec5SDimitry Andric if (startswith(Magic, "MDMP")) 2370b57cec5SDimitry Andric return file_magic::minidump; 2380b57cec5SDimitry Andric break; 2390b57cec5SDimitry Andric 2400b57cec5SDimitry Andric case 0x64: // x86-64 or ARM64 Windows. 2410b57cec5SDimitry Andric if (Magic[1] == char(0x86) || Magic[1] == char(0xaa)) 2420b57cec5SDimitry Andric return file_magic::coff_object; 2430b57cec5SDimitry Andric break; 2440b57cec5SDimitry Andric 24506c3fb27SDimitry Andric case 0x2d: // YAML '-' MachO TBD. 2468bcb0991SDimitry Andric if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:")) 2478bcb0991SDimitry Andric return file_magic::tapi_file; 2488bcb0991SDimitry Andric break; 24906c3fb27SDimitry Andric case 0x7b: // JSON '{' MachO TBD. 25006c3fb27SDimitry Andric return file_magic::tapi_file; 25106c3fb27SDimitry Andric break; 2528bcb0991SDimitry Andric 25381ad6265SDimitry Andric case 'D': // DirectX container file - DXBC 25481ad6265SDimitry Andric if (startswith(Magic, "DXBC")) 25581ad6265SDimitry Andric return file_magic::dxcontainer_object; 25681ad6265SDimitry Andric break; 25781ad6265SDimitry Andric 258bdd1243dSDimitry Andric case 0x41: // ARM64EC windows 259bdd1243dSDimitry Andric if (Magic[1] == char(0xA6)) 260bdd1243dSDimitry Andric return file_magic::coff_object; 261bdd1243dSDimitry Andric break; 262bdd1243dSDimitry Andric 26306c3fb27SDimitry Andric case 0x4e: // ARM64X windows 26406c3fb27SDimitry Andric if (Magic[1] == char(0xA6)) 26506c3fb27SDimitry Andric return file_magic::coff_object; 26606c3fb27SDimitry Andric break; 26706c3fb27SDimitry Andric 2685f757f3fSDimitry Andric case '_': { 2695f757f3fSDimitry Andric const char OBMagic[] = "__CLANG_OFFLOAD_BUNDLE__"; 2705f757f3fSDimitry Andric if (Magic.size() >= sizeof(OBMagic) && startswith(Magic, OBMagic)) 2715f757f3fSDimitry Andric return file_magic::offload_bundle; 2725f757f3fSDimitry Andric break; 2735f757f3fSDimitry Andric } 2745f757f3fSDimitry Andric 2750b57cec5SDimitry Andric default: 2760b57cec5SDimitry Andric break; 2770b57cec5SDimitry Andric } 2780b57cec5SDimitry Andric return file_magic::unknown; 2790b57cec5SDimitry Andric } 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) { 282fe6060f1SDimitry Andric auto FileOrError = MemoryBuffer::getFile(Path, /*IsText=*/false, 283fe6060f1SDimitry Andric /*RequiresNullTerminator=*/false); 2840b57cec5SDimitry Andric if (!FileOrError) 2850b57cec5SDimitry Andric return FileOrError.getError(); 2860b57cec5SDimitry Andric 2870b57cec5SDimitry Andric std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError); 2880b57cec5SDimitry Andric Result = identify_magic(FileBuffer->getBuffer()); 2890b57cec5SDimitry Andric 2900b57cec5SDimitry Andric return std::error_code(); 2910b57cec5SDimitry Andric } 292