1 /* Copyright (c) 2018-2018, David Anderson 2 All rights reserved. 3 4 Redistribution and use in source and binary forms, with 5 or without modification, are permitted provided that the 6 following conditions are met: 7 8 Redistributions of source code must retain the above 9 copyright notice, this list of conditions and the following 10 disclaimer. 11 12 Redistributions in binary form must reproduce the above 13 copyright notice, this list of conditions and the following 14 disclaimer in the documentation and/or other materials 15 provided with the distribution. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND 18 CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 19 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 22 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 28 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 29 EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include "config.h" 33 #include <stdio.h> 34 #include <sys/types.h> /* open() */ 35 #include <sys/stat.h> /* open() */ 36 #include <fcntl.h> /* O_RDONLY */ 37 #ifdef HAVE_UNISTD_H 38 #include <unistd.h> /* lseek read close */ 39 #elif defined(_WIN32) && defined(_MSC_VER) 40 #include <io.h> 41 #include <basetsd.h> 42 typedef SSIZE_T ssize_t; /* MSVC does not have POSIX ssize_t */ 43 #endif /* HAVE_UNISTD_H */ 44 #ifdef HAVE_STRING_H 45 #include <string.h> /* memcpy, strcpy */ 46 #endif /* HAVE_STRING_H */ 47 48 /* Windows specific header files */ 49 #if defined(_WIN32) && defined(HAVE_STDAFX_H) 50 #include "stdafx.h" 51 #endif /* HAVE_STDAFX_H */ 52 53 #include "libdwarf.h" 54 #include "memcpy_swap.h" 55 #include "dwarf_object_read_common.h" 56 #include "dwarf_object_detector.h" 57 58 #ifndef O_BINARY 59 #define O_BINARY 0 60 #endif /* O_BINARY */ 61 62 /* This is the main() program for the object_detector executable. */ 63 64 #ifndef TRUE 65 #define TRUE 1 66 #define FALSE 0 67 #endif /* TRUE */ 68 69 #ifndef O_RDONLY 70 #define O_RDONLY 0 71 #endif 72 73 /* TYP, SIZEOFT32 and ASNAR 74 mean we can use correctly-sized arrays of char for the 75 struct members instead of determing a proper integer 76 that size. 77 78 We are dealing with carefully constructed structs 79 that do not have any alignment-forced (hidden) 80 unused bytes so reading lengths from the real structs 81 works for each variable. */ 82 83 #define TYP(n,l) char n[l] 84 #define SIZEOFT32 4 85 86 87 #define DW_DLV_NO_ENTRY -1 88 #define DW_DLV_OK 0 89 #define DW_DLV_ERROR 1 90 91 #ifndef EI_NIDENT 92 #define EI_NIDENT 16 93 #define EI_CLASS 4 94 #define EI_DATA 5 95 #define EI_VERSION 6 96 #define ELFCLASS32 1 97 #define ELFCLASS64 2 98 #define ELFDATA2LSB 1 99 #define ELFDATA2MSB 2 100 #endif /* EI_NIDENT */ 101 102 #define DSYM_SUFFIX ".dSYM/Contents/Resources/DWARF/" 103 #define PATHSIZE 2000 104 105 #ifndef MH_MAGIC 106 /* mach-o 32bit */ 107 #define MH_MAGIC 0xfeedface 108 #define MH_CIGAM 0xcefaedfe 109 #endif /* MH_MAGIC */ 110 #ifndef MH_MAGIC_64 111 /* mach-o 64bit */ 112 #define MH_MAGIC_64 0xfeedfacf 113 #define MH_CIGAM_64 0xcffaedfe 114 #endif /* MH_MAGIC_64 */ 115 116 static unsigned long 117 magic_copy(unsigned char *d, unsigned len) 118 { 119 unsigned i = 0; 120 unsigned long v = 0; 121 122 v = d[0]; 123 for(i = 1 ; i < len; ++i) { 124 v <<= 8; 125 v |= d[i]; 126 } 127 return v; 128 } 129 130 131 #ifdef WORDS_BIGENDIAN 132 #define ASNAR(func,t,s) \ 133 do { \ 134 unsigned tbyte = sizeof(t) - sizeof(s); \ 135 t = 0; \ 136 func(((char *)&t)+tbyte ,&s[0],sizeof(s)); \ 137 } while (0) 138 #else /* LITTLE ENDIAN */ 139 #define ASNAR(func,t,s) \ 140 do { \ 141 t = 0; \ 142 func(&t,&s[0],sizeof(s)); \ 143 } while (0) 144 #endif /* end LITTLE- BIG-ENDIAN */ 145 146 147 #define EI_NIDENT 16 148 /* An incomplete elf header, good for 32 and 64bit elf */ 149 struct elf_header { 150 unsigned char e_ident[EI_NIDENT]; 151 TYP(e_type,2); 152 TYP(e_machine,2); 153 TYP(e_version,4); 154 #ifdef HAVE_CUSTOM_LIBELF 155 /* In the case of custom ELF, use extra space */ 156 TYP(e_custom,64); 157 #endif /* HAVE_CUSTOM_LIBELF */ 158 }; 159 160 /* Windows. Certain PE objects. 161 The following references may be of interest. 162 https://msdn.microsoft.com/library/windows/desktop/ms680547(v=vs.85).aspx #PE format overview and various machine magic numbers 163 164 https://msdn.microsoft.com/en-us/library/ms809762.aspx # describes some details of PE headers, basically an overview 165 166 https://msdn.microsoft.com/en-us/library/windows/desktop/aa383751(v=vs.85).aspx #defines sizes of various types 167 168 https://msdn.microsoft.com/fr-fr/library/windows/desktop/ms680313(v=vs.85).aspx #defines IMAGE_FILE_HEADER and Machine fields (32/64) 169 170 https://msdn.microsoft.com/fr-fr/library/windows/desktop/ms680305(v=vs.85).aspx #defines IMAGE_DATA_DIRECTORY 171 172 https://msdn.microsoft.com/en-us/library/windows/desktop/ms680339(v=vs.85).aspx #Defines IMAGE_OPTIONAL_HEADER and some magic numbers 173 174 https://msdn.microsoft.com/fr-fr/library/windows/desktop/ms680336(v=vs.85).aspx # defines _IMAGE_NT_HEADERS 32 64 175 176 https://msdn.microsoft.com/en-us/library/windows/desktop/ms680341(v=vs.85).aspx # defines _IMAGE_SECTION_HEADER 177 178 */ 179 180 /* ===== START pe structures */ 181 182 struct dos_header { 183 TYP(dh_mz,2); 184 TYP(dh_dos_data,58); 185 TYP(dh_image_offset,4); 186 }; 187 188 #define IMAGE_DOS_SIGNATURE_dw 0x5A4D 189 #define IMAGE_DOS_REVSIGNATURE_dw 0x4D5A 190 #define IMAGE_NT_SIGNATURE_dw 0x00004550 191 #define IMAGE_FILE_MACHINE_I386_dw 0x14c 192 #define IMAGE_FILE_MACHINE_IA64_dw 0x200 193 #define IMAGE_FILE_MACHINE_AMD64_dw 0x8664 194 195 196 struct pe_image_file_header { 197 TYP(im_machine,2); 198 TYP(im_sectioncount,2); 199 TYP(im_ignoring,(3*4)); 200 TYP(im_opt_header_size,2); 201 TYP(im_ignoringb,2); 202 }; 203 204 /* ===== END pe structures */ 205 206 207 /* For following MacOS file naming convention */ 208 static const char * 209 getseparator (const char *f) 210 { 211 const char *p = 0; 212 const char *q = 0; 213 char c = 0;; 214 215 p = NULL; 216 q = f; 217 do { 218 c = *q++; 219 if (c == '\\' || c == '/' || c == ':') { 220 p = q; 221 } 222 } while (c); 223 return p; 224 } 225 226 static const char * 227 getbasename (const char *f) 228 { 229 const char *pseparator = getseparator (f); 230 if (!pseparator) { 231 return f; 232 } 233 return pseparator; 234 } 235 236 /* Not a standard function, though part of GNU libc 237 since 2008 (I have never examined the GNU version). */ 238 static char * 239 dw_stpcpy(char *dest,const char *src) 240 { 241 const char *cp = src; 242 char *dp = dest; 243 244 for ( ; *cp; ++cp,++dp) { 245 *dp = *cp; 246 } 247 *dp = 0; 248 return dp; 249 } 250 251 252 253 /* This started like Elf, so check initial fields. */ 254 static int 255 fill_in_elf_fields(struct elf_header *h, 256 unsigned *endian, 257 /* Size of the object file offsets, not DWARF offset 258 size. */ 259 unsigned *objoffsetsize, 260 int *errcode) 261 { 262 unsigned locendian = 0; 263 unsigned locoffsetsize = 0; 264 265 switch(h->e_ident[EI_CLASS]) { 266 case ELFCLASS32: 267 locoffsetsize = 32; 268 break; 269 case ELFCLASS64: 270 locoffsetsize = 64; 271 break; 272 default: 273 *errcode = DW_DLE_ELF_CLASS_BAD; 274 return DW_DLV_ERROR; 275 } 276 switch(h->e_ident[EI_DATA]) { 277 case ELFDATA2LSB: 278 locendian = DW_ENDIAN_LITTLE; 279 break; 280 case ELFDATA2MSB: 281 locendian = DW_ENDIAN_BIG; 282 break; 283 default: 284 *errcode = DW_DLE_ELF_ENDIAN_BAD; 285 return DW_DLV_ERROR; 286 } 287 if (h->e_ident[EI_VERSION] != 1 /* EV_CURRENT */) { 288 *errcode = DW_DLE_ELF_VERSION_BAD; 289 return DW_DLV_ERROR; 290 } 291 *endian = locendian; 292 *objoffsetsize = locoffsetsize; 293 return DW_DLV_OK; 294 } 295 static char archive_magic[8] = { 296 '!','<','a','r','c','h','>',0x0a 297 }; 298 static int 299 is_archive_magic(struct elf_header *h) { 300 int i = 0; 301 int len = sizeof(archive_magic); 302 const char *cp = (const char *)h; 303 for( ; i < len; ++i) { 304 if (cp[i] != archive_magic[i]) { 305 return FALSE; 306 } 307 } 308 return TRUE; 309 } 310 311 /* A bit unusual in that it always sets *is_pe_flag 312 Return of DW_DLV_OK it is a PE file we recognize. */ 313 static int 314 is_pe_object(int fd, 315 unsigned long filesize, 316 unsigned *endian, 317 unsigned *offsetsize, 318 int *errcode) 319 { 320 unsigned dos_sig = 0; 321 unsigned locendian = 0; 322 void (*word_swap) (void *, const void *, unsigned long); 323 unsigned long nt_address = 0; 324 struct dos_header dhinmem; 325 char nt_sig_array[4]; 326 unsigned long nt_sig = 0; 327 struct pe_image_file_header ifh; 328 int res = 0; 329 330 if (filesize < (sizeof (struct dos_header) + 331 SIZEOFT32 + sizeof(struct pe_image_file_header))) { 332 *errcode = DW_DLE_FILE_TOO_SMALL; 333 return DW_DLV_ERROR; 334 } 335 res = _dwarf_object_read_random(fd,(char *)&dhinmem, 336 0,sizeof(dhinmem),filesize,errcode); 337 if (res != DW_DLV_OK) { 338 return res; 339 } 340 /* No swap here, want it as in the file */ 341 dos_sig = magic_copy((unsigned char *)dhinmem.dh_mz, 342 sizeof(dhinmem.dh_mz)); 343 if (dos_sig == IMAGE_DOS_SIGNATURE_dw) { 344 /* IMAGE_DOS_SIGNATURE_dw assumes bytes reversed by little-endian 345 load, so we intrepet a match the other way. */ 346 /* BIG ENDIAN. From looking at hex characters in object */ 347 #ifdef WORDS_BIGENDIAN 348 word_swap = _dwarf_memcpy_noswap_bytes; 349 #else /* LITTLE ENDIAN */ 350 word_swap = _dwarf_memcpy_swap_bytes; 351 #endif /* LITTLE- BIG-ENDIAN */ 352 locendian = DW_ENDIAN_BIG; 353 } else if (dos_sig == IMAGE_DOS_REVSIGNATURE_dw) { 354 /* raw load, so intrepet a match the other way. */ 355 /* LITTLE ENDIAN */ 356 #ifdef WORDS_BIGENDIAN 357 word_swap = _dwarf_memcpy_swap_bytes; 358 #else /* LITTLE ENDIAN */ 359 word_swap = _dwarf_memcpy_noswap_bytes; 360 #endif /* LITTLE- BIG-ENDIAN */ 361 locendian = DW_ENDIAN_LITTLE; 362 } else { 363 /* Not dos header not a PE file we recognize */ 364 *errcode = DW_DLE_FILE_WRONG_TYPE; 365 return DW_DLV_ERROR; 366 } 367 ASNAR(word_swap,nt_address, dhinmem.dh_image_offset); 368 if (filesize < nt_address) { 369 /* Not dos header not a PE file we recognize */ 370 *errcode = DW_DLE_FILE_TOO_SMALL; 371 return DW_DLV_ERROR; 372 } 373 if (filesize < (nt_address + SIZEOFT32 + 374 sizeof(struct pe_image_file_header))) { 375 *errcode = DW_DLE_FILE_TOO_SMALL; 376 /* Not dos header not a PE file we recognize */ 377 return DW_DLV_ERROR; 378 } 379 res = _dwarf_object_read_random(fd,(char *)&nt_sig_array[0], 380 nt_address, sizeof(nt_sig_array),filesize,errcode); 381 if (res != DW_DLV_OK) { 382 return res; 383 } 384 { unsigned long lsig = 0; 385 386 ASNAR(word_swap,lsig,nt_sig_array); 387 nt_sig = lsig; 388 } 389 if (nt_sig != IMAGE_NT_SIGNATURE_dw) { 390 *errcode = DW_DLE_FILE_WRONG_TYPE; 391 return DW_DLV_ERROR; 392 } 393 res = _dwarf_object_read_random(fd,(char *)&ifh, 394 nt_address + SIZEOFT32, 395 sizeof(struct pe_image_file_header), 396 filesize, 397 errcode); 398 if (res != DW_DLV_OK) { 399 return res; 400 } 401 { 402 unsigned long machine = 0; 403 404 ASNAR(word_swap,machine,ifh.im_machine); 405 switch(machine) { 406 case IMAGE_FILE_MACHINE_I386_dw: 407 *offsetsize = 32; 408 *endian = locendian; 409 return DW_DLV_OK; 410 case IMAGE_FILE_MACHINE_IA64_dw: 411 case IMAGE_FILE_MACHINE_AMD64_dw: 412 *offsetsize = 64; 413 *endian = locendian; 414 return DW_DLV_OK; 415 } 416 } 417 *errcode = DW_DLE_IMAGE_FILE_UNKNOWN_TYPE; 418 return DW_DLV_ERROR; 419 } 420 421 static int 422 is_mach_o_magic(struct elf_header *h, 423 unsigned *endian, 424 unsigned *offsetsize) 425 { 426 unsigned long magicval = 0; 427 unsigned locendian = 0; 428 unsigned locoffsetsize = 0; 429 430 /* No swapping here. Need to match size of 431 Mach-o magic field. */ 432 magicval = magic_copy(h->e_ident,4); 433 if (magicval == MH_MAGIC) { 434 locendian = DW_ENDIAN_BIG; 435 locoffsetsize = 32; 436 } else if (magicval == MH_CIGAM) { 437 locendian = DW_ENDIAN_LITTLE; 438 locoffsetsize = 32; 439 }else if (magicval == MH_MAGIC_64) { 440 locendian = DW_ENDIAN_BIG; 441 locoffsetsize = 64; 442 } else if (magicval == MH_CIGAM_64) { 443 locendian = DW_ENDIAN_LITTLE; 444 locoffsetsize = 64; 445 } else { 446 return FALSE; 447 } 448 *endian = locendian; 449 *offsetsize = locoffsetsize; 450 return TRUE; 451 } 452 453 int 454 dwarf_object_detector_fd(int fd, 455 unsigned *ftype, 456 unsigned *endian, 457 unsigned *offsetsize, 458 Dwarf_Unsigned *filesize, 459 int *errcode) 460 { 461 struct elf_header h; 462 size_t readlen = sizeof(h); 463 int res = 0; 464 off_t fsize = 0; 465 off_t lsval = 0; 466 ssize_t readval = 0; 467 468 fsize = lseek(fd,0L,SEEK_END); 469 if(fsize < 0) { 470 *errcode = DW_DLE_SEEK_ERROR; 471 return DW_DLV_ERROR; 472 } 473 if (fsize <= (off_t)readlen) { 474 /* Not a real object file */ 475 *errcode = DW_DLE_FILE_TOO_SMALL; 476 return DW_DLV_ERROR; 477 } 478 lsval = lseek(fd,0L,SEEK_SET); 479 if(lsval < 0) { 480 *errcode = DW_DLE_SEEK_ERROR; 481 return DW_DLV_ERROR; 482 } 483 readval = read(fd,&h,readlen); 484 if (readval != (ssize_t)readlen) { 485 *errcode = DW_DLE_READ_ERROR; 486 return DW_DLV_ERROR; 487 } 488 if (h.e_ident[0] == 0x7f && 489 h.e_ident[1] == 'E' && 490 h.e_ident[2] == 'L' && 491 h.e_ident[3] == 'F') { 492 /* is ELF */ 493 494 res = fill_in_elf_fields(&h,endian,offsetsize,errcode); 495 if (res != DW_DLV_OK) { 496 return res; 497 } 498 *ftype = DW_FTYPE_ELF; 499 *filesize = (size_t)fsize; 500 return DW_DLV_OK; 501 } 502 if (is_mach_o_magic(&h,endian,offsetsize)) { 503 *ftype = DW_FTYPE_MACH_O; 504 *filesize = (size_t)fsize; 505 return DW_DLV_OK; 506 } 507 if (is_archive_magic(&h)) { 508 *ftype = DW_FTYPE_ARCHIVE; 509 *filesize = (size_t)fsize; 510 return DW_DLV_OK; 511 } 512 res = is_pe_object(fd,fsize,endian,offsetsize,errcode); 513 if (res == DW_DLV_OK ) { 514 *ftype = DW_FTYPE_PE; 515 *filesize = (size_t)fsize; 516 return DW_DLV_OK; 517 } 518 /* Check for custom ELF format. */ 519 #ifdef HAVE_CUSTOM_LIBELF 520 res = elf_is_custom_format(&h,readlen,&fsize,endian,offsetsize,errcode); 521 if (res == DW_DLV_OK) { 522 *ftype = DW_FTYPE_CUSTOM_ELF; 523 *filesize = (size_t)fsize; 524 return res; 525 } 526 #endif /* HAVE_CUSTOM_LIBELF */ 527 528 /* Unknown object format. */ 529 return DW_DLV_NO_ENTRY; 530 } 531 532 int 533 dwarf_object_detector_path(const char *path, 534 char *outpath,unsigned long outpath_len, 535 unsigned *ftype, 536 unsigned *endian, 537 unsigned *offsetsize, 538 Dwarf_Unsigned *filesize, 539 int *errcode) 540 { 541 char *cp = 0; 542 size_t plen = strlen(path); 543 size_t dsprefixlen = sizeof(DSYM_SUFFIX); 544 int fd = -1; 545 int res = 0; 546 int have_outpath = outpath && outpath_len; 547 548 #if !defined(S_ISREG) 549 #define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG) 550 #endif 551 #if !defined(S_ISDIR) 552 #define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR) 553 #endif 554 555 if (have_outpath) { 556 if ((2*plen + dsprefixlen +2) >= outpath_len) { 557 *errcode = DW_DLE_PATH_SIZE_TOO_SMALL; 558 return DW_DLV_ERROR; 559 } 560 cp = dw_stpcpy(outpath,path); 561 cp = dw_stpcpy(cp,DSYM_SUFFIX); 562 dw_stpcpy(cp,getbasename(path)); 563 fd = open(outpath,O_RDONLY|O_BINARY); 564 if (fd < 0) { 565 *outpath = 0; 566 fd = open(path,O_RDONLY|O_BINARY); 567 dw_stpcpy(outpath,path); 568 } 569 } else { 570 fd = open(path,O_RDONLY|O_BINARY); 571 } 572 if (fd < 0) { 573 if (have_outpath) { 574 *outpath = 0; 575 } 576 return DW_DLV_NO_ENTRY; 577 } 578 res = dwarf_object_detector_fd(fd, 579 ftype,endian,offsetsize,filesize,errcode); 580 if (res != DW_DLV_OK && have_outpath) { 581 *outpath = 0; 582 } 583 close(fd); 584 return res; 585 } 586