1 /* Copyright (c) 2018-2018, David Anderson
2 All rights reserved.
3
4 Redistribution and use in source and binary forms, with
5 or without modification, are permitted provided that the
6 following conditions are met:
7
8 Redistributions of source code must retain the above
9 copyright notice, this list of conditions and the following
10 disclaimer.
11
12 Redistributions in binary form must reproduce the above
13 copyright notice, this list of conditions and the following
14 disclaimer in the documentation and/or other materials
15 provided with the distribution.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
18 CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29 EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include "config.h"
33 #include <stdio.h>
34 #include <sys/types.h> /* open() */
35 #include <sys/stat.h> /* open() */
36 #include <fcntl.h> /* O_RDONLY */
37 #ifdef HAVE_UNISTD_H
38 #include <unistd.h> /* lseek read close */
39 #elif defined(_WIN32) && defined(_MSC_VER)
40 #include <io.h>
41 #include <basetsd.h>
42 typedef SSIZE_T ssize_t; /* MSVC does not have POSIX ssize_t */
43 #endif /* HAVE_UNISTD_H */
44 #ifdef HAVE_STRING_H
45 #include <string.h> /* memcpy, strcpy */
46 #endif /* HAVE_STRING_H */
47
48 /* Windows specific header files */
49 #if defined(_WIN32) && defined(HAVE_STDAFX_H)
50 #include "stdafx.h"
51 #endif /* HAVE_STDAFX_H */
52
53 #include "libdwarf.h"
54 #include "memcpy_swap.h"
55 #include "dwarf_object_read_common.h"
56 #include "dwarf_object_detector.h"
57
58 #ifndef O_BINARY
59 #define O_BINARY 0
60 #endif /* O_BINARY */
61
62 /* This is the main() program for the object_detector executable. */
63
64 #ifndef TRUE
65 #define TRUE 1
66 #define FALSE 0
67 #endif /* TRUE */
68
69 #ifndef O_RDONLY
70 #define O_RDONLY 0
71 #endif
72
/*  TYP, SIZEOFT32 and ASNAR
    mean we can use correctly-sized arrays of char for the
    struct members instead of determining a proper integer
    type of that size.

    We are dealing with carefully constructed structs
    that do not have any alignment-forced (hidden)
    unused bytes, so reading lengths from the real structs
    works for each variable. */
82
83 #define TYP(n,l) char n[l]
84 #define SIZEOFT32 4
85
86
87 #define DW_DLV_NO_ENTRY -1
88 #define DW_DLV_OK 0
89 #define DW_DLV_ERROR 1
90
91 #ifndef EI_NIDENT
92 #define EI_NIDENT 16
93 #define EI_CLASS 4
94 #define EI_DATA 5
95 #define EI_VERSION 6
96 #define ELFCLASS32 1
97 #define ELFCLASS64 2
98 #define ELFDATA2LSB 1
99 #define ELFDATA2MSB 2
100 #endif /* EI_NIDENT */
101
102 #define DSYM_SUFFIX ".dSYM/Contents/Resources/DWARF/"
103 #define PATHSIZE 2000
104
105 #ifndef MH_MAGIC
106 /* mach-o 32bit */
107 #define MH_MAGIC 0xfeedface
108 #define MH_CIGAM 0xcefaedfe
109 #endif /* MH_MAGIC */
110 #ifndef MH_MAGIC_64
111 /* mach-o 64bit */
112 #define MH_MAGIC_64 0xfeedfacf
113 #define MH_CIGAM_64 0xcffaedfe
114 #endif /* MH_MAGIC_64 */
115
/*  Assemble the first len bytes at d into an integer,
    first byte most significant, exactly in the order the
    bytes appear in memory (no host-endianness swap).

    d    bytes to read; at least len bytes must be readable.
    len  number of bytes to fold into the result.

    Returns the assembled value. Returns 0 when len is 0,
    so d is never read beyond its stated length. If len is
    larger than sizeof(unsigned long) the earliest bytes
    are shifted out of the result. */
static unsigned long
magic_copy(unsigned char *d, unsigned len)
{
    unsigned i = 0;
    unsigned long v = 0;

    for (i = 0; i < len; ++i) {
        v = (v << 8) | d[i];
    }
    return v;
}
129
130
/*  ASNAR(func,t,s)
    Zero the integer lvalue t, then call func to copy the
    sizeof(s) bytes of the char array s into the
    least-significant byte positions of t.
    func is called as func(destination,source,length).

    ASNAR_SKIP is the number of leading bytes of t to step
    over to reach those byte positions: on a big-endian host
    they are at the high-address end of t, on a little-endian
    host they are at the start. */
#ifdef WORDS_BIGENDIAN
#define ASNAR_SKIP(t,s) (sizeof(t) - sizeof(s))
#else /* LITTLE ENDIAN */
#define ASNAR_SKIP(t,s) 0
#endif /* end LITTLE- BIG-ENDIAN */
#define ASNAR(func,t,s)                                        \
    do {                                                       \
        t = 0;                                                 \
        func(((char *)&t)+ASNAR_SKIP(t,s),&s[0],sizeof(s));    \
    } while (0)
145
146
147 #define EI_NIDENT 16
148 /* An incomplete elf header, good for 32 and 64bit elf */
149 struct elf_header {
150 unsigned char e_ident[EI_NIDENT];
151 TYP(e_type,2);
152 TYP(e_machine,2);
153 TYP(e_version,4);
154 #ifdef HAVE_CUSTOM_LIBELF
155 /* In the case of custom ELF, use extra space */
156 TYP(e_custom,64);
157 #endif /* HAVE_CUSTOM_LIBELF */
158 };
159
160 /* Windows. Certain PE objects.
161 The following references may be of interest.
162 https://msdn.microsoft.com/library/windows/desktop/ms680547(v=vs.85).aspx #PE format overview and various machine magic numbers
163
164 https://msdn.microsoft.com/en-us/library/ms809762.aspx # describes some details of PE headers, basically an overview
165
166 https://msdn.microsoft.com/en-us/library/windows/desktop/aa383751(v=vs.85).aspx #defines sizes of various types
167
168 https://msdn.microsoft.com/fr-fr/library/windows/desktop/ms680313(v=vs.85).aspx #defines IMAGE_FILE_HEADER and Machine fields (32/64)
169
170 https://msdn.microsoft.com/fr-fr/library/windows/desktop/ms680305(v=vs.85).aspx #defines IMAGE_DATA_DIRECTORY
171
172 https://msdn.microsoft.com/en-us/library/windows/desktop/ms680339(v=vs.85).aspx #Defines IMAGE_OPTIONAL_HEADER and some magic numbers
173
174 https://msdn.microsoft.com/fr-fr/library/windows/desktop/ms680336(v=vs.85).aspx # defines _IMAGE_NT_HEADERS 32 64
175
176 https://msdn.microsoft.com/en-us/library/windows/desktop/ms680341(v=vs.85).aspx # defines _IMAGE_SECTION_HEADER
177
178 */
179
180 /* ===== START pe structures */
181
182 struct dos_header {
183 TYP(dh_mz,2);
184 TYP(dh_dos_data,58);
185 TYP(dh_image_offset,4);
186 };
187
188 #define IMAGE_DOS_SIGNATURE_dw 0x5A4D
189 #define IMAGE_DOS_REVSIGNATURE_dw 0x4D5A
190 #define IMAGE_NT_SIGNATURE_dw 0x00004550
191 #define IMAGE_FILE_MACHINE_I386_dw 0x14c
192 #define IMAGE_FILE_MACHINE_IA64_dw 0x200
193 #define IMAGE_FILE_MACHINE_AMD64_dw 0x8664
194
195
196 struct pe_image_file_header {
197 TYP(im_machine,2);
198 TYP(im_sectioncount,2);
199 TYP(im_ignoring,(3*4));
200 TYP(im_opt_header_size,2);
201 TYP(im_ignoringb,2);
202 };
203
204 /* ===== END pe structures */
205
206
/*  For following MacOS file naming convention.
    Scans all of f and returns a pointer to the character
    just after the last '\\', '/' or ':' found.
    Returns NULL when f contains none of those characters.
    If the last separator is the final character of f the
    result points at the terminating NUL. */
static const char *
getseparator(const char *f)
{
    const char *after_last = NULL;
    const char *cur = f;

    for ( ; *cur; ++cur) {
        switch (*cur) {
        case '\\':
        case '/':
        case ':':
            after_last = cur + 1;
            break;
        default:
            break;
        }
    }
    return after_last;
}
225
/*  Returns the part of f following its last separator
    (as located by getseparator()), or f itself when
    f has no separator. The result points into f. */
static const char *
getbasename(const char *f)
{
    const char *tail = getseparator(f);

    return tail ? tail : f;
}
235
/*  Local equivalent of stpcpy(), kept private so the
    code does not depend on the C library providing it.
    Copies the string src (including its terminating NUL)
    to dest and returns a pointer to that NUL in dest,
    so calls can be chained to concatenate strings.
    dest must be large enough; no bounds are checked here. */
static char *
dw_stpcpy(char *dest,const char *src)
{
    while (*src) {
        *dest++ = *src++;
    }
    *dest = 0;
    return dest;
}
250
251
252
253 /* This started like Elf, so check initial fields. */
254 static int
fill_in_elf_fields(struct elf_header * h,unsigned * endian,unsigned * objoffsetsize,int * errcode)255 fill_in_elf_fields(struct elf_header *h,
256 unsigned *endian,
257 /* Size of the object file offsets, not DWARF offset
258 size. */
259 unsigned *objoffsetsize,
260 int *errcode)
261 {
262 unsigned locendian = 0;
263 unsigned locoffsetsize = 0;
264
265 switch(h->e_ident[EI_CLASS]) {
266 case ELFCLASS32:
267 locoffsetsize = 32;
268 break;
269 case ELFCLASS64:
270 locoffsetsize = 64;
271 break;
272 default:
273 *errcode = DW_DLE_ELF_CLASS_BAD;
274 return DW_DLV_ERROR;
275 }
276 switch(h->e_ident[EI_DATA]) {
277 case ELFDATA2LSB:
278 locendian = DW_ENDIAN_LITTLE;
279 break;
280 case ELFDATA2MSB:
281 locendian = DW_ENDIAN_BIG;
282 break;
283 default:
284 *errcode = DW_DLE_ELF_ENDIAN_BAD;
285 return DW_DLV_ERROR;
286 }
287 if (h->e_ident[EI_VERSION] != 1 /* EV_CURRENT */) {
288 *errcode = DW_DLE_ELF_VERSION_BAD;
289 return DW_DLV_ERROR;
290 }
291 *endian = locendian;
292 *objoffsetsize = locoffsetsize;
293 return DW_DLV_OK;
294 }
295 static char archive_magic[8] = {
296 '!','<','a','r','c','h','>',0x0a
297 };
298 static int
is_archive_magic(struct elf_header * h)299 is_archive_magic(struct elf_header *h) {
300 int i = 0;
301 int len = sizeof(archive_magic);
302 const char *cp = (const char *)h;
303 for( ; i < len; ++i) {
304 if (cp[i] != archive_magic[i]) {
305 return FALSE;
306 }
307 }
308 return TRUE;
309 }
310
311 /* A bit unusual in that it always sets *is_pe_flag
312 Return of DW_DLV_OK it is a PE file we recognize. */
313 static int
is_pe_object(int fd,unsigned long filesize,unsigned * endian,unsigned * offsetsize,int * errcode)314 is_pe_object(int fd,
315 unsigned long filesize,
316 unsigned *endian,
317 unsigned *offsetsize,
318 int *errcode)
319 {
320 unsigned dos_sig = 0;
321 unsigned locendian = 0;
322 void (*word_swap) (void *, const void *, unsigned long);
323 unsigned long nt_address = 0;
324 struct dos_header dhinmem;
325 char nt_sig_array[4];
326 unsigned long nt_sig = 0;
327 struct pe_image_file_header ifh;
328 int res = 0;
329
330 if (filesize < (sizeof (struct dos_header) +
331 SIZEOFT32 + sizeof(struct pe_image_file_header))) {
332 *errcode = DW_DLE_FILE_TOO_SMALL;
333 return DW_DLV_ERROR;
334 }
335 res = _dwarf_object_read_random(fd,(char *)&dhinmem,
336 0,sizeof(dhinmem),filesize,errcode);
337 if (res != DW_DLV_OK) {
338 return res;
339 }
340 /* No swap here, want it as in the file */
341 dos_sig = magic_copy((unsigned char *)dhinmem.dh_mz,
342 sizeof(dhinmem.dh_mz));
343 if (dos_sig == IMAGE_DOS_SIGNATURE_dw) {
344 /* IMAGE_DOS_SIGNATURE_dw assumes bytes reversed by little-endian
345 load, so we intrepet a match the other way. */
346 /* BIG ENDIAN. From looking at hex characters in object */
347 #ifdef WORDS_BIGENDIAN
348 word_swap = _dwarf_memcpy_noswap_bytes;
349 #else /* LITTLE ENDIAN */
350 word_swap = _dwarf_memcpy_swap_bytes;
351 #endif /* LITTLE- BIG-ENDIAN */
352 locendian = DW_ENDIAN_BIG;
353 } else if (dos_sig == IMAGE_DOS_REVSIGNATURE_dw) {
354 /* raw load, so intrepet a match the other way. */
355 /* LITTLE ENDIAN */
356 #ifdef WORDS_BIGENDIAN
357 word_swap = _dwarf_memcpy_swap_bytes;
358 #else /* LITTLE ENDIAN */
359 word_swap = _dwarf_memcpy_noswap_bytes;
360 #endif /* LITTLE- BIG-ENDIAN */
361 locendian = DW_ENDIAN_LITTLE;
362 } else {
363 /* Not dos header not a PE file we recognize */
364 *errcode = DW_DLE_FILE_WRONG_TYPE;
365 return DW_DLV_ERROR;
366 }
367 ASNAR(word_swap,nt_address, dhinmem.dh_image_offset);
368 if (filesize < nt_address) {
369 /* Not dos header not a PE file we recognize */
370 *errcode = DW_DLE_FILE_TOO_SMALL;
371 return DW_DLV_ERROR;
372 }
373 if (filesize < (nt_address + SIZEOFT32 +
374 sizeof(struct pe_image_file_header))) {
375 *errcode = DW_DLE_FILE_TOO_SMALL;
376 /* Not dos header not a PE file we recognize */
377 return DW_DLV_ERROR;
378 }
379 res = _dwarf_object_read_random(fd,(char *)&nt_sig_array[0],
380 nt_address, sizeof(nt_sig_array),filesize,errcode);
381 if (res != DW_DLV_OK) {
382 return res;
383 }
384 { unsigned long lsig = 0;
385
386 ASNAR(word_swap,lsig,nt_sig_array);
387 nt_sig = lsig;
388 }
389 if (nt_sig != IMAGE_NT_SIGNATURE_dw) {
390 *errcode = DW_DLE_FILE_WRONG_TYPE;
391 return DW_DLV_ERROR;
392 }
393 res = _dwarf_object_read_random(fd,(char *)&ifh,
394 nt_address + SIZEOFT32,
395 sizeof(struct pe_image_file_header),
396 filesize,
397 errcode);
398 if (res != DW_DLV_OK) {
399 return res;
400 }
401 {
402 unsigned long machine = 0;
403
404 ASNAR(word_swap,machine,ifh.im_machine);
405 switch(machine) {
406 case IMAGE_FILE_MACHINE_I386_dw:
407 *offsetsize = 32;
408 *endian = locendian;
409 return DW_DLV_OK;
410 case IMAGE_FILE_MACHINE_IA64_dw:
411 case IMAGE_FILE_MACHINE_AMD64_dw:
412 *offsetsize = 64;
413 *endian = locendian;
414 return DW_DLV_OK;
415 }
416 }
417 *errcode = DW_DLE_IMAGE_FILE_UNKNOWN_TYPE;
418 return DW_DLV_ERROR;
419 }
420
421 static int
is_mach_o_magic(struct elf_header * h,unsigned * endian,unsigned * offsetsize)422 is_mach_o_magic(struct elf_header *h,
423 unsigned *endian,
424 unsigned *offsetsize)
425 {
426 unsigned long magicval = 0;
427 unsigned locendian = 0;
428 unsigned locoffsetsize = 0;
429
430 /* No swapping here. Need to match size of
431 Mach-o magic field. */
432 magicval = magic_copy(h->e_ident,4);
433 if (magicval == MH_MAGIC) {
434 locendian = DW_ENDIAN_BIG;
435 locoffsetsize = 32;
436 } else if (magicval == MH_CIGAM) {
437 locendian = DW_ENDIAN_LITTLE;
438 locoffsetsize = 32;
439 }else if (magicval == MH_MAGIC_64) {
440 locendian = DW_ENDIAN_BIG;
441 locoffsetsize = 64;
442 } else if (magicval == MH_CIGAM_64) {
443 locendian = DW_ENDIAN_LITTLE;
444 locoffsetsize = 64;
445 } else {
446 return FALSE;
447 }
448 *endian = locendian;
449 *offsetsize = locoffsetsize;
450 return TRUE;
451 }
452
453 int
dwarf_object_detector_fd(int fd,unsigned * ftype,unsigned * endian,unsigned * offsetsize,Dwarf_Unsigned * filesize,int * errcode)454 dwarf_object_detector_fd(int fd,
455 unsigned *ftype,
456 unsigned *endian,
457 unsigned *offsetsize,
458 Dwarf_Unsigned *filesize,
459 int *errcode)
460 {
461 struct elf_header h;
462 size_t readlen = sizeof(h);
463 int res = 0;
464 off_t fsize = 0;
465 off_t lsval = 0;
466 ssize_t readval = 0;
467
468 fsize = lseek(fd,0L,SEEK_END);
469 if(fsize < 0) {
470 *errcode = DW_DLE_SEEK_ERROR;
471 return DW_DLV_ERROR;
472 }
473 if (fsize <= (off_t)readlen) {
474 /* Not a real object file */
475 *errcode = DW_DLE_FILE_TOO_SMALL;
476 return DW_DLV_ERROR;
477 }
478 lsval = lseek(fd,0L,SEEK_SET);
479 if(lsval < 0) {
480 *errcode = DW_DLE_SEEK_ERROR;
481 return DW_DLV_ERROR;
482 }
483 readval = read(fd,&h,readlen);
484 if (readval != (ssize_t)readlen) {
485 *errcode = DW_DLE_READ_ERROR;
486 return DW_DLV_ERROR;
487 }
488 if (h.e_ident[0] == 0x7f &&
489 h.e_ident[1] == 'E' &&
490 h.e_ident[2] == 'L' &&
491 h.e_ident[3] == 'F') {
492 /* is ELF */
493
494 res = fill_in_elf_fields(&h,endian,offsetsize,errcode);
495 if (res != DW_DLV_OK) {
496 return res;
497 }
498 *ftype = DW_FTYPE_ELF;
499 *filesize = (size_t)fsize;
500 return DW_DLV_OK;
501 }
502 if (is_mach_o_magic(&h,endian,offsetsize)) {
503 *ftype = DW_FTYPE_MACH_O;
504 *filesize = (size_t)fsize;
505 return DW_DLV_OK;
506 }
507 if (is_archive_magic(&h)) {
508 *ftype = DW_FTYPE_ARCHIVE;
509 *filesize = (size_t)fsize;
510 return DW_DLV_OK;
511 }
512 res = is_pe_object(fd,fsize,endian,offsetsize,errcode);
513 if (res == DW_DLV_OK ) {
514 *ftype = DW_FTYPE_PE;
515 *filesize = (size_t)fsize;
516 return DW_DLV_OK;
517 }
518 /* Check for custom ELF format. */
519 #ifdef HAVE_CUSTOM_LIBELF
520 res = elf_is_custom_format(&h,readlen,&fsize,endian,offsetsize,errcode);
521 if (res == DW_DLV_OK) {
522 *ftype = DW_FTYPE_CUSTOM_ELF;
523 *filesize = (size_t)fsize;
524 return res;
525 }
526 #endif /* HAVE_CUSTOM_LIBELF */
527
528 /* Unknown object format. */
529 return DW_DLV_NO_ENTRY;
530 }
531
532 int
dwarf_object_detector_path(const char * path,char * outpath,unsigned long outpath_len,unsigned * ftype,unsigned * endian,unsigned * offsetsize,Dwarf_Unsigned * filesize,int * errcode)533 dwarf_object_detector_path(const char *path,
534 char *outpath,unsigned long outpath_len,
535 unsigned *ftype,
536 unsigned *endian,
537 unsigned *offsetsize,
538 Dwarf_Unsigned *filesize,
539 int *errcode)
540 {
541 char *cp = 0;
542 size_t plen = strlen(path);
543 size_t dsprefixlen = sizeof(DSYM_SUFFIX);
544 int fd = -1;
545 int res = 0;
546 int have_outpath = outpath && outpath_len;
547
548 #if !defined(S_ISREG)
549 #define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG)
550 #endif
551 #if !defined(S_ISDIR)
552 #define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR)
553 #endif
554
555 if (have_outpath) {
556 if ((2*plen + dsprefixlen +2) >= outpath_len) {
557 *errcode = DW_DLE_PATH_SIZE_TOO_SMALL;
558 return DW_DLV_ERROR;
559 }
560 cp = dw_stpcpy(outpath,path);
561 cp = dw_stpcpy(cp,DSYM_SUFFIX);
562 dw_stpcpy(cp,getbasename(path));
563 fd = open(outpath,O_RDONLY|O_BINARY);
564 if (fd < 0) {
565 *outpath = 0;
566 fd = open(path,O_RDONLY|O_BINARY);
567 dw_stpcpy(outpath,path);
568 }
569 } else {
570 fd = open(path,O_RDONLY|O_BINARY);
571 }
572 if (fd < 0) {
573 if (have_outpath) {
574 *outpath = 0;
575 }
576 return DW_DLV_NO_ENTRY;
577 }
578 res = dwarf_object_detector_fd(fd,
579 ftype,endian,offsetsize,filesize,errcode);
580 if (res != DW_DLV_OK && have_outpath) {
581 *outpath = 0;
582 }
583 close(fd);
584 return res;
585 }
586