xref: /illumos-gate/usr/src/lib/libdwarf/common/dwarf_object_detector.c (revision d48be21240dfd051b689384ce2b23479d757f2d8)
1 /* Copyright (c) 2018-2018, David Anderson
2 All rights reserved.
3 
4 Redistribution and use in source and binary forms, with
5 or without modification, are permitted provided that the
6 following conditions are met:
7 
8     Redistributions of source code must retain the above
9     copyright notice, this list of conditions and the following
10     disclaimer.
11 
12     Redistributions in binary form must reproduce the above
13     copyright notice, this list of conditions and the following
14     disclaimer in the documentation and/or other materials
15     provided with the distribution.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
18 CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
19 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
22 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
28 OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29 EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31 
32 #include "config.h"
33 #include <stdio.h>
34 #include <sys/types.h> /* open() */
35 #include <sys/stat.h> /* open() */
36 #include <fcntl.h> /* O_RDONLY */
37 #ifdef HAVE_UNISTD_H
38 #include <unistd.h> /* lseek read close */
39 #elif defined(_WIN32) && defined(_MSC_VER)
40 #include <io.h>
41 #include <basetsd.h>
42 typedef SSIZE_T ssize_t; /* MSVC does not have POSIX ssize_t */
43 #endif /* HAVE_UNISTD_H */
44 #ifdef HAVE_STRING_H
45 #include <string.h> /* memcpy, strcpy */
46 #endif /* HAVE_STRING_H */
47 
48 /* Windows specific header files */
49 #if defined(_WIN32) && defined(HAVE_STDAFX_H)
50 #include "stdafx.h"
51 #endif /* HAVE_STDAFX_H */
52 
53 #include "libdwarf.h"
54 #include "memcpy_swap.h"
55 #include "dwarf_object_read_common.h"
56 #include "dwarf_object_detector.h"
57 
58 #ifndef O_BINARY
59 #define O_BINARY 0
60 #endif /* O_BINARY */
61 
/*  Object-file type detection: the routines below classify a
    file as ELF, Mach-O, archive or PE.  No main() is defined
    in this file. */
63 
64 #ifndef TRUE
65 #define TRUE 1
66 #define FALSE 0
67 #endif /* TRUE */
68 
69 #ifndef O_RDONLY
70 #define O_RDONLY 0
71 #endif
72 
/*  TYP, SIZEOFT32 and ASNAR
    mean we can use correctly-sized arrays of char for the
    struct members instead of determining a proper integer
    type of that size.

    We are dealing with carefully constructed structs
    that do not have any alignment-forced (hidden)
    unused bytes so reading lengths from the real structs
    works for each variable.  */
82 
83 #define TYP(n,l) char n[l]
84 #define SIZEOFT32 4
85 
86 
87 #define DW_DLV_NO_ENTRY -1
88 #define DW_DLV_OK        0
89 #define DW_DLV_ERROR     1
90 
91 #ifndef EI_NIDENT
92 #define EI_NIDENT 16
93 #define EI_CLASS  4
94 #define EI_DATA   5
95 #define EI_VERSION 6
96 #define ELFCLASS32 1
97 #define ELFCLASS64 2
98 #define ELFDATA2LSB 1
99 #define ELFDATA2MSB 2
100 #endif /* EI_NIDENT */
101 
102 #define DSYM_SUFFIX ".dSYM/Contents/Resources/DWARF/"
103 #define PATHSIZE 2000
104 
105 #ifndef  MH_MAGIC
106 /* mach-o 32bit */
107 #define MH_MAGIC        0xfeedface
108 #define MH_CIGAM        0xcefaedfe
109 #endif /*  MH_MAGIC */
110 #ifndef  MH_MAGIC_64
111 /* mach-o 64bit */
112 #define MH_MAGIC_64 0xfeedfacf
113 #define MH_CIGAM_64 0xcffaedfe
114 #endif /*  MH_MAGIC_64 */
115 
/*  Assemble the first len bytes at d into an integer, treating
    d[0] as the most significant byte (no host-endian
    dependence).  d[0] is always read, so callers pass
    len >= 1; the callers in this file pass 2 or 4. */
static unsigned long
magic_copy(unsigned char *d, unsigned len)
{
    unsigned long acc = d[0];
    unsigned idx = 1;

    while (idx < len) {
        acc = (acc << 8) | d[idx];
        ++idx;
    }
    return acc;
}
129 
130 
/*  ASNAR(func,t,s): set the integer lvalue t to zero, then call
    func(destination, source, length) to copy the sizeof(s)
    bytes of the char array s into t.  func is a memcpy-style
    routine (the callers in this file pass a swapping or a
    non-swapping copy).
    When WORDS_BIGENDIAN is defined the bytes are placed in the
    last sizeof(s) bytes of t, otherwise in the first
    sizeof(s) bytes, so a source shorter than t lands in the
    low-order part of t either way.
    s must be a real array (sizeof(s) is used as the length)
    and must not be larger than t.
    The t and s arguments are parenthesized in the expansion so
    that expression arguments expand safely. */
#ifdef WORDS_BIGENDIAN
#define ASNAR(func,t,s)                                 \
    do {                                                \
        unsigned tbyte = sizeof(t) - sizeof(s);         \
        (t) = 0;                                        \
        func(((char *)&(t))+tbyte ,&(s)[0],sizeof(s));  \
    } while (0)
#else /* LITTLE ENDIAN */
#define ASNAR(func,t,s)                         \
    do {                                        \
        (t) = 0;                                \
        func(&(t),&(s)[0],sizeof(s));           \
    } while (0)
#endif /* end LITTLE- BIG-ENDIAN */
145 
146 
147 #define EI_NIDENT 16
/*  An incomplete elf header, good for 32 and 64bit elf.
    Only the leading fields are declared.  The detector reads
    sizeof(struct elf_header) bytes from the start of the file
    into one of these and then inspects e_ident; the same
    bytes are also checked against the Mach-O and archive
    magic values.
    Members other than e_ident are char arrays (see TYP) so
    no hidden padding is introduced. */
struct elf_header {
    /*  Identification bytes; indexed with EI_CLASS, EI_DATA
        and EI_VERSION. */
    unsigned char  e_ident[EI_NIDENT];
    TYP(e_type,2);
    TYP(e_machine,2);
    TYP(e_version,4);
#ifdef HAVE_CUSTOM_LIBELF
    /* In the case of custom ELF, use extra space */
    TYP(e_custom,64);
#endif /* HAVE_CUSTOM_LIBELF */
};
159 
160 /*  Windows. Certain PE objects.
161     The following references may be of interest.
162 https://msdn.microsoft.com/library/windows/desktop/ms680547(v=vs.85).aspx       #PE format overview and various machine magic numbers
163 
164 https://msdn.microsoft.com/en-us/library/ms809762.aspx  # describes some details of PE headers, basically an overview
165 
166 https://msdn.microsoft.com/en-us/library/windows/desktop/aa383751(v=vs.85).aspx #defines sizes of various types
167 
168 https://msdn.microsoft.com/fr-fr/library/windows/desktop/ms680313(v=vs.85).aspx #defines IMAGE_FILE_HEADER and Machine fields (32/64)
169 
170 https://msdn.microsoft.com/fr-fr/library/windows/desktop/ms680305(v=vs.85).aspx #defines IMAGE_DATA_DIRECTORY
171 
172 https://msdn.microsoft.com/en-us/library/windows/desktop/ms680339(v=vs.85).aspx #Defines IMAGE_OPTIONAL_HEADER and some magic numbers
173 
174 https://msdn.microsoft.com/fr-fr/library/windows/desktop/ms680336(v=vs.85).aspx # defines _IMAGE_NT_HEADERS 32 64
175 
176 https://msdn.microsoft.com/en-us/library/windows/desktop/ms680341(v=vs.85).aspx # defines _IMAGE_SECTION_HEADER
177 
178 */
179 
180 /* ===== START pe structures */
181 
/*  The first 64 bytes of a PE file, as far as this detector
    needs them.  All members are char arrays (see TYP) so the
    struct has no hidden padding and can be read directly
    from the file. */
struct dos_header {
    /*  Signature bytes, compared against
        IMAGE_DOS_SIGNATURE_dw and IMAGE_DOS_REVSIGNATURE_dw. */
    TYP(dh_mz,2);
    /*  Not examined in this file. */
    TYP(dh_dos_data,58);
    /*  File offset at which the NT signature is read. */
    TYP(dh_image_offset,4);
};
187 
188 #define IMAGE_DOS_SIGNATURE_dw      0x5A4D
189 #define IMAGE_DOS_REVSIGNATURE_dw   0x4D5A
190 #define IMAGE_NT_SIGNATURE_dw       0x00004550
191 #define IMAGE_FILE_MACHINE_I386_dw  0x14c
192 #define IMAGE_FILE_MACHINE_IA64_dw  0x200
193 #define IMAGE_FILE_MACHINE_AMD64_dw 0x8664
194 
195 
/*  The image file header that is read from the file
    SIZEOFT32 bytes past the NT signature offset.
    All members are char arrays (see TYP); only im_machine
    is examined in this file. */
struct pe_image_file_header {
    /*  Compared against the IMAGE_FILE_MACHINE_*_dw values to
        choose a 32 or 64 bit offset size. */
    TYP(im_machine,2);
    TYP(im_sectioncount,2);
    TYP(im_ignoring,(3*4));
    TYP(im_opt_header_size,2);
    TYP(im_ignoringb,2);
};
203 
204 /* ===== END pe structures */
205 
206 
/*  For following MacOS file naming convention.
    Scans the whole string f and returns a pointer to the
    character just after the last '\\', '/' or ':' found,
    or NULL if f contains none of those characters.
    If the last separator is the final character the result
    points at the terminating NUL. */
static const char *
getseparator (const char *f)
{
    const char *after_sep = NULL;
    const char *cur = f;

    for ( ; *cur; ++cur) {
        if (*cur == '\\' || *cur == '/' || *cur == ':') {
            after_sep = cur + 1;
        }
    }
    return after_sep;
}
225 
/*  Returns the part of f following its last path separator
    (as located by getseparator()), or f itself when f has
    no separator. */
static const char *
getbasename (const char *f)
{
    const char *after_sep = getseparator (f);

    return after_sep ? after_sep : f;
}
235 
/*  A local stand-in for stpcpy(), which is not part of
    ISO C.  Copies the string src, including its terminating
    NUL, to dest and returns a pointer to that NUL in dest,
    so calls can be chained to concatenate.
    dest must have room for strlen(src)+1 bytes. */
static char *
dw_stpcpy(char *dest,const char *src)
{
    while (*src) {
        *dest = *src;
        ++dest;
        ++src;
    }
    *dest = 0;
    return dest;
}
250 
251 
252 
253 /* This started like Elf, so check initial fields. */
254 static int
255 fill_in_elf_fields(struct elf_header *h,
256     unsigned *endian,
257     /*  Size of the object file offsets, not DWARF offset
258         size. */
259     unsigned *objoffsetsize,
260     int *errcode)
261 {
262     unsigned locendian = 0;
263     unsigned locoffsetsize = 0;
264 
265     switch(h->e_ident[EI_CLASS]) {
266     case ELFCLASS32:
267         locoffsetsize = 32;
268         break;
269     case ELFCLASS64:
270         locoffsetsize = 64;
271         break;
272     default:
273         *errcode = DW_DLE_ELF_CLASS_BAD;
274         return DW_DLV_ERROR;
275     }
276     switch(h->e_ident[EI_DATA]) {
277     case ELFDATA2LSB:
278         locendian = DW_ENDIAN_LITTLE;
279         break;
280     case ELFDATA2MSB:
281         locendian = DW_ENDIAN_BIG;
282         break;
283     default:
284         *errcode = DW_DLE_ELF_ENDIAN_BAD;
285         return DW_DLV_ERROR;
286     }
287     if (h->e_ident[EI_VERSION] != 1 /* EV_CURRENT */) {
288         *errcode = DW_DLE_ELF_VERSION_BAD;
289         return DW_DLV_ERROR;
290     }
291     *endian = locendian;
292     *objoffsetsize = locoffsetsize;
293     return DW_DLV_OK;
294 }
295 static char archive_magic[8] = {
296 '!','<','a','r','c','h','>',0x0a
297 };
298 static int
299 is_archive_magic(struct elf_header *h) {
300     int i = 0;
301     int len = sizeof(archive_magic);
302     const char *cp = (const char *)h;
303     for( ; i < len; ++i) {
304         if (cp[i] != archive_magic[i]) {
305             return FALSE;
306         }
307     }
308     return TRUE;
309 }
310 
311 /*  A bit unusual in that it always sets *is_pe_flag
312     Return of DW_DLV_OK  it is a PE file we recognize. */
313 static int
314 is_pe_object(int fd,
315     unsigned long filesize,
316     unsigned *endian,
317     unsigned *offsetsize,
318     int *errcode)
319 {
320     unsigned dos_sig = 0;
321     unsigned locendian = 0;
322     void (*word_swap) (void *, const void *, unsigned long);
323     unsigned long nt_address = 0;
324     struct dos_header dhinmem;
325     char nt_sig_array[4];
326     unsigned long nt_sig = 0;
327     struct pe_image_file_header ifh;
328     int res = 0;
329 
330     if (filesize < (sizeof (struct dos_header) +
331         SIZEOFT32 + sizeof(struct pe_image_file_header))) {
332         *errcode = DW_DLE_FILE_TOO_SMALL;
333         return DW_DLV_ERROR;
334     }
335     res = _dwarf_object_read_random(fd,(char *)&dhinmem,
336         0,sizeof(dhinmem),filesize,errcode);
337     if (res != DW_DLV_OK) {
338         return res;
339     }
340     /* No swap here, want it as in the file */
341     dos_sig = magic_copy((unsigned char *)dhinmem.dh_mz,
342         sizeof(dhinmem.dh_mz));
343     if (dos_sig == IMAGE_DOS_SIGNATURE_dw) {
344         /*  IMAGE_DOS_SIGNATURE_dw assumes bytes reversed by little-endian
345             load, so we intrepet a match the other way. */
346         /* BIG ENDIAN. From looking at hex characters in object  */
347 #ifdef  WORDS_BIGENDIAN
348         word_swap = _dwarf_memcpy_noswap_bytes;
349 #else   /* LITTLE ENDIAN */
350         word_swap =  _dwarf_memcpy_swap_bytes;
351 #endif  /* LITTLE- BIG-ENDIAN */
352         locendian = DW_ENDIAN_BIG;
353     } else if (dos_sig == IMAGE_DOS_REVSIGNATURE_dw) {
354         /* raw load, so  intrepet a match the other way. */
355         /* LITTLE ENDIAN */
356 #ifdef  WORDS_BIGENDIAN
357         word_swap =  _dwarf_memcpy_swap_bytes;
358 #else   /* LITTLE ENDIAN */
359         word_swap = _dwarf_memcpy_noswap_bytes;
360 #endif  /* LITTLE- BIG-ENDIAN */
361         locendian = DW_ENDIAN_LITTLE;
362     } else {
363         /* Not dos header not a PE file we recognize */
364         *errcode = DW_DLE_FILE_WRONG_TYPE;
365         return DW_DLV_ERROR;
366     }
367     ASNAR(word_swap,nt_address, dhinmem.dh_image_offset);
368     if (filesize < nt_address) {
369         /* Not dos header not a PE file we recognize */
370         *errcode = DW_DLE_FILE_TOO_SMALL;
371         return DW_DLV_ERROR;
372     }
373     if (filesize < (nt_address + SIZEOFT32 +
374         sizeof(struct pe_image_file_header))) {
375         *errcode = DW_DLE_FILE_TOO_SMALL;
376         /* Not dos header not a PE file we recognize */
377         return DW_DLV_ERROR;
378     }
379     res =  _dwarf_object_read_random(fd,(char *)&nt_sig_array[0],
380         nt_address, sizeof(nt_sig_array),filesize,errcode);
381     if (res != DW_DLV_OK) {
382         return res;
383     }
384     {   unsigned long lsig = 0;
385 
386         ASNAR(word_swap,lsig,nt_sig_array);
387         nt_sig = lsig;
388     }
389     if (nt_sig != IMAGE_NT_SIGNATURE_dw) {
390         *errcode = DW_DLE_FILE_WRONG_TYPE;
391         return DW_DLV_ERROR;
392     }
393     res = _dwarf_object_read_random(fd,(char *)&ifh,
394         nt_address + SIZEOFT32,
395         sizeof(struct pe_image_file_header),
396         filesize,
397         errcode);
398     if (res != DW_DLV_OK) {
399         return res;
400     }
401     {
402         unsigned long machine = 0;
403 
404         ASNAR(word_swap,machine,ifh.im_machine);
405         switch(machine) {
406         case IMAGE_FILE_MACHINE_I386_dw:
407             *offsetsize = 32;
408             *endian = locendian;
409             return DW_DLV_OK;
410         case IMAGE_FILE_MACHINE_IA64_dw:
411         case IMAGE_FILE_MACHINE_AMD64_dw:
412             *offsetsize = 64;
413             *endian = locendian;
414             return DW_DLV_OK;
415         }
416     }
417     *errcode = DW_DLE_IMAGE_FILE_UNKNOWN_TYPE;
418     return DW_DLV_ERROR;
419 }
420 
421 static int
422 is_mach_o_magic(struct elf_header *h,
423     unsigned *endian,
424     unsigned *offsetsize)
425 {
426     unsigned long magicval = 0;
427     unsigned locendian = 0;
428     unsigned locoffsetsize = 0;
429 
430     /*  No swapping here. Need to match size of
431         Mach-o magic field. */
432     magicval = magic_copy(h->e_ident,4);
433     if (magicval == MH_MAGIC) {
434         locendian = DW_ENDIAN_BIG;
435         locoffsetsize = 32;
436     } else if (magicval == MH_CIGAM) {
437         locendian = DW_ENDIAN_LITTLE;
438         locoffsetsize = 32;
439     }else if (magicval == MH_MAGIC_64) {
440         locendian = DW_ENDIAN_BIG;
441         locoffsetsize = 64;
442     } else if (magicval == MH_CIGAM_64) {
443         locendian = DW_ENDIAN_LITTLE;
444         locoffsetsize = 64;
445     } else {
446         return FALSE;
447     }
448     *endian = locendian;
449     *offsetsize = locoffsetsize;
450     return TRUE;
451 }
452 
453 int
454 dwarf_object_detector_fd(int fd,
455     unsigned *ftype,
456     unsigned *endian,
457     unsigned *offsetsize,
458     Dwarf_Unsigned  *filesize,
459     int *errcode)
460 {
461     struct elf_header h;
462     size_t readlen = sizeof(h);
463     int res = 0;
464     off_t fsize = 0;
465     off_t lsval = 0;
466     ssize_t readval = 0;
467 
468     fsize = lseek(fd,0L,SEEK_END);
469     if(fsize < 0) {
470         *errcode = DW_DLE_SEEK_ERROR;
471         return DW_DLV_ERROR;
472     }
473     if (fsize <= (off_t)readlen) {
474         /* Not a real object file */
475         *errcode = DW_DLE_FILE_TOO_SMALL;
476         return DW_DLV_ERROR;
477     }
478     lsval  = lseek(fd,0L,SEEK_SET);
479     if(lsval < 0) {
480         *errcode = DW_DLE_SEEK_ERROR;
481         return DW_DLV_ERROR;
482     }
483     readval = read(fd,&h,readlen);
484     if (readval != (ssize_t)readlen) {
485         *errcode = DW_DLE_READ_ERROR;
486         return DW_DLV_ERROR;
487     }
488     if (h.e_ident[0] == 0x7f &&
489         h.e_ident[1] == 'E' &&
490         h.e_ident[2] == 'L' &&
491         h.e_ident[3] == 'F') {
492         /* is ELF */
493 
494         res = fill_in_elf_fields(&h,endian,offsetsize,errcode);
495         if (res != DW_DLV_OK) {
496             return res;
497         }
498         *ftype = DW_FTYPE_ELF;
499         *filesize = (size_t)fsize;
500         return DW_DLV_OK;
501     }
502     if (is_mach_o_magic(&h,endian,offsetsize)) {
503         *ftype = DW_FTYPE_MACH_O;
504         *filesize = (size_t)fsize;
505         return DW_DLV_OK;
506     }
507     if (is_archive_magic(&h)) {
508         *ftype = DW_FTYPE_ARCHIVE;
509         *filesize = (size_t)fsize;
510         return DW_DLV_OK;
511     }
512     res = is_pe_object(fd,fsize,endian,offsetsize,errcode);
513     if (res == DW_DLV_OK ) {
514         *ftype = DW_FTYPE_PE;
515         *filesize = (size_t)fsize;
516         return DW_DLV_OK;
517     }
518     /* Check for custom ELF format. */
519 #ifdef HAVE_CUSTOM_LIBELF
520     res = elf_is_custom_format(&h,readlen,&fsize,endian,offsetsize,errcode);
521     if (res == DW_DLV_OK) {
522         *ftype = DW_FTYPE_CUSTOM_ELF;
523         *filesize = (size_t)fsize;
524         return res;
525     }
526 #endif /* HAVE_CUSTOM_LIBELF */
527 
528     /* Unknown object format. */
529     return DW_DLV_NO_ENTRY;
530 }
531 
532 int
533 dwarf_object_detector_path(const char  *path,
534     char *outpath,unsigned long outpath_len,
535     unsigned *ftype,
536     unsigned *endian,
537     unsigned *offsetsize,
538     Dwarf_Unsigned  *filesize,
539     int *errcode)
540 {
541     char *cp = 0;
542     size_t plen = strlen(path);
543     size_t dsprefixlen = sizeof(DSYM_SUFFIX);
544     int fd = -1;
545     int res = 0;
546     int have_outpath = outpath && outpath_len;
547 
548 #if !defined(S_ISREG)
549 #define S_ISREG(mode) (((mode) & S_IFMT) == S_IFREG)
550 #endif
551 #if !defined(S_ISDIR)
552 #define S_ISDIR(mode) (((mode) & S_IFMT) == S_IFDIR)
553 #endif
554 
555     if (have_outpath) {
556         if ((2*plen + dsprefixlen +2) >= outpath_len) {
557             *errcode =  DW_DLE_PATH_SIZE_TOO_SMALL;
558             return DW_DLV_ERROR;
559         }
560         cp = dw_stpcpy(outpath,path);
561         cp = dw_stpcpy(cp,DSYM_SUFFIX);
562         dw_stpcpy(cp,getbasename(path));
563         fd = open(outpath,O_RDONLY|O_BINARY);
564         if (fd < 0) {
565             *outpath = 0;
566             fd = open(path,O_RDONLY|O_BINARY);
567             dw_stpcpy(outpath,path);
568         }
569     } else {
570         fd = open(path,O_RDONLY|O_BINARY);
571     }
572     if (fd < 0) {
573         if (have_outpath) {
574             *outpath = 0;
575         }
576         return DW_DLV_NO_ENTRY;
577     }
578     res = dwarf_object_detector_fd(fd,
579         ftype,endian,offsetsize,filesize,errcode);
580     if (res != DW_DLV_OK && have_outpath) {
581         *outpath = 0;
582     }
583     close(fd);
584     return res;
585 }
586