xref: /freebsd/contrib/elftoolchain/strings/strings.c (revision 168fce73b59d6023cab45d063a452551a1f2103e)
1 /*-
2  * Copyright (c) 2007 S.Sam Arun Raj
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/stat.h>
28 #include <sys/types.h>
29 
30 #include <ctype.h>
31 #include <err.h>
32 #include <errno.h>
33 #include <fcntl.h>
34 #include <getopt.h>
35 #include <inttypes.h>
36 #include <stdint.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <sysexits.h>
41 #include <unistd.h>
42 
43 #include <libelf.h>
44 #include <libelftc.h>
45 #include <gelf.h>
46 
47 #include "_elftc.h"
48 
49 ELFTC_VCSID("$Id: strings.c 3446 2016-05-03 01:31:17Z emaste $");
50 
/* Numeric base used when printing the offset of each string (-o / -t). */
enum radix_style {
	RADIX_DECIMAL = 0,	/* -t d */
	RADIX_HEX = 1,		/* -t x */
	RADIX_OCTAL = 2		/* -t o, -o */
};
56 
/* Character encodings selectable with -e; the letter is the -e argument. */
enum encoding_style {
	ENCODING_7BIT = 0,		/* -e s */
	ENCODING_8BIT = 1,		/* -e S */
	ENCODING_16BIT_BIG = 2,		/* -e b */
	ENCODING_16BIT_LITTLE = 3,	/* -e l */
	ENCODING_32BIT_BIG = 4,		/* -e B */
	ENCODING_32BIT_LITTLE = 5	/* -e L */
};
65 
/*
 * True when character value `c' counts as part of a string: it must lie in
 * 0..255 and be a tab, satisfy isprint(), or -- in 8-bit mode only -- have
 * the high bit set.  `c' is evaluated several times; pass a plain variable.
 */
#define PRINTABLE(c)						\
	(!((c) < 0 || (c) > 255) &&				\
	    (isprint((c)) || (c) == '\t' ||			\
	    ((c) > 127 && encoding == ENCODING_8BIT)))
70 
71 static int encoding_size, entire_file, show_filename, show_loc;
72 static enum encoding_style encoding;
73 static enum radix_style radix;
74 static intmax_t min_len;
75 
/* Long options accepted by getopt_long(); each maps onto a short option. */
static struct option strings_longopts[] = {
	{ .name = "all", .has_arg = no_argument,
	    .flag = NULL, .val = 'a' },
	{ .name = "bytes", .has_arg = required_argument,
	    .flag = NULL, .val = 'n' },
	{ .name = "encoding", .has_arg = required_argument,
	    .flag = NULL, .val = 'e' },
	{ .name = "help", .has_arg = no_argument,
	    .flag = NULL, .val = 'h' },
	{ .name = "print-file-name", .has_arg = no_argument,
	    .flag = NULL, .val = 'f' },
	{ .name = "radix", .has_arg = required_argument,
	    .flag = NULL, .val = 't' },
	{ .name = "version", .has_arg = no_argument,
	    .flag = NULL, .val = 'v' },
	/* Terminating entry required by getopt_long(). */
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
86 
/* Input decoding and scanning. */
long	getcharacter(void);
int	find_strings(const char *name, off_t offset, off_t size);

/* Per-file dispatch. */
int	handle_file(const char *name);
int	handle_elf(const char *name, int fd);
int	handle_binary(const char *name, int fd);

/* Informational output; both terminate the process. */
void	show_version(void);
void	usage(void);
94 
95 /*
96  * strings(1) extracts text(contiguous printable characters)
97  * from elf and binary files.
98  */
99 int
100 main(int argc, char **argv)
101 {
102 	int ch, rc;
103 
104 	rc = 0;
105 	min_len = 0;
106 	encoding_size = 1;
107 	if (elf_version(EV_CURRENT) == EV_NONE)
108 		errx(EXIT_FAILURE, "ELF library initialization failed: %s",
109 		    elf_errmsg(-1));
110 
111 	while ((ch = getopt_long(argc, argv, "1234567890ae:fhn:ot:Vv",
112 	    strings_longopts, NULL)) != -1)
113 		switch((char)ch) {
114 		case 'a':
115 			entire_file = 1;
116 			break;
117 		case 'e':
118 			if (*optarg == 's') {
119 				encoding = ENCODING_7BIT;
120 			} else if (*optarg == 'S') {
121 				encoding = ENCODING_8BIT;
122 			} else if (*optarg == 'b') {
123 				encoding = ENCODING_16BIT_BIG;
124 				encoding_size = 2;
125 			} else if (*optarg == 'B') {
126 				encoding = ENCODING_32BIT_BIG;
127 				encoding_size = 4;
128 			} else if (*optarg == 'l') {
129 				encoding = ENCODING_16BIT_LITTLE;
130 				encoding_size = 2;
131 			} else if (*optarg == 'L') {
132 				encoding = ENCODING_32BIT_LITTLE;
133 				encoding_size = 4;
134 			} else
135 				usage();
136 			        /* NOTREACHED */
137 			break;
138 		case 'f':
139 			show_filename = 1;
140 			break;
141 		case 'n':
142 			min_len = strtoimax(optarg, (char**)NULL, 10);
143 			if (min_len <= 0)
144 				errx(EX_USAGE, "option -n should specify a "
145 				    "positive decimal integer.");
146 			break;
147 		case 'o':
148 			show_loc = 1;
149 			radix = RADIX_OCTAL;
150 			break;
151 		case 't':
152 			show_loc = 1;
153 			if (*optarg == 'd')
154 				radix = RADIX_DECIMAL;
155 			else if (*optarg == 'o')
156 				radix = RADIX_OCTAL;
157 			else if (*optarg == 'x')
158 				radix = RADIX_HEX;
159 			else
160 				usage();
161 			        /* NOTREACHED */
162 			break;
163 		case 'v':
164 		case 'V':
165 			show_version();
166 			/* NOTREACHED */
167 		case '0':
168 	        case '1':
169 		case '2':
170 		case '3':
171 		case '4':
172 		case '5':
173 		case '6':
174 		case '7':
175 		case '8':
176 		case '9':
177 			min_len *= 10;
178 			min_len += ch - '0';
179 			break;
180 		case 'h':
181 		case '?':
182 		default:
183 			usage();
184 			/* NOTREACHED */
185 		}
186 	argc -= optind;
187 	argv += optind;
188 
189 	if (!min_len)
190 		min_len = 4;
191 	if (!*argv)
192 		rc = handle_file("{standard input}");
193 	else while (*argv) {
194 		if (handle_file(*argv) != 0)
195 			rc = 1;
196 		argv++;
197 	}
198 	return (rc);
199 }
200 
/*
 * Scans one input.  The placeholder name "{standard input}" means stdin is
 * scanned as it is; any other name is reopened onto stdin and handed to
 * handle_elf().  Returns 0 on success and 1 on failure.
 */
int
handle_file(const char *name)
{
	int input_fd;

	if (name == NULL)
		return (1);

	/* Nothing to open for standard input: scan it without a size limit. */
	if (strcmp(name, "{standard input}") == 0)
		return (find_strings(name, (off_t)0, (off_t)0));

	if (freopen(name, "rb", stdin) == NULL) {
		warnx("'%s': %s", name, strerror(errno));
		return (1);
	}

	input_fd = fileno(stdin);
	if (input_fd < 0)
		return (1);

	return (handle_elf(name, input_fd));
}
223 
/*
 * Files that handle_elf() does not process as ELF are passed here and are
 * treated as flat binary files: the whole file, from offset 0 up to the
 * size reported by fstat(), is scanned.  Returns 1 if fstat() fails,
 * otherwise the result of find_strings().
 */
int
handle_binary(const char *name, int fd)
{
	struct stat st;

	memset(&st, 0, sizeof(st));
	(void) lseek(fd, (off_t)0, SEEK_SET);

	if (fstat(fd, &st) != 0)
		return (1);

	return (find_strings(name, (off_t)0, st.st_size));
}
239 
240 /*
241  * Will analyse a file to see if it ELF, other files including ar(1),
242  * core dumps are passed off and treated as flat binary files. Unlike
243  * GNU size in FreeBSD this routine will not treat ELF object from
244  * different archs as flat binary files(has to overridden using -a).
245  */
246 int
247 handle_elf(const char *name, int fd)
248 {
249 	GElf_Ehdr elfhdr;
250 	GElf_Shdr shdr;
251 	Elf *elf;
252 	Elf_Scn *scn;
253 	int rc;
254 
255 	rc = 0;
256 	/* If entire file is chosen, treat it as a binary file */
257 	if (entire_file)
258 		return (handle_binary(name, fd));
259 
260 	(void) lseek(fd, (off_t)0, SEEK_SET);
261 	elf = elf_begin(fd, ELF_C_READ, NULL);
262 	if (elf_kind(elf) != ELF_K_ELF) {
263 		(void) elf_end(elf);
264 		return (handle_binary(name, fd));
265 	}
266 
267 	if (gelf_getehdr(elf, &elfhdr) == NULL) {
268 		(void) elf_end(elf);
269 		warnx("%s: ELF file could not be processed", name);
270 		return (1);
271 	}
272 
273 	if (elfhdr.e_shnum == 0 && elfhdr.e_type == ET_CORE) {
274 		(void) elf_end(elf);
275 		return (handle_binary(name, fd));
276 	} else {
277 		scn = NULL;
278 		while ((scn = elf_nextscn(elf, scn)) != NULL) {
279 			if (gelf_getshdr(scn, &shdr) == NULL)
280 				continue;
281 			if (shdr.sh_type != SHT_NOBITS &&
282 			    (shdr.sh_flags & SHF_ALLOC) != 0) {
283 				rc = find_strings(name, shdr.sh_offset,
284 				    shdr.sh_size);
285 			}
286 		}
287 	}
288 	(void) elf_end(elf);
289 	return (rc);
290 }
291 
292 /*
293  * Retrieves a character from input stream based on the encoding
294  * type requested.
295  */
296 long
297 getcharacter(void)
298 {
299 	long rt;
300 	int i;
301 	char buf[4], c;
302 
303 	rt = EOF;
304 	for(i = 0; i < encoding_size; i++) {
305 		c = getc(stdin);
306 		if (feof(stdin))
307 			return (EOF);
308 		buf[i] = c;
309 	}
310 
311 	switch(encoding) {
312 	case ENCODING_7BIT:
313 	case ENCODING_8BIT:
314 		rt = buf[0];
315 		break;
316 	case ENCODING_16BIT_BIG:
317 		rt = (buf[0] << 8) | buf[1];
318 		break;
319 	case ENCODING_16BIT_LITTLE:
320 		 rt = buf[0] | (buf[1] << 8);
321 		 break;
322 	case ENCODING_32BIT_BIG:
323 		rt = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
324            	    ((long) buf[2] << 8) | buf[3];
325            	break;
326 	case ENCODING_32BIT_LITTLE:
327 		rt = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
328         	    ((long) buf[3] << 24);
329            	break;
330 	}
331 	return (rt);
332 }
333 
334 /*
335  * Input stream stdin is read until the end of file is reached or until
336  * the section size is reached in case of ELF files. Contiguous
337  * characters of >= min_size(default 4) will be displayed.
338  */
339 int
340 find_strings(const char *name, off_t offset, off_t size)
341 {
342 	off_t cur_off, start_off;
343 	char *obuf;
344 	long c;
345 	int i;
346 
347 	if ((obuf = (char*)calloc(1, min_len + 1)) == NULL) {
348 		(void) fprintf(stderr, "Unable to allocate memory: %s\n",
349 		     strerror(errno));
350 		return (1);
351 	}
352 
353 	(void) fseeko(stdin, offset, SEEK_SET);
354 	cur_off = offset;
355 	start_off = 0;
356 	while(1) {
357 		if ((offset + size) && (cur_off >= offset + size))
358 			break;
359 		start_off = cur_off;
360 		memset(obuf, 0, min_len+1);
361 		for(i = 0; i < min_len; i++) {
362 			c = getcharacter();
363 			if (c == EOF && feof(stdin))
364 				goto _exit1;
365 		 	if (PRINTABLE(c)) {
366 		 		obuf[i] = c;
367 		 		obuf[i+1] = 0;
368 		 		cur_off += encoding_size;
369 		 	} else {
370 				if (encoding == ENCODING_8BIT &&
371 				    (uint8_t)c > 127) {
372 			 		obuf[i] = c;
373 			 		obuf[i+1] = 0;
374 			 		cur_off += encoding_size;
375 			 		continue;
376 			 	}
377 	 			cur_off += encoding_size;
378 	 			break;
379 		 	}
380 		}
381 
382 		if (i >= min_len && ((cur_off <= offset + size) ||
383 		    !(offset + size))) {
384 			if (show_filename)
385 				printf ("%s: ", name);
386 			if (show_loc) {
387 				switch(radix) {
388 				case RADIX_DECIMAL:
389 					(void) printf("%7ju ",
390 					    (uintmax_t)start_off);
391 					break;
392 				case RADIX_HEX:
393 					(void) printf("%7jx ",
394 					    (uintmax_t)start_off);
395 					break;
396 				case RADIX_OCTAL:
397 					(void) printf("%7jo ",
398 					    (uintmax_t)start_off);
399 					break;
400 				}
401 			}
402 			printf("%s", obuf);
403 
404 			while(1) {
405 				if ((offset + size) &&
406 				    (cur_off >= offset + size))
407 					break;
408 				c = getcharacter();
409 				cur_off += encoding_size;
410 				if (encoding == ENCODING_8BIT &&
411 				    (uint8_t)c > 127) {
412 			 		putchar(c);
413 			 		continue;
414 			 	}
415 				if (!PRINTABLE(c) || c == EOF)
416 					break;
417 				putchar(c);
418 			}
419 			putchar('\n');
420 		}
421 	}
422 _exit1:
423 	free(obuf);
424 	return (0);
425 }
426 
/* printf-style help text; the single %s receives the program name. */
#define	USAGE_MESSAGE							\
	"Usage: %s [options] [file...]\n"				\
	"  Print contiguous sequences of printable characters.\n"	\
	"\n"								\
	"  Options:\n"							\
	"  -a     | --all               "				\
	"Scan the entire file for strings.\n"				\
	"  -e ENC | --encoding=ENC      "				\
	"Select the character encoding to use.\n"			\
	"  -f     | --print-file-name   "				\
	"Print the file name before each string.\n"			\
	"  -h     | --help              "				\
	"Print a help message and exit.\n"				\
	"  -n N   | --bytes=N | -N      "				\
	"Print sequences with 'N' or more characters.\n"		\
	"  -o                           "				\
	"Print offsets in octal.\n"					\
	"  -t R   | --radix=R           "				\
	"Print offsets using the radix named by 'R'.\n"			\
	"  -v     | --version           "				\
	"Print a version identifier and exit.\n"
439 
440 void
441 usage(void)
442 {
443 	(void) fprintf(stderr, USAGE_MESSAGE, ELFTC_GETPROGNAME());
444 	exit(EXIT_FAILURE);
445 }
446 
/* Prints "<program> (<version>)" on stdout and exits with EXIT_SUCCESS. */
void
show_version(void)
{
	const char *progname;
	const char *version;

	progname = ELFTC_GETPROGNAME();
	version = elftc_version();
	(void) printf("%s (%s)\n", progname, version);
	exit(EXIT_SUCCESS);
}
453