xref: /freebsd/contrib/elftoolchain/strings/strings.c (revision 545ddfbe7d4fe8adfb862903b24eac1d5896c1ef)
1 /*-
2  * Copyright (c) 2007 S.Sam Arun Raj
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
#include <sys/cdefs.h>
#include <sys/stat.h>
#include <sys/types.h>

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <inttypes.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <libelf.h>
#include <libelftc.h>
#include <gelf.h>

#include "_elftc.h"
48 
49 ELFTC_VCSID("$Id: strings.c 3124 2014-12-21 05:46:28Z kaiwang27 $");
50 
/* Status codes returned by the file handlers and used as the exit status. */
enum return_code {
	RETURN_OK = 0,		/* input was processed */
	RETURN_NOINPUT = 1,	/* input could not be opened */
	RETURN_SOFTWARE = 2	/* internal failure (fstat, ELF header, memory) */
};
56 
/* Number base used when printing the offset of each string (-o / -t). */
enum radix_style {
	RADIX_DECIMAL = 0,	/* -t d */
	RADIX_HEX = 1,		/* -t x */
	RADIX_OCTAL = 2		/* -t o, -o */
};
62 
/* Character encodings selectable with -e; see the option parser in main(). */
enum encoding_style {
	ENCODING_7BIT = 0,		/* -e s: one byte per character */
	ENCODING_8BIT = 1,		/* -e S: one byte, high bytes accepted */
	ENCODING_16BIT_BIG = 2,		/* -e b: two bytes, big-endian */
	ENCODING_16BIT_LITTLE = 3,	/* -e l: two bytes, little-endian */
	ENCODING_32BIT_BIG = 4,		/* -e B: four bytes, big-endian */
	ENCODING_32BIT_LITTLE = 5	/* -e L: four bytes, little-endian */
};
71 
/*
 * True when the character value 'c' belongs in an extracted string: it
 * must lie in 0..255 and be a tab, a printable character, or (only with
 * the 8-bit encoding) any value above 127.  The range test comes first so
 * that isprint() is never handed an out-of-range value.  'c' is evaluated
 * more than once; do not pass an expression with side effects.
 */
#define PRINTABLE(c)						\
	(!((c) < 0 || (c) > 255) &&				\
	    (isprint((c)) || (c) == '\t' ||			\
	    ((c) > 127 && encoding == ENCODING_8BIT)))
76 
77 
78 static int encoding_size, entire_file, min_len, show_filename, show_loc;
79 static enum encoding_style encoding;
80 static enum radix_style radix;
81 
/* Long options; each one maps onto the short option handled in main(). */
static struct option strings_longopts[] = {
	{ .name = "all", .has_arg = no_argument,
	    .flag = NULL, .val = 'a' },
	{ .name = "bytes", .has_arg = required_argument,
	    .flag = NULL, .val = 'n' },
	{ .name = "encoding", .has_arg = required_argument,
	    .flag = NULL, .val = 'e' },
	{ .name = "help", .has_arg = no_argument,
	    .flag = NULL, .val = 'h' },
	{ .name = "print-file-name", .has_arg = no_argument,
	    .flag = NULL, .val = 'f' },
	{ .name = "radix", .has_arg = required_argument,
	    .flag = NULL, .val = 't' },
	{ .name = "version", .has_arg = no_argument,
	    .flag = NULL, .val = 'v' },
	/* Terminating all-zero entry required by getopt_long(). */
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
92 
/* Forward declarations for the functions defined below. */
long	getcharacter(void);
int	handle_file(const char *name);
int	handle_elf(const char *name, int fd);
int	handle_binary(const char *name, int fd);
int	find_strings(const char *name, off_t offset, off_t size);
void	show_version(void);
void	usage(void);
100 
101 /*
102  * strings(1) extracts text(contiguous printable characters)
103  * from elf and binary files.
104  */
105 int
106 main(int argc, char **argv)
107 {
108 	int ch, rc;
109 
110 	rc = RETURN_OK;
111 	min_len = 0;
112 	encoding_size = 1;
113 	if (elf_version(EV_CURRENT) == EV_NONE)
114 		errx(EXIT_FAILURE, "ELF library initialization failed: %s",
115 		    elf_errmsg(-1));
116 
117 	while ((ch = getopt_long(argc, argv, "1234567890ae:fhn:ot:Vv",
118 	    strings_longopts, NULL)) != -1)
119 		switch((char)ch) {
120 		case 'a':
121 			entire_file = 1;
122 			break;
123 		case 'e':
124 			if (*optarg == 's') {
125 				encoding = ENCODING_7BIT;
126 			} else if (*optarg == 'S') {
127 				encoding = ENCODING_8BIT;
128 			} else if (*optarg == 'b') {
129 				encoding = ENCODING_16BIT_BIG;
130 				encoding_size = 2;
131 			} else if (*optarg == 'B') {
132 				encoding = ENCODING_32BIT_BIG;
133 				encoding_size = 4;
134 			} else if (*optarg == 'l') {
135 				encoding = ENCODING_16BIT_LITTLE;
136 				encoding_size = 2;
137 			} else if (*optarg == 'L') {
138 				encoding = ENCODING_32BIT_LITTLE;
139 				encoding_size = 4;
140 			} else
141 				usage();
142 			        /* NOTREACHED */
143 			break;
144 		case 'f':
145 			show_filename = 1;
146 			break;
147 		case 'n':
148 			min_len = (int)strtoimax(optarg, (char**)NULL, 10);
149 			break;
150 		case 'o':
151 			show_loc = 1;
152 			radix = RADIX_OCTAL;
153 			break;
154 		case 't':
155 			show_loc = 1;
156 			if (*optarg == 'd')
157 				radix = RADIX_DECIMAL;
158 			else if (*optarg == 'o')
159 				radix = RADIX_OCTAL;
160 			else if (*optarg == 'x')
161 				radix = RADIX_HEX;
162 			else
163 				usage();
164 			        /* NOTREACHED */
165 			break;
166 		case 'v':
167 		case 'V':
168 			show_version();
169 			/* NOTREACHED */
170 		case '0':
171 	        case '1':
172 		case '2':
173 		case '3':
174 		case '4':
175 		case '5':
176 		case '6':
177 		case '7':
178 		case '8':
179 		case '9':
180 			min_len *= 10;
181 			min_len += ch - '0';
182 			break;
183 		case 'h':
184 		case '?':
185 		default:
186 			usage();
187 			/* NOTREACHED */
188 		}
189 	argc -= optind;
190 	argv += optind;
191 
192 	if (!min_len)
193 		min_len = 4;
194 	if (!*argv)
195 		rc = handle_file("{standard input}");
196 	else while (*argv) {
197 		rc = handle_file(*argv);
198 		argv++;
199 	}
200 	return (rc);
201 }
202 
203 int
204 handle_file(const char *name)
205 {
206 	int fd, rt;
207 
208 	if (name == NULL)
209 		return (RETURN_NOINPUT);
210 	if (strcmp("{standard input}", name) != 0) {
211 		if (freopen(name, "rb", stdin) == NULL) {
212 			warnx("'%s': %s", name, strerror(errno));
213 			return (RETURN_NOINPUT);
214 		}
215 	} else {
216 		return (find_strings(name, (off_t)0, (off_t)0));
217 	}
218 
219 	fd = fileno(stdin);
220 	if (fd < 0)
221 		return (RETURN_NOINPUT);
222 	rt = handle_elf(name, fd);
223 	return (rt);
224 }
225 
226 /*
227  * Files not understood by handle_elf, will be passed off here and will
228  * treated as a binary file. This would include text file, core dumps ...
229  */
230 int
231 handle_binary(const char *name, int fd)
232 {
233 	struct stat buf;
234 
235 	memset(&buf, 0, sizeof(struct stat));
236 	(void) lseek(fd, (off_t)0, SEEK_SET);
237 	if (!fstat(fd, &buf))
238 		return (find_strings(name, (off_t)0, buf.st_size));
239 	return (RETURN_SOFTWARE);
240 }
241 
242 /*
243  * Will analyse a file to see if it ELF, other files including ar(1),
244  * core dumps are passed off and treated as flat binary files. Unlike
245  * GNU size in FreeBSD this routine will not treat ELF object from
246  * different archs as flat binary files(has to overridden using -a).
247  */
248 int
249 handle_elf(const char *name, int fd)
250 {
251 	GElf_Ehdr elfhdr;
252 	GElf_Shdr shdr;
253 	Elf *elf;
254 	Elf_Scn *scn;
255 	int rc;
256 
257 	rc = RETURN_OK;
258 	/* If entire file is choosen, treat it as a binary file */
259 	if (entire_file)
260 		return (handle_binary(name, fd));
261 
262 	(void) lseek(fd, (off_t)0, SEEK_SET);
263 	elf = elf_begin(fd, ELF_C_READ, NULL);
264 	if (elf_kind(elf) != ELF_K_ELF) {
265 		(void) elf_end(elf);
266 		return (handle_binary(name, fd));
267 	}
268 
269 	if (gelf_getehdr(elf, &elfhdr) == NULL) {
270 		(void) elf_end(elf);
271 		warnx("%s: ELF file could not be processed", name);
272 		return (RETURN_SOFTWARE);
273 	}
274 
275 	if (elfhdr.e_shnum == 0 && elfhdr.e_type == ET_CORE) {
276 		(void) elf_end(elf);
277 		return (handle_binary(name, fd));
278 	} else {
279 		scn = NULL;
280 		while ((scn = elf_nextscn(elf, scn)) != NULL) {
281 			if (gelf_getshdr(scn, &shdr) == NULL)
282 				continue;
283 			if (shdr.sh_type != SHT_NOBITS &&
284 			    (shdr.sh_flags & SHF_ALLOC) != 0) {
285 				rc = find_strings(name, shdr.sh_offset,
286 				    shdr.sh_size);
287 			}
288 		}
289 	}
290 	(void) elf_end(elf);
291 	return (rc);
292 }
293 
294 /*
295  * Retrieves a character from input stream based on the encoding
296  * type requested.
297  */
298 long
299 getcharacter(void)
300 {
301 	long rt;
302 	int i;
303 	char buf[4], c;
304 
305 	rt = EOF;
306 	for(i = 0; i < encoding_size; i++) {
307 		c = getc(stdin);
308 		if (feof(stdin))
309 			return (EOF);
310 		buf[i] = c;
311 	}
312 
313 	switch(encoding) {
314 	case ENCODING_7BIT:
315 	case ENCODING_8BIT:
316 		rt = buf[0];
317 		break;
318 	case ENCODING_16BIT_BIG:
319 		rt = (buf[0] << 8) | buf[1];
320 		break;
321 	case ENCODING_16BIT_LITTLE:
322 		 rt = buf[0] | (buf[1] << 8);
323 		 break;
324 	case ENCODING_32BIT_BIG:
325 		rt = ((long) buf[0] << 24) | ((long) buf[1] << 16) |
326            	    ((long) buf[2] << 8) | buf[3];
327            	break;
328 	case ENCODING_32BIT_LITTLE:
329 		rt = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) |
330         	    ((long) buf[3] << 24);
331            	break;
332 	}
333 	return (rt);
334 }
335 
336 /*
337  * Input stream stdin is read until the end of file is reached or until
338  * the section size is reached in case of ELF files. Contiguous
339  * characters of >= min_size(default 4) will be displayed.
340  */
341 int
342 find_strings(const char *name, off_t offset, off_t size)
343 {
344 	off_t cur_off, start_off;
345 	char *obuf;
346 	long c;
347 	int i;
348 
349 	if ((obuf = (char*)calloc(1, min_len + 1)) == NULL) {
350 		(void) fprintf(stderr, "Unable to allocate memory: %s\n",
351 		     strerror(errno));
352 		return (RETURN_SOFTWARE);
353 	}
354 
355 	(void) fseeko(stdin, offset, SEEK_SET);
356 	cur_off = offset;
357 	start_off = 0;
358 	while(1) {
359 		if ((offset + size) && (cur_off >= offset + size))
360 			break;
361 		start_off = cur_off;
362 		memset(obuf, 0, min_len+1);
363 		for(i = 0; i < min_len; i++) {
364 			c = getcharacter();
365 			if (c == EOF && feof(stdin))
366 				goto _exit1;
367 		 	if (PRINTABLE(c)) {
368 		 		obuf[i] = c;
369 		 		obuf[i+1] = 0;
370 		 		cur_off += encoding_size;
371 		 	} else {
372 				if (encoding == ENCODING_8BIT &&
373 				    (uint8_t)c > 127) {
374 			 		obuf[i] = c;
375 			 		obuf[i+1] = 0;
376 			 		cur_off += encoding_size;
377 			 		continue;
378 			 	}
379 	 			cur_off += encoding_size;
380 	 			break;
381 		 	}
382 		}
383 
384 		if (i >= min_len && ((cur_off <= offset + size) ||
385 		    !(offset + size))) {
386 			if (show_filename)
387 				printf ("%s: ", name);
388 			if (show_loc) {
389 				switch(radix) {
390 				case RADIX_DECIMAL:
391 					(void) printf("%7ju ",
392 					    (uintmax_t)start_off);
393 					break;
394 				case RADIX_HEX:
395 					(void) printf("%7jx ",
396 					    (uintmax_t)start_off);
397 					break;
398 				case RADIX_OCTAL:
399 					(void) printf("%7jo ",
400 					    (uintmax_t)start_off);
401 					break;
402 				}
403 			}
404 			printf("%s", obuf);
405 
406 			while(1) {
407 				if ((offset + size) &&
408 				    (cur_off >= offset + size))
409 					break;
410 				c = getcharacter();
411 				cur_off += encoding_size;
412 				if (encoding == ENCODING_8BIT &&
413 				    (uint8_t)c > 127) {
414 			 		putchar(c);
415 			 		continue;
416 			 	}
417 				if (!PRINTABLE(c) || c == EOF)
418 					break;
419 				putchar(c);
420 			}
421 			putchar('\n');
422 		}
423 	}
424 _exit1:
425 	free(obuf);
426 	return (RETURN_OK);
427 }
428 
/* Help text printed by usage(); the single %s receives the program name. */
#define	USAGE_MESSAGE							\
	"Usage: %s [options] [file...]\n"				\
	"  Print contiguous sequences of printable characters.\n\n"	\
	"  Options:\n"							\
	"  -a     | --all               Scan the entire file for strings.\n" \
	"  -e ENC | --encoding=ENC      Select the character encoding to use.\n" \
	"  -f     | --print-file-name   Print the file name before each string.\n" \
	"  -h     | --help              Print a help message and exit.\n" \
	"  -n N   | --bytes=N | -N      Print sequences with 'N' or more characters.\n" \
	"  -o                           Print offsets in octal.\n"	\
	"  -t R   | --radix=R           Print offsets using the radix named by 'R'.\n" \
	"  -v     | --version           Print a version identifier and exit.\n"
441 
442 void
443 usage(void)
444 {
445 	(void) fprintf(stderr, USAGE_MESSAGE, ELFTC_GETPROGNAME());
446 	exit(EXIT_FAILURE);
447 }
448 
/*
 * Print the program name and the toolchain version on stdout and
 * terminate the program with EXIT_SUCCESS.  Does not return.
 */
void
show_version(void)
{
	const char *progname;

	progname = ELFTC_GETPROGNAME();
	(void) printf("%s (%s)\n", progname, elftc_version());
	exit(EXIT_SUCCESS);
}
455