1 /*- 2 * Copyright (c) 2007 S.Sam Arun Raj 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 #include <sys/stat.h> 29 #include <sys/types.h> 30 31 #include <ctype.h> 32 #include <err.h> 33 #include <errno.h> 34 #include <fcntl.h> 35 #include <getopt.h> 36 #include <inttypes.h> 37 #include <stdint.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <unistd.h> 42 43 #include <libelf.h> 44 #include <libelftc.h> 45 #include <gelf.h> 46 47 #include "_elftc.h" 48 49 ELFTC_VCSID("$Id: strings.c 3124 2014-12-21 05:46:28Z kaiwang27 $"); 50 51 enum return_code { 52 RETURN_OK, 53 RETURN_NOINPUT, 54 RETURN_SOFTWARE 55 }; 56 57 enum radix_style { 58 RADIX_DECIMAL, 59 RADIX_HEX, 60 RADIX_OCTAL 61 }; 62 63 enum encoding_style { 64 ENCODING_7BIT, 65 ENCODING_8BIT, 66 ENCODING_16BIT_BIG, 67 ENCODING_16BIT_LITTLE, 68 ENCODING_32BIT_BIG, 69 ENCODING_32BIT_LITTLE 70 }; 71 72 #define PRINTABLE(c) \ 73 ((c) >= 0 && (c) <= 255 && \ 74 ((c) == '\t' || isprint((c)) || \ 75 (encoding == ENCODING_8BIT && (c) > 127))) 76 77 78 static int encoding_size, entire_file, min_len, show_filename, show_loc; 79 static enum encoding_style encoding; 80 static enum radix_style radix; 81 82 static struct option strings_longopts[] = { 83 { "all", no_argument, NULL, 'a'}, 84 { "bytes", required_argument, NULL, 'n'}, 85 { "encoding", required_argument, NULL, 'e'}, 86 { "help", no_argument, NULL, 'h'}, 87 { "print-file-name", no_argument, NULL, 'f'}, 88 { "radix", required_argument, NULL, 't'}, 89 { "version", no_argument, NULL, 'v'}, 90 { NULL, 0, NULL, 0 } 91 }; 92 93 long getcharacter(void); 94 int handle_file(const char *); 95 int handle_elf(const char *, int); 96 int handle_binary(const char *, int); 97 int find_strings(const char *, off_t, off_t); 98 void show_version(void); 99 void usage(void); 100 101 /* 102 * strings(1) extracts text(contiguous printable characters) 103 * from elf and binary files. 104 */ 105 int 106 main(int argc, char **argv) 107 { 108 int ch, rc; 109 110 rc = RETURN_OK; 111 min_len = 0; 112 encoding_size = 1; 113 if (elf_version(EV_CURRENT) == EV_NONE) 114 errx(EXIT_FAILURE, "ELF library initialization failed: %s", 115 elf_errmsg(-1)); 116 117 while ((ch = getopt_long(argc, argv, "1234567890ae:fhn:ot:Vv", 118 strings_longopts, NULL)) != -1) 119 switch((char)ch) { 120 case 'a': 121 entire_file = 1; 122 break; 123 case 'e': 124 if (*optarg == 's') { 125 encoding = ENCODING_7BIT; 126 } else if (*optarg == 'S') { 127 encoding = ENCODING_8BIT; 128 } else if (*optarg == 'b') { 129 encoding = ENCODING_16BIT_BIG; 130 encoding_size = 2; 131 } else if (*optarg == 'B') { 132 encoding = ENCODING_32BIT_BIG; 133 encoding_size = 4; 134 } else if (*optarg == 'l') { 135 encoding = ENCODING_16BIT_LITTLE; 136 encoding_size = 2; 137 } else if (*optarg == 'L') { 138 encoding = ENCODING_32BIT_LITTLE; 139 encoding_size = 4; 140 } else 141 usage(); 142 /* NOTREACHED */ 143 break; 144 case 'f': 145 show_filename = 1; 146 break; 147 case 'n': 148 min_len = (int)strtoimax(optarg, (char**)NULL, 10); 149 break; 150 case 'o': 151 show_loc = 1; 152 radix = RADIX_OCTAL; 153 break; 154 case 't': 155 show_loc = 1; 156 if (*optarg == 'd') 157 radix = RADIX_DECIMAL; 158 else if (*optarg == 'o') 159 radix = RADIX_OCTAL; 160 else if (*optarg == 'x') 161 radix = RADIX_HEX; 162 else 163 usage(); 164 /* NOTREACHED */ 165 break; 166 case 'v': 167 case 'V': 168 show_version(); 169 /* NOTREACHED */ 170 case '0': 171 case '1': 172 case '2': 173 case '3': 174 case '4': 175 case '5': 176 case '6': 177 case '7': 178 case '8': 179 case '9': 180 min_len *= 10; 181 min_len += ch - '0'; 182 break; 183 case 'h': 184 case '?': 185 default: 186 usage(); 187 /* NOTREACHED */ 188 } 189 argc -= optind; 190 argv += optind; 191 192 if (!min_len) 193 min_len = 4; 194 if (!*argv) 195 rc = handle_file("{standard input}"); 196 else while (*argv) { 197 rc = handle_file(*argv); 198 argv++; 199 } 200 return (rc); 201 } 202 203 int 204 handle_file(const char *name) 205 { 206 int fd, rt; 207 208 if (name == NULL) 209 return (RETURN_NOINPUT); 210 if (strcmp("{standard input}", name) != 0) { 211 if (freopen(name, "rb", stdin) == NULL) { 212 warnx("'%s': %s", name, strerror(errno)); 213 return (RETURN_NOINPUT); 214 } 215 } else { 216 return (find_strings(name, (off_t)0, (off_t)0)); 217 } 218 219 fd = fileno(stdin); 220 if (fd < 0) 221 return (RETURN_NOINPUT); 222 rt = handle_elf(name, fd); 223 return (rt); 224 } 225 226 /* 227 * Files not understood by handle_elf, will be passed off here and will 228 * treated as a binary file. This would include text file, core dumps ... 229 */ 230 int 231 handle_binary(const char *name, int fd) 232 { 233 struct stat buf; 234 235 memset(&buf, 0, sizeof(struct stat)); 236 (void) lseek(fd, (off_t)0, SEEK_SET); 237 if (!fstat(fd, &buf)) 238 return (find_strings(name, (off_t)0, buf.st_size)); 239 return (RETURN_SOFTWARE); 240 } 241 242 /* 243 * Will analyse a file to see if it ELF, other files including ar(1), 244 * core dumps are passed off and treated as flat binary files. Unlike 245 * GNU size in FreeBSD this routine will not treat ELF object from 246 * different archs as flat binary files(has to overridden using -a). 247 */ 248 int 249 handle_elf(const char *name, int fd) 250 { 251 GElf_Ehdr elfhdr; 252 GElf_Shdr shdr; 253 Elf *elf; 254 Elf_Scn *scn; 255 int rc; 256 257 rc = RETURN_OK; 258 /* If entire file is choosen, treat it as a binary file */ 259 if (entire_file) 260 return (handle_binary(name, fd)); 261 262 (void) lseek(fd, (off_t)0, SEEK_SET); 263 elf = elf_begin(fd, ELF_C_READ, NULL); 264 if (elf_kind(elf) != ELF_K_ELF) { 265 (void) elf_end(elf); 266 return (handle_binary(name, fd)); 267 } 268 269 if (gelf_getehdr(elf, &elfhdr) == NULL) { 270 (void) elf_end(elf); 271 warnx("%s: ELF file could not be processed", name); 272 return (RETURN_SOFTWARE); 273 } 274 275 if (elfhdr.e_shnum == 0 && elfhdr.e_type == ET_CORE) { 276 (void) elf_end(elf); 277 return (handle_binary(name, fd)); 278 } else { 279 scn = NULL; 280 while ((scn = elf_nextscn(elf, scn)) != NULL) { 281 if (gelf_getshdr(scn, &shdr) == NULL) 282 continue; 283 if (shdr.sh_type != SHT_NOBITS && 284 (shdr.sh_flags & SHF_ALLOC) != 0) { 285 rc = find_strings(name, shdr.sh_offset, 286 shdr.sh_size); 287 } 288 } 289 } 290 (void) elf_end(elf); 291 return (rc); 292 } 293 294 /* 295 * Retrieves a character from input stream based on the encoding 296 * type requested. 297 */ 298 long 299 getcharacter(void) 300 { 301 long rt; 302 int i; 303 char buf[4], c; 304 305 rt = EOF; 306 for(i = 0; i < encoding_size; i++) { 307 c = getc(stdin); 308 if (feof(stdin)) 309 return (EOF); 310 buf[i] = c; 311 } 312 313 switch(encoding) { 314 case ENCODING_7BIT: 315 case ENCODING_8BIT: 316 rt = buf[0]; 317 break; 318 case ENCODING_16BIT_BIG: 319 rt = (buf[0] << 8) | buf[1]; 320 break; 321 case ENCODING_16BIT_LITTLE: 322 rt = buf[0] | (buf[1] << 8); 323 break; 324 case ENCODING_32BIT_BIG: 325 rt = ((long) buf[0] << 24) | ((long) buf[1] << 16) | 326 ((long) buf[2] << 8) | buf[3]; 327 break; 328 case ENCODING_32BIT_LITTLE: 329 rt = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) | 330 ((long) buf[3] << 24); 331 break; 332 } 333 return (rt); 334 } 335 336 /* 337 * Input stream stdin is read until the end of file is reached or until 338 * the section size is reached in case of ELF files. Contiguous 339 * characters of >= min_size(default 4) will be displayed. 340 */ 341 int 342 find_strings(const char *name, off_t offset, off_t size) 343 { 344 off_t cur_off, start_off; 345 char *obuf; 346 long c; 347 int i; 348 349 if ((obuf = (char*)calloc(1, min_len + 1)) == NULL) { 350 (void) fprintf(stderr, "Unable to allocate memory: %s\n", 351 strerror(errno)); 352 return (RETURN_SOFTWARE); 353 } 354 355 (void) fseeko(stdin, offset, SEEK_SET); 356 cur_off = offset; 357 start_off = 0; 358 while(1) { 359 if ((offset + size) && (cur_off >= offset + size)) 360 break; 361 start_off = cur_off; 362 memset(obuf, 0, min_len+1); 363 for(i = 0; i < min_len; i++) { 364 c = getcharacter(); 365 if (c == EOF && feof(stdin)) 366 goto _exit1; 367 if (PRINTABLE(c)) { 368 obuf[i] = c; 369 obuf[i+1] = 0; 370 cur_off += encoding_size; 371 } else { 372 if (encoding == ENCODING_8BIT && 373 (uint8_t)c > 127) { 374 obuf[i] = c; 375 obuf[i+1] = 0; 376 cur_off += encoding_size; 377 continue; 378 } 379 cur_off += encoding_size; 380 break; 381 } 382 } 383 384 if (i >= min_len && ((cur_off <= offset + size) || 385 !(offset + size))) { 386 if (show_filename) 387 printf ("%s: ", name); 388 if (show_loc) { 389 switch(radix) { 390 case RADIX_DECIMAL: 391 (void) printf("%7ju ", 392 (uintmax_t)start_off); 393 break; 394 case RADIX_HEX: 395 (void) printf("%7jx ", 396 (uintmax_t)start_off); 397 break; 398 case RADIX_OCTAL: 399 (void) printf("%7jo ", 400 (uintmax_t)start_off); 401 break; 402 } 403 } 404 printf("%s", obuf); 405 406 while(1) { 407 if ((offset + size) && 408 (cur_off >= offset + size)) 409 break; 410 c = getcharacter(); 411 cur_off += encoding_size; 412 if (encoding == ENCODING_8BIT && 413 (uint8_t)c > 127) { 414 putchar(c); 415 continue; 416 } 417 if (!PRINTABLE(c) || c == EOF) 418 break; 419 putchar(c); 420 } 421 putchar('\n'); 422 } 423 } 424 _exit1: 425 free(obuf); 426 return (RETURN_OK); 427 } 428 429 #define USAGE_MESSAGE "\ 430 Usage: %s [options] [file...]\n\ 431 Print contiguous sequences of printable characters.\n\n\ 432 Options:\n\ 433 -a | --all Scan the entire file for strings.\n\ 434 -e ENC | --encoding=ENC Select the character encoding to use.\n\ 435 -f | --print-file-name Print the file name before each string.\n\ 436 -h | --help Print a help message and exit.\n\ 437 -n N | --bytes=N | -N Print sequences with 'N' or more characters.\n\ 438 -o Print offsets in octal.\n\ 439 -t R | --radix=R Print offsets using the radix named by 'R'.\n\ 440 -v | --version Print a version identifier and exit.\n" 441 442 void 443 usage(void) 444 { 445 (void) fprintf(stderr, USAGE_MESSAGE, ELFTC_GETPROGNAME()); 446 exit(EXIT_FAILURE); 447 } 448 449 void 450 show_version(void) 451 { 452 (void) printf("%s (%s)\n", ELFTC_GETPROGNAME(), elftc_version()); 453 exit(EXIT_SUCCESS); 454 } 455