1 /*- 2 * Copyright (c) 2007 S.Sam Arun Raj 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/capsicum.h> 29 #include <sys/stat.h> 30 31 #include <capsicum_helpers.h> 32 #include <ctype.h> 33 #include <err.h> 34 #include <errno.h> 35 #include <fcntl.h> 36 #include <getopt.h> 37 #include <inttypes.h> 38 #include <stdint.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <sysexits.h> 43 #include <unistd.h> 44 45 #include <libelf.h> 46 #include <libelftc.h> 47 #include <gelf.h> 48 49 #include <libcasper.h> 50 #include <casper/cap_fileargs.h> 51 52 #include "_elftc.h" 53 54 ELFTC_VCSID("$Id: strings.c 3648 2018-11-22 23:26:43Z emaste $"); 55 56 enum radix_style { 57 RADIX_DECIMAL, 58 RADIX_HEX, 59 RADIX_OCTAL 60 }; 61 62 enum encoding_style { 63 ENCODING_7BIT, 64 ENCODING_8BIT, 65 ENCODING_16BIT_BIG, 66 ENCODING_16BIT_LITTLE, 67 ENCODING_32BIT_BIG, 68 ENCODING_32BIT_LITTLE 69 }; 70 71 #define PRINTABLE(c) \ 72 ((c) >= 0 && (c) <= 255 && \ 73 ((c) == '\t' || isprint((c)) || \ 74 (encoding == ENCODING_8BIT && (c) > 127))) 75 76 static int encoding_size, entire_file, show_filename, show_loc; 77 static enum encoding_style encoding; 78 static enum radix_style radix; 79 static intmax_t min_len; 80 81 static struct option strings_longopts[] = { 82 { "all", no_argument, NULL, 'a'}, 83 { "bytes", required_argument, NULL, 'n'}, 84 { "encoding", required_argument, NULL, 'e'}, 85 { "help", no_argument, NULL, 'h'}, 86 { "print-file-name", no_argument, NULL, 'f'}, 87 { "radix", required_argument, NULL, 't'}, 88 { "version", no_argument, NULL, 'v'}, 89 { NULL, 0, NULL, 0 } 90 }; 91 92 int getcharacter(FILE *, long *); 93 int handle_file(fileargs_t *fa, const char *); 94 int handle_elf(const char *, FILE *); 95 int handle_binary(const char *, FILE *, size_t); 96 int find_strings(const char *, FILE *, off_t, off_t); 97 void show_version(void); 98 void usage(void); 99 100 /* 101 * strings(1) extracts text(contiguous printable characters) 102 * from elf and binary files. 103 */ 104 int 105 main(int argc, char **argv) 106 { 107 fileargs_t *fa; 108 cap_rights_t rights; 109 int ch, rc; 110 111 rc = 0; 112 min_len = 0; 113 encoding_size = 1; 114 if (elf_version(EV_CURRENT) == EV_NONE) 115 errx(EXIT_FAILURE, "ELF library initialization failed: %s", 116 elf_errmsg(-1)); 117 118 while ((ch = getopt_long(argc, argv, "1234567890ae:fhn:ot:Vv", 119 strings_longopts, NULL)) != -1) { 120 switch ((char)ch) { 121 case 'a': 122 entire_file = 1; 123 break; 124 case 'e': 125 if (*optarg == 's') { 126 encoding = ENCODING_7BIT; 127 } else if (*optarg == 'S') { 128 encoding = ENCODING_8BIT; 129 } else if (*optarg == 'b') { 130 encoding = ENCODING_16BIT_BIG; 131 encoding_size = 2; 132 } else if (*optarg == 'B') { 133 encoding = ENCODING_32BIT_BIG; 134 encoding_size = 4; 135 } else if (*optarg == 'l') { 136 encoding = ENCODING_16BIT_LITTLE; 137 encoding_size = 2; 138 } else if (*optarg == 'L') { 139 encoding = ENCODING_32BIT_LITTLE; 140 encoding_size = 4; 141 } else 142 usage(); 143 /* NOTREACHED */ 144 break; 145 case 'f': 146 show_filename = 1; 147 break; 148 case 'n': 149 min_len = strtoimax(optarg, (char**)NULL, 10); 150 if (min_len <= 0) 151 errx(EX_USAGE, "option -n should specify a " 152 "positive decimal integer."); 153 break; 154 case 'o': 155 show_loc = 1; 156 radix = RADIX_OCTAL; 157 break; 158 case 't': 159 show_loc = 1; 160 if (*optarg == 'd') 161 radix = RADIX_DECIMAL; 162 else if (*optarg == 'o') 163 radix = RADIX_OCTAL; 164 else if (*optarg == 'x') 165 radix = RADIX_HEX; 166 else 167 usage(); 168 /* NOTREACHED */ 169 break; 170 case 'v': 171 case 'V': 172 show_version(); 173 /* NOTREACHED */ 174 case '0': 175 case '1': 176 case '2': 177 case '3': 178 case '4': 179 case '5': 180 case '6': 181 case '7': 182 case '8': 183 case '9': 184 min_len *= 10; 185 min_len += ch - '0'; 186 break; 187 case 'h': 188 case '?': 189 default: 190 usage(); 191 /* NOTREACHED */ 192 } 193 } 194 argc -= optind; 195 argv += optind; 196 197 cap_rights_init(&rights, CAP_READ, CAP_SEEK, CAP_FSTAT, CAP_FCNTL); 198 fa = fileargs_init(argc, argv, O_RDONLY, 0, &rights, FA_OPEN); 199 if (fa == NULL) 200 err(1, "Unable to initialize casper fileargs"); 201 202 caph_cache_catpages(); 203 if (caph_limit_stdio() < 0 && caph_enter_casper() < 0) { 204 fileargs_free(fa); 205 err(1, "Unable to enter capability mode"); 206 } 207 208 if (min_len == 0) 209 min_len = 4; 210 if (*argv == NULL) 211 rc = find_strings("{standard input}", stdin, 0, 0); 212 else while (*argv != NULL) { 213 if (handle_file(fa, *argv) != 0) 214 rc = 1; 215 argv++; 216 } 217 218 fileargs_free(fa); 219 220 return (rc); 221 } 222 223 int 224 handle_file(fileargs_t *fa, const char *name) 225 { 226 FILE *pfile; 227 int rt; 228 229 if (name == NULL) 230 return (1); 231 pfile = fileargs_fopen(fa, name, "rb"); 232 if (pfile == NULL) { 233 warnx("'%s': %s", name, strerror(errno)); 234 return (1); 235 } 236 237 rt = handle_elf(name, pfile); 238 fclose(pfile); 239 return (rt); 240 } 241 242 /* 243 * Files not understood by handle_elf, will be passed off here and will 244 * treated as a binary file. This would include text file, core dumps ... 245 */ 246 int 247 handle_binary(const char *name, FILE *pfile, size_t size) 248 { 249 250 (void)fseeko(pfile, 0, SEEK_SET); 251 return (find_strings(name, pfile, 0, size)); 252 } 253 254 /* 255 * Will analyse a file to see if it ELF, other files including ar(1), 256 * core dumps are passed off and treated as flat binary files. Unlike 257 * GNU size in FreeBSD this routine will not treat ELF object from 258 * different archs as flat binary files(has to overridden using -a). 259 */ 260 int 261 handle_elf(const char *name, FILE *pfile) 262 { 263 struct stat buf; 264 GElf_Ehdr elfhdr; 265 GElf_Shdr shdr; 266 Elf *elf; 267 Elf_Scn *scn; 268 int rc, fd; 269 270 rc = 0; 271 fd = fileno(pfile); 272 if (fstat(fd, &buf) < 0) 273 return (1); 274 275 /* If entire file is chosen, treat it as a binary file */ 276 if (entire_file) 277 return (handle_binary(name, pfile, buf.st_size)); 278 279 (void)lseek(fd, 0, SEEK_SET); 280 elf = elf_begin(fd, ELF_C_READ, NULL); 281 if (elf_kind(elf) != ELF_K_ELF) { 282 (void)elf_end(elf); 283 return (handle_binary(name, pfile, buf.st_size)); 284 } 285 286 if (gelf_getehdr(elf, &elfhdr) == NULL) { 287 (void)elf_end(elf); 288 warnx("%s: ELF file could not be processed", name); 289 return (1); 290 } 291 292 if (elfhdr.e_shnum == 0 && elfhdr.e_type == ET_CORE) { 293 (void)elf_end(elf); 294 return (handle_binary(name, pfile, buf.st_size)); 295 } else { 296 scn = NULL; 297 while ((scn = elf_nextscn(elf, scn)) != NULL) { 298 if (gelf_getshdr(scn, &shdr) == NULL) 299 continue; 300 if (shdr.sh_type != SHT_NOBITS && 301 (shdr.sh_flags & SHF_ALLOC) != 0) { 302 rc = find_strings(name, pfile, shdr.sh_offset, 303 shdr.sh_size); 304 } 305 } 306 } 307 (void)elf_end(elf); 308 return (rc); 309 } 310 311 /* 312 * Retrieves a character from input stream based on the encoding 313 * type requested. 314 */ 315 int 316 getcharacter(FILE *pfile, long *rt) 317 { 318 int i, c; 319 char buf[4]; 320 321 for(i = 0; i < encoding_size; i++) { 322 c = getc(pfile); 323 if (c == EOF) 324 return (-1); 325 buf[i] = c; 326 } 327 328 switch (encoding) { 329 case ENCODING_7BIT: 330 case ENCODING_8BIT: 331 *rt = buf[0]; 332 break; 333 case ENCODING_16BIT_BIG: 334 *rt = (buf[0] << 8) | buf[1]; 335 break; 336 case ENCODING_16BIT_LITTLE: 337 *rt = buf[0] | (buf[1] << 8); 338 break; 339 case ENCODING_32BIT_BIG: 340 *rt = ((long) buf[0] << 24) | ((long) buf[1] << 16) | 341 ((long) buf[2] << 8) | buf[3]; 342 break; 343 case ENCODING_32BIT_LITTLE: 344 *rt = buf[0] | ((long) buf[1] << 8) | ((long) buf[2] << 16) | 345 ((long) buf[3] << 24); 346 break; 347 default: 348 return (-1); 349 } 350 351 return (0); 352 } 353 354 /* 355 * Input stream is read until the end of file is reached or until 356 * the section size is reached in case of ELF files. Contiguous 357 * characters of >= min_size(default 4) will be displayed. 358 */ 359 int 360 find_strings(const char *name, FILE *pfile, off_t offset, off_t size) 361 { 362 off_t cur_off, start_off; 363 char *obuf; 364 long c; 365 int i; 366 367 if ((obuf = (char*)calloc(1, min_len + 1)) == NULL) { 368 fprintf(stderr, "Unable to allocate memory: %s\n", 369 strerror(errno)); 370 return (1); 371 } 372 373 (void)fseeko(pfile, offset, SEEK_SET); 374 cur_off = offset; 375 start_off = 0; 376 for (;;) { 377 if ((offset + size) && (cur_off >= offset + size)) 378 break; 379 start_off = cur_off; 380 memset(obuf, 0, min_len + 1); 381 for(i = 0; i < min_len; i++) { 382 if (getcharacter(pfile, &c) < 0) 383 goto _exit1; 384 if (PRINTABLE(c)) { 385 obuf[i] = c; 386 obuf[i + 1] = 0; 387 cur_off += encoding_size; 388 } else { 389 if (encoding == ENCODING_8BIT && 390 (uint8_t)c > 127) { 391 obuf[i] = c; 392 obuf[i + 1] = 0; 393 cur_off += encoding_size; 394 continue; 395 } 396 cur_off += encoding_size; 397 break; 398 } 399 } 400 401 if (i >= min_len && ((cur_off <= offset + size) || 402 !(offset + size))) { 403 if (show_filename) 404 printf("%s: ", name); 405 if (show_loc) { 406 switch (radix) { 407 case RADIX_DECIMAL: 408 printf("%7ju ", (uintmax_t)start_off); 409 break; 410 case RADIX_HEX: 411 printf("%7jx ", (uintmax_t)start_off); 412 break; 413 case RADIX_OCTAL: 414 printf("%7jo ", (uintmax_t)start_off); 415 break; 416 } 417 } 418 printf("%s", obuf); 419 420 for (;;) { 421 if ((offset + size) && 422 (cur_off >= offset + size)) 423 break; 424 if (getcharacter(pfile, &c) < 0) 425 break; 426 cur_off += encoding_size; 427 if (encoding == ENCODING_8BIT && 428 (uint8_t)c > 127) { 429 putchar(c); 430 continue; 431 } 432 if (!PRINTABLE(c)) 433 break; 434 putchar(c); 435 } 436 putchar('\n'); 437 } 438 } 439 _exit1: 440 free(obuf); 441 return (0); 442 } 443 444 #define USAGE_MESSAGE "\ 445 Usage: %s [options] [file...]\n\ 446 Print contiguous sequences of printable characters.\n\n\ 447 Options:\n\ 448 -a | --all Scan the entire file for strings.\n\ 449 -e ENC | --encoding=ENC Select the character encoding to use.\n\ 450 -f | --print-file-name Print the file name before each string.\n\ 451 -h | --help Print a help message and exit.\n\ 452 -n N | --bytes=N | -N Print sequences with 'N' or more characters.\n\ 453 -o Print offsets in octal.\n\ 454 -t R | --radix=R Print offsets using the radix named by 'R'.\n\ 455 -v | --version Print a version identifier and exit.\n" 456 457 void 458 usage(void) 459 { 460 461 fprintf(stderr, USAGE_MESSAGE, ELFTC_GETPROGNAME()); 462 exit(EXIT_FAILURE); 463 } 464 465 void 466 show_version(void) 467 { 468 469 printf("%s (%s)\n", ELFTC_GETPROGNAME(), elftc_version()); 470 exit(EXIT_SUCCESS); 471 } 472