/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
 */

/*
 * od - octal dump.  Not really just octal anymore; read the POSIX
 * specification for it -- it's more complex than you think!
 *
 * NB: We followed the POSIX semantics fairly strictly, where the
 * legacy code's behavior was in conflict.  In many cases the legacy
 * Solaris code was so completely broken as to be completely unusable.
 * (For example, the long double support was broken beyond
 * imagination!)  Note that GNU coreutils violates POSIX in a few
 * interesting ways, such as changing the numbering of the addresses
 * when skipping.  (Address starts should always be at 0, according to
 * the sample output in the Open Group man page.)
 */

/*
 * NOTE(review): the header names below were reconstructed from the
 * interfaces this file uses (stdio, allocation/strtoll, off_t, string
 * functions, err/warn, wide characters, setlocale, getopt, fstat);
 * confirm against the build before committing.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <string.h>
#include <err.h>
#include <wchar.h>
#include <locale.h>
#include <unistd.h>
#include <sys/stat.h>

#define	_(x)	gettext(x)

/* address format: printf format used for the offset that starts a line */
static char *afmt  =	"%07llo";
/*
 * Continuation prefix: printed instead of the address on the second and
 * later output formats of a block, so it must be as wide as the address
 * (seven columns for the numeric bases).
 */
static char *cfmt  =	"       ";

static FILE *input = NULL;	/* stream currently being read */
static size_t lcm = 1;		/* least common multiple of item widths */
static size_t blocksize = 16;	/* bytes shown per output line */
static int numfiles = 0;	/* number of entries in files[] */
static int curfile = 0;		/* index of the next file to open */
static char **files = NULL;	/* file operands, NULL when reading stdin */
static off_t limit = -1;	/* bytes left to dump (-N), -1 == no limit */

/*
 * This structure describes our ring buffer.  It's always a power of 2
 * in size to make wrap around calculations fast using a mask instead
 * of doing modulo.
 *
 * The size is calculated thusly: We need three "blocks" of data, as
 * we process a block at a time (one block == one line of od output.)
 *
 * We need lookahead of an extra block to support multibyte chars.  We
 * also have a look behind so that we can avoid printing lines that
 * are identical to what we've already printed.  Finally, we need the
 * current block.
* * The block size is determined by the least common multiple of the * data items being displayed. Usually it will be 16, but sometimes * it is 24 (when 12-byte long doubles are presented.) * * The data buffer is allocaed via memalign to make sure it is * properly aligned. */ typedef struct buffer { char *data; /* data buffer */ int prod; /* producer index */ int cons; /* consumer index */ int mask; /* buffer size - 1, wraparound index */ int navail; /* total bytes avail */ } buffer_t; /* * This structure is used to provide information on a specific output * format. We link them together in a list representing the output * formats that the user has selected. */ typedef struct output { int width; /* bytes consumed per call */ void (*func)(buffer_t *, int); /* output function */ struct output *next; /* link node */ } output_t; /* * Specifiers */ typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; typedef unsigned long long u64; typedef char s8; typedef short s16; typedef int s32; typedef long long s64; typedef float fF; typedef double fD; typedef long double fL; static void usage(void) { (void) fprintf(stderr, _("usage: od [-bcCdDfFoOsSvxX] " "[-t types ]... 
[-A base] [-j skip] [-N count] [file]...\n")); exit(1); } #define DECL_GET(typ) \ static typ \ get_ ## typ(buffer_t *b, int index) \ { \ typ val = *(typ *)(void *)(b->data + index); \ return (val); \ } DECL_GET(u8) DECL_GET(u16) DECL_GET(u32) DECL_GET(u64) DECL_GET(s8) DECL_GET(s16) DECL_GET(s32) DECL_GET(s64) DECL_GET(fF) DECL_GET(fD) DECL_GET(fL) #define DECL_OUT(nm, typ, fmt) \ static void \ do_ ## nm(buffer_t *buf, int index) \ { \ typ v = get_ ## typ(buf, index); \ (void) printf(fmt, v); \ } \ \ static output_t output_ ## nm = { \ sizeof (typ), do_ ## nm \ }; DECL_OUT(oct_b, u8, " %03o") DECL_OUT(oct_w, u16, " %06ho") DECL_OUT(oct_d, u32, " %011o") DECL_OUT(oct_q, u64, " %022llo") DECL_OUT(dec_b, u8, " %03u") DECL_OUT(dec_w, u16, " %05hu") DECL_OUT(dec_d, u32, " %010u") DECL_OUT(dec_q, u64, " %020llu") DECL_OUT(sig_b, s8, " %03d") DECL_OUT(sig_w, s16, " %6.05hd") DECL_OUT(sig_d, s32, " %11.010d") DECL_OUT(sig_q, s64, " %20.019lld") DECL_OUT(hex_b, u8, " %02x") DECL_OUT(hex_w, u16, " %04hx") DECL_OUT(hex_d, s32, " %08x") DECL_OUT(hex_q, s64, " %016llx") DECL_OUT(float, fF, " %14.7e") DECL_OUT(double, fD, " %21.14e") DECL_OUT(ldouble, fL, " %24.14Le") static char *ascii[] = { "nul", "soh", "stx", "etx", "eot", "enq", "ack", " be", " bs", " ht", " lf", " vt", " ff", " cr", " so", " si", "dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb", "can", " em", "sub", "esc", " fs", " gs", " rs", " us", " sp", " !", " \"", " #", " $", " %", " &", " '", " (", " )", " *", " +", " ,", " -", " .", " /", " 0", " 1", " 2", " 3", " 4", " 5", " 6", " 7", " 8", " 9", " :", " ;", " <", " =", " >", " ?", " @", " A", " B", " C", " D", " E", " F", " G", " H", " I", " J", " K", " L", " M", " N", " O", " P", " Q", " R", " S", " T", " U", " V", " W", " X", " Y", " Z", " [", " \\", " ]", " ^", " _", " `", " a", " b", " c", " d", " e", " f", " g", " h", " i", " j", " k", " l", " m", " n", " o", " p", " q", " r", " s", " t", " u", " v", " w", " x", " y", " z", " {", " |", " }", " ~", 
"del" }; static void do_ascii(buffer_t *buf, int index) { uint8_t v = get_u8(buf, index); (void) fputc(' ', stdout); (void) fputs(ascii[v & 0x7f], stdout); } static output_t output_ascii = { 1, do_ascii, }; static void do_char(buffer_t *buf, int index) { static int nresid = 0; static int printable = 0; int cnt; int avail; int nb; char scratch[10]; wchar_t wc; int which; uint8_t v = get_u8(buf, index); /* * If there were residual bytes from an earlier * character, then just display the ** continuation * indication. */ if (nresid) { if (printable) { (void) fputs(" **", stdout); } else { (void) printf(" %03o", v); } nresid--; return; } /* * Peek ahead up to MB_CUR_MAX characters. This has to be * done carefully because we might need to look into the next * block to really know for sure. */ scratch[0] = v; avail = buf->navail; if (avail > MB_CUR_MAX) avail = MB_CUR_MAX; for (cnt = 1, which = index + 1; cnt < avail; cnt++, which++) { scratch[cnt] = buf->data[which & buf->mask]; } /* now see if the value is a real character */ nresid = 0; wc = 0; nb = mbtowc(&wc, scratch, avail); if (nb < 0) { (void) printf(" %03o", v); return; } if (nb == 0) { (void) fputs(" \\0", stdout); return; } nresid = nb - 1; if (nb && iswprint(wc)) { scratch[nb] = 0; (void) fputs(" ", stdout); (void) fputs(scratch, stdout); printable = 1; return; } printable = 0; if (wc == 0) { (void) fputs(" \\0", stdout); } else if (wc == '\b') { (void) fputs(" \\b", stdout); } else if (wc == '\f') { (void) fputs(" \\f", stdout); } else if (wc == '\n') { (void) fputs(" \\n", stdout); } else if (wc == '\r') { (void) fputs(" \\r", stdout); } else if (wc == '\t') { (void) fputs(" \\t", stdout); } else { (void) printf(" %03o", v); } } static output_t output_char = { 1, do_char, }; /* * List of output formatting structures. 
*/ static output_t *head = NULL; static output_t **tailp = &head; static void add_out(output_t *src) { output_t *out; int m; if ((out = calloc(1, sizeof (*src))) == NULL) { err(1, "malloc"); } m = lcm; while ((m % src->width) != 0) { m += lcm; } lcm = m; blocksize = lcm; while (blocksize < 16) blocksize *= 2; (void) memcpy(out, src, sizeof (*src)); *tailp = out; tailp = &out->next; } static FILE * next_input(void) { for (;;) { if (curfile >= numfiles) return (NULL); if (input != NULL) { if ((input = freopen(files[curfile], "r", input)) != NULL) { curfile++; return (input); } } else { if ((input = fopen(files[curfile], "r")) != NULL) { curfile++; return (input); } } warn("open: %s", files[curfile]); curfile++; } } static void refill(buffer_t *b) { int n; int want; int zero; /* * If we have 2 blocks of bytes available, we're done. Note * that each iteration usually loads up 16 bytes, unless we * run out of data. */ while ((input != NULL) && (b->navail < (2 * blocksize))) { /* we preload the next one in advance */ if (limit == 0) { (void) fclose(input); input = NULL; continue; } /* we want to read a whole block if possible */ want = blocksize; if ((limit >= 0) && (want > limit)) { want = limit; } zero = blocksize; while (want && input) { int c; b->prod &= b->mask; c = (b->prod + want > (b->mask + 1)) ? b->mask - b->prod : want; n = fread(b->data + b->prod, 1, c, input); if (n < 0) { warn("read: %s", files ? 
files[curfile-1] : "stdin"); input = next_input(); continue; } if (n == 0) { input = next_input(); continue; } if (limit >= 0) limit -= n; b->navail += n; b->prod += n; want -= n; zero -= n; } while (zero) { b->data[b->prod & b->mask] = 0; b->prod++; b->prod &= b->mask; zero--; } } } #define STR1 "C1" #define STR2 "S2" #ifdef _LP64 #define STR8 "L8" #define STR4 "I4" #else #define STR8 "8" #define STR4 "IL4" #endif static void do_type_string(char *typestr) { if (*typestr == 0) { errx(1, _("missing type string")); } while (*typestr) { switch (*typestr) { case 'a': typestr++; add_out(&output_ascii); break; case 'c': add_out(&output_char); typestr++; break; case 'f': typestr++; switch (*typestr) { case 'F': case '4': add_out(&output_float); typestr++; break; case '8': case 'D': add_out(&output_double); typestr++; break; case 'L': add_out(&output_ldouble); typestr++; break; default: add_out(&output_float); break; } break; case 'd': typestr++; if (strchr(STR1, *typestr)) { typestr++; add_out(&output_sig_b); } else if (strchr(STR2, *typestr)) { typestr++; add_out(&output_sig_w); } else if (strchr(STR4, *typestr)) { typestr++; add_out(&output_sig_d); } else if (strchr(STR8, *typestr)) { typestr++; add_out(&output_sig_q); } else { add_out(&output_sig_d); } break; case 'u': typestr++; if (strchr(STR1, *typestr)) { typestr++; add_out(&output_dec_b); } else if (strchr(STR2, *typestr)) { typestr++; add_out(&output_dec_w); } else if (strchr(STR4, *typestr)) { typestr++; add_out(&output_dec_d); } else if (strchr(STR8, *typestr)) { typestr++; add_out(&output_dec_q); } else { add_out(&output_dec_d); } break; case 'o': typestr++; if (strchr(STR1, *typestr)) { typestr++; add_out(&output_oct_b); } else if (strchr(STR2, *typestr)) { typestr++; add_out(&output_oct_w); } else if (strchr(STR4, *typestr)) { typestr++; add_out(&output_oct_d); } else if (strchr(STR8, *typestr)) { typestr++; add_out(&output_oct_q); } else { add_out(&output_oct_d); } break; case 'x': typestr++; if 
(strchr(STR1, *typestr)) { typestr++; add_out(&output_hex_b); } else if (strchr(STR2, *typestr)) { typestr++; add_out(&output_hex_w); } else if (strchr(STR4, *typestr)) { typestr++; add_out(&output_hex_d); } else if (strchr(STR8, *typestr)) { typestr++; add_out(&output_hex_q); } else { add_out(&output_hex_d); } break; default: errx(1, _("unrecognized type string character: %c"), *typestr); exit(1); } } } int main(int argc, char **argv) { int c; int i; buffer_t buffer; boolean_t first = B_TRUE; boolean_t doall = B_FALSE; boolean_t same = B_FALSE; boolean_t newarg = B_FALSE; off_t offset = 0; off_t skip = 0; char *eptr; char *offstr = 0; input = stdin; (void) setlocale(LC_ALL, ""); while ((c = getopt(argc, argv, "A:bCcdDfFj:N:oOsSxXvt:")) != EOF) { switch (c) { case 'A': newarg = B_TRUE; if (strlen(optarg) > 1) { afmt = NULL; } switch (*optarg) { case 'o': afmt = "%07llo"; cfmt = " "; break; case 'd': afmt = "%07lld"; cfmt = " "; break; case 'x': afmt = "%07llx"; cfmt = " "; break; case 'n': /* * You could argue that the code should * use the same 7 spaces. Legacy uses 8 * though. Oh well. Better to avoid * gratuitous change. 
*/ afmt = " "; cfmt = " "; break; default: afmt = NULL; break; } if (strlen(optarg) != 1) { afmt = NULL; } if (afmt == NULL) warnx(_("invalid address base, " "must be o, d, x, or n")); break; case 'b': add_out(&output_oct_b); break; case 'c': case 'C': add_out(&output_char); break; case 'f': add_out(&output_float); break; case 'F': add_out(&output_double); break; case 'd': add_out(&output_dec_w); break; case 'D': add_out(&output_dec_d); break; case 't': newarg = B_TRUE; do_type_string(optarg); break; case 'o': add_out(&output_oct_w); break; case 'O': add_out(&output_oct_d); break; case 's': add_out(&output_sig_w); break; case 'S': add_out(&output_sig_d); break; case 'x': add_out(&output_hex_w); break; case 'X': add_out(&output_hex_d); break; case 'v': doall = B_TRUE; break; case 'j': newarg = B_TRUE; skip = strtoll(optarg, &eptr, 0); if (*eptr == 'b') { skip <<= 9; /* 512 bytes */ eptr++; } else if (*eptr == 'k') { skip <<= 10; /* 1k */ eptr++; } else if (*eptr == 'm') { skip <<= 20; /* 1m */ eptr++; } else if (*eptr == 'g') { skip <<= 30; /* 1g */ eptr++; } if ((skip < 0) || (eptr[0] != 0)) { warnx(_("invalid skip count '%s' specified"), optarg); exit(1); } break; case 'N': newarg = B_TRUE; limit = strtoll(optarg, &eptr, 0); /* * POSIX doesn't specify this, but I think these * may be helpful. */ if (*eptr == 'b') { limit <<= 9; eptr++; } else if (*eptr == 'k') { limit <<= 10; eptr++; } else if (*eptr == 'm') { limit <<= 20; eptr++; } else if (*eptr == 'g') { limit <<= 30; eptr++; } if ((limit < 0) || (eptr[0] != 0)) { warnx(_("invalid byte count '%s' specified"), optarg); exit(1); } break; default: usage(); break; } } /* this finds the smallest power of two size we can use */ buffer.mask = (1 << (ffs(blocksize * 3) + 1)) - 1; buffer.data = memalign(16, buffer.mask + 1); if (buffer.data == NULL) { err(1, "memalign"); } /* * Wow. This option parsing is hideous. 
* * If the we've not seen a new option, and there is just one * operand, if it starts with a "+", then treat it as an * offset. Otherwise if two operands, and the second operand * starts with + or a digit, then it is an offset. */ if (!newarg) { if (((argc - optind) == 1) && (argv[optind][0] == '+')) { offstr = argv[optind]; argc--; } else if (((argc - optind) == 2) && (strchr("+0123456789", (argv[optind + 1][0])) != NULL)) { offstr = argv[optind + 1]; argc--; } } if (offstr) { int base = 0; int mult = 1; int l; if (*offstr == '+') { offstr++; } l = strlen(offstr); if ((strncmp(offstr, "0x", 2) == 0)) { afmt = "%07llx"; base = 16; offstr += 2; if (offstr[l - 1] == 'B') { offstr[l - 1] = 0; l--; mult = 512; } } else { base = 8; afmt = "%07llo"; if ((offstr[l - 1] == 'B') || (offstr[l - 1] == 'b')) { offstr[l - 1] = 0; l--; mult = 512; } if (offstr[l - 1] == '.') { offstr[l - 1] = 0; base = 10; afmt = "%07lld"; } } skip = strtoll(offstr, &eptr, base); if (*eptr != '\0') { errx(1, _("invalid offset string specified")); } skip *= mult; offset += skip; } /* * Allocate an array for all the input files. */ if (argc > optind) { files = calloc(sizeof (char *), argc - optind); for (i = 0; i < argc - optind; i++) { files[i] = argv[optind + i]; numfiles++; } input = next_input(); } else { input = stdin; } /* * We need to seek ahead. fseek would be faster. */ while (skip && (input != NULL)) { struct stat sbuf; /* * Only fseek() on regular files. (Others * we have to read(). */ if (fstat(fileno(input), &sbuf) < 0) { warn("fstat: %s", files[curfile-1]); input = next_input(); continue; } if (S_ISREG(sbuf.st_mode)) { /* * No point in seeking a file that is too * short to begin with. */ if (sbuf.st_size < skip) { skip -= sbuf.st_size; input = next_input(); continue; } if (fseeko(input, skip, SEEK_SET) < 0) { err(1, "fseek:%s", files[curfile-1]); } /* Done seeking. */ skip = 0; break; } /* * fgetc seems like it would be slow, but it uses * buffered I/O, so it should be fast enough. 
*/ flockfile(input); while (skip) { if (getc_unlocked(input) == EOF) { funlockfile(input); if (ferror(input)) { warn("read: %s", files[curfile-1]); } input = next_input(); if (input != NULL) { flockfile(input); } break; } skip--; } if (input != NULL) funlockfile(input); } if (head == NULL) { add_out(&output_oct_w); } buffer.navail = 0; buffer.prod = 0; buffer.cons = 0; for (refill(&buffer); buffer.navail > 0; refill(&buffer)) { output_t *out; int mx; int j, k; /* * If this buffer was the same as last, then just * dump an asterisk. */ if ((!first) && (buffer.navail >= blocksize) && (!doall)) { j = buffer.cons; k = j - blocksize; for (i = 0; i < blocksize; i++) { if (buffer.data[j & buffer.mask] != buffer.data[k & buffer.mask]) { break; } j++; k++; } if (i == blocksize) { if (!same) { (void) fputs("*\n", stdout); same = B_TRUE; } buffer.navail -= blocksize; offset += blocksize; buffer.cons += blocksize; buffer.cons &= buffer.mask; continue; } } first = B_FALSE; same = B_FALSE; mx = (buffer.navail > blocksize) ? blocksize : buffer.navail; for (out = head; out != NULL; out = out->next) { if (out == head) { /*LINTED E_SEC_PRINTF_VAR_FMT*/ (void) printf(afmt, offset); } else { (void) fputs(cfmt, stdout); } for (i = 0, j = buffer.cons; i < mx; i += out->width) { out->func(&buffer, j); j += out->width; j &= buffer.mask; } (void) fputs("\n", stdout); } buffer.cons += mx; buffer.cons &= buffer.mask; offset += mx; buffer.navail -= mx; } /*LINTED E_SEC_PRINTF_VAR_FMT*/ (void) printf(afmt, offset); (void) fputs("\n", stdout); return (0); }