1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1992-2010 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * * 20 ***********************************************************************/ 21 #pragma prototyped 22 /* 23 * David Korn 24 * Glenn Fowler 25 * AT&T Bell Laboratories 26 * 27 * cat 28 */ 29 30 #include <cmd.h> 31 #include <fcntl.h> 32 33 static const char usage[] = 34 "[-?\n@(#)$Id: cat (AT&T Research) 2009-03-31 $\n]" 35 USAGE_LICENSE 36 "[+NAME?cat - concatenate files]" 37 "[+DESCRIPTION?\bcat\b copies each \afile\a in sequence to the standard" 38 " output. If no \afile\a is given, or if the \afile\a is \b-\b," 39 " \bcat\b copies from standard input starting at the current location.]" 40 41 "[b:number-nonblank?Number lines as with \b-n\b but omit line numbers from" 42 " blank lines.]" 43 "[d:dos-input?Input files are opened in \atext\amode which removes carriage" 44 " returns in front of new-lines on some systems.]" 45 "[e?Equivalent to \b-vE\b.]" 46 "[n:number?Causes a line number to be inserted at the beginning of each line.]" 47 "[s?Equivalent to \b-S\b for \aatt\a universe and \b-B\b otherwise.]" 48 "[t?Equivalent to \b-vT\b.]" 49 "[u:unbuffer?The output is not delayed by buffering.]" 50 "[v:show-nonprinting?Causes non-printing characters (whith the exception of" 51 " tabs, new-lines, and form-feeds) to be output as printable charater" 52 " sequences. ASCII control characters are printed as \b^\b\an\a," 53 " where \an\a is the corresponding ASCII character in the range" 54 " octal 100-137. The DEL character (octal 0177) is copied" 55 " as \b^?\b. Other non-printable characters are copied as \bM-\b\ax\a" 56 " where \ax\a is the ASCII character specified by the low-order seven" 57 " bits. Multibyte characters in the current locale are treated as" 58 " printable characters.]" 59 "[A:show-all?Equivalent to \b-vET\b.]" 60 "[B:squeeze-blank?Multiple adjacent new-line characters are replace by one" 61 " new-line.]" 62 "[D:dos-output?Output files are opened in \atext\amode which inserts carriage" 63 " returns in front of new-lines on some systems.]" 64 "[E:show-ends?Causes a \b$\b to be inserted before each new-line.]" 65 "[R:regress?Regression test defaults: \b-v\b buffer size 4.]" 66 "[S:silent?\bcat\b is silent about non-existent files.]" 67 "[T:show-blank?Causes tabs to be copied as \b^I\b and formfeeds as \b^L\b.]" 68 69 "\n" 70 "\n[file ...]\n" 71 "\n" 72 73 "[+SEE ALSO?\bcp\b(1), \bgetconf\b(1), \bpr\b(1)]" 74 ; 75 76 #define RUBOUT 0177 77 78 /* control flags */ 79 #define B_FLAG (1<<0) 80 #define E_FLAG (1<<1) 81 #define F_FLAG (1<<2) 82 #define N_FLAG (1<<3) 83 #define S_FLAG (1<<4) 84 #define T_FLAG (1<<5) 85 #define U_FLAG (1<<6) 86 #define V_FLAG (1<<7) 87 #define D_FLAG (1<<8) 88 #define d_FLAG (1<<9) 89 90 /* character types */ 91 #define T_ERROR 1 92 #define T_EOF 2 93 #define T_ENDBUF 3 94 #define T_NEWLINE 4 95 #define T_CONTROL 5 96 #define T_EIGHTBIT 6 97 #define T_CNTL8BIT 7 98 99 #define printof(c) ((c)^0100) 100 101 typedef void* (*Reserve_f)(Sfio_t*, ssize_t, int); 102 103 #ifndef sfvalue 104 #define sfvalue(f) ((f)->_val) 105 #endif 106 107 static void* 108 regress(Sfio_t* sp, ssize_t n, int f) 109 { 110 void* r; 111 112 if (!(r = sfreserve(sp, 4, f))) 113 r = sfreserve(sp, n, f); 114 else if (sfvalue(sp) > 4) 115 sfvalue(sp) = 4; 116 return r; 117 } 118 119 /* 120 * called for any special output processing 121 */ 122 123 static int 124 vcat(register char* states, Sfio_t* ip, Sfio_t* op, Reserve_f reserve, int flags) 125 { 126 register unsigned char* cp; 127 register unsigned char* pp; 128 unsigned char* cur; 129 unsigned char* end; 130 unsigned char* buf; 131 unsigned char* nxt; 132 register int n; 133 register int line; 134 register int raw; 135 int last; 136 int c; 137 int m; 138 int any; 139 int header; 140 141 unsigned char meta[4]; 142 unsigned char tmp[32]; 143 144 meta[0] = 'M'; 145 meta[1] = '-'; 146 last = -1; 147 *(cp = buf = end = tmp) = 0; 148 any = 0; 149 header = flags & (B_FLAG|N_FLAG); 150 line = 1; 151 states[0] = T_ENDBUF; 152 raw = !mbwide(); 153 for (;;) 154 { 155 cur = cp; 156 if (raw) 157 while (!(n = states[*cp++])); 158 else 159 for (;;) 160 { 161 while (!(n = states[*cp++])); 162 if (n < T_CONTROL) 163 break; 164 if ((m = mbsize(pp = cp - 1)) > 1) 165 cp += m - 1; 166 else 167 { 168 if (m <= 0) 169 { 170 if (cur == pp) 171 { 172 if (last > 0) 173 { 174 *end = last; 175 last = -1; 176 c = end - pp + 1; 177 if ((m = mbsize(pp)) == c) 178 { 179 any = 1; 180 if (header) 181 { 182 header = 0; 183 sfprintf(op, "%6d\t", line); 184 } 185 sfwrite(op, cur, m); 186 *(cp = cur = end) = 0; 187 } 188 else 189 { 190 memcpy(tmp, pp, c); 191 if (!(nxt = (unsigned char*)(*reserve)(ip, SF_UNBOUND, 0))) 192 { 193 states[0] = sfvalue(ip) ? T_ERROR : T_EOF; 194 *(cp = end = tmp + sizeof(tmp) - 1) = 0; 195 last = -1; 196 } 197 else if ((n = sfvalue(ip)) <= 0) 198 { 199 states[0] = n ? T_ERROR : T_EOF; 200 *(cp = end = tmp + sizeof(tmp) - 1) = 0; 201 last = -1; 202 } 203 else 204 { 205 cp = buf = nxt; 206 end = buf + n - 1; 207 last = *end; 208 *end = 0; 209 } 210 mb: 211 if ((n = end - cp + 1) >= (sizeof(tmp) - c)) 212 n = sizeof(tmp) - c - 1; 213 memcpy(tmp + c, cp, n); 214 if ((m = mbsize(tmp)) >= c) 215 { 216 any = 1; 217 if (header) 218 { 219 header = 0; 220 sfprintf(op, "%6d\t", line); 221 } 222 sfwrite(op, tmp, m); 223 cur = cp += m - c; 224 } 225 } 226 continue; 227 } 228 } 229 else 230 { 231 cp = pp + 1; 232 n = 0; 233 } 234 } 235 break; 236 } 237 } 238 c = *--cp; 239 if ((m = cp - cur) || n >= T_CONTROL) 240 { 241 flush: 242 any = 1; 243 if (header) 244 { 245 header = 0; 246 sfprintf(op, "%6d\t", line); 247 } 248 if (m) 249 sfwrite(op, cur, m); 250 } 251 special: 252 switch (n) 253 { 254 case T_ERROR: 255 if (cp != end) 256 { 257 n = T_CONTROL; 258 goto flush; 259 } 260 return -1; 261 case T_EOF: 262 if (cp != end) 263 { 264 n = T_CONTROL; 265 goto flush; 266 } 267 return 0; 268 case T_ENDBUF: 269 if (cp != end) 270 { 271 n = T_CONTROL; 272 goto flush; 273 } 274 c = last; 275 if (!(nxt = (unsigned char*)(*reserve)(ip, SF_UNBOUND, 0))) 276 { 277 *(cp = end = tmp) = 0; 278 states[0] = sfvalue(ip) ? T_ERROR : T_EOF; 279 last = -1; 280 } 281 else if ((m = sfvalue(ip)) <= 0) 282 { 283 *(cp = end = tmp) = 0; 284 states[0] = m ? T_ERROR : T_EOF; 285 last = -1; 286 } 287 else 288 { 289 buf = nxt; 290 end = buf + m - 1; 291 last = *end; 292 *end = 0; 293 cp = buf; 294 } 295 if (c >= 0) 296 { 297 if (!(n = states[c])) 298 { 299 *(cur = tmp) = c; 300 m = 1; 301 goto flush; 302 } 303 if (raw || n < T_CONTROL) 304 { 305 cp--; 306 goto special; 307 } 308 tmp[0] = c; 309 c = 1; 310 goto mb; 311 } 312 break; 313 case T_CONTROL: 314 do 315 { 316 sfputc(op, '^'); 317 sfputc(op, printof(c)); 318 } while (states[c = *++cp] == T_CONTROL); 319 break; 320 case T_CNTL8BIT: 321 meta[2] = '^'; 322 do 323 { 324 n = c & ~0200; 325 meta[3] = printof(n); 326 sfwrite(op, (char*)meta, 4); 327 } while (states[c = *++cp] == T_CNTL8BIT && raw); 328 break; 329 case T_EIGHTBIT: 330 do 331 { 332 meta[2] = c & ~0200; 333 sfwrite(op, (char*)meta, 3); 334 } while (states[c = *++cp] == T_EIGHTBIT && raw); 335 break; 336 case T_NEWLINE: 337 if (header && !(flags & B_FLAG)) 338 sfprintf(op, "%6d\t", line); 339 if (flags & E_FLAG) 340 sfputc(op, '$'); 341 sfputc(op, '\n'); 342 if (!header || !(flags & B_FLAG)) 343 line++; 344 header = !(flags & S_FLAG); 345 for (;;) 346 { 347 if ((n = states[*++cp]) == T_ENDBUF) 348 { 349 if (cp != end || last != '\n') 350 break; 351 if (!(nxt = (unsigned char*)(*reserve)(ip, SF_UNBOUND, 0))) 352 { 353 states[0] = sfvalue(ip) ? T_ERROR : T_EOF; 354 cp = end = tmp; 355 *cp-- = 0; 356 last = -1; 357 } 358 else if ((n = sfvalue(ip)) <= 0) 359 { 360 states[0] = n ? T_ERROR : T_EOF; 361 cp = end = tmp; 362 *cp-- = 0; 363 last = -1; 364 } 365 else 366 { 367 buf = nxt; 368 end = buf + n - 1; 369 last = *end; 370 *end = 0; 371 cp = buf - 1; 372 } 373 } 374 else if (n != T_NEWLINE) 375 break; 376 if (!(flags & S_FLAG) || any || header) 377 { 378 any = 0; 379 header = 0; 380 if ((flags & (B_FLAG|N_FLAG)) == N_FLAG) 381 sfprintf(op, "%6d\t", line); 382 if (flags & E_FLAG) 383 sfputc(op, '$'); 384 sfputc(op, '\n'); 385 } 386 if (!(flags & B_FLAG)) 387 line++; 388 } 389 header = flags & (B_FLAG|N_FLAG); 390 break; 391 } 392 } 393 } 394 395 int 396 b_cat(int argc, char** argv, void* context) 397 { 398 register int n; 399 register int flags = 0; 400 register char* cp; 401 register Sfio_t* fp; 402 char* mode; 403 Reserve_f reserve = sfreserve; 404 int att; 405 int dovcat = 0; 406 char states[UCHAR_MAX+1]; 407 408 cmdinit(argc, argv, context, ERROR_CATALOG, 0); 409 att = !strcmp(astconf("UNIVERSE", NiL, NiL), "att"); 410 mode = "r"; 411 for (;;) 412 { 413 n = 0; 414 switch (optget(argv, usage)) 415 { 416 case 'A': 417 n = T_FLAG|E_FLAG|V_FLAG; 418 break; 419 case 'B': 420 n = S_FLAG; 421 break; 422 case 'b': 423 n = B_FLAG; 424 break; 425 case 'd': 426 mode = opt_info.num ? "rt" : "r"; 427 continue; 428 case 'D': 429 n = d_FLAG; 430 break; 431 case 'E': 432 n = E_FLAG; 433 break; 434 case 'e': 435 n = E_FLAG|V_FLAG; 436 break; 437 case 'n': 438 n = N_FLAG; 439 break; 440 case 'R': 441 reserve = opt_info.num ? regress : sfreserve; 442 continue; 443 case 's': 444 n = att ? F_FLAG : S_FLAG; 445 break; 446 case 'S': 447 n = F_FLAG; 448 break; 449 case 'T': 450 n = T_FLAG; 451 break; 452 case 't': 453 n = T_FLAG|V_FLAG; 454 break; 455 case 'u': 456 n = U_FLAG; 457 break; 458 case 'v': 459 n = V_FLAG; 460 break; 461 case ':': 462 error(2, "%s", opt_info.arg); 463 break; 464 case '?': 465 error(ERROR_usage(2), "%s", opt_info.arg); 466 break; 467 } 468 if (!n) 469 break; 470 if (opt_info.num) 471 flags |= n; 472 else 473 flags &= ~n; 474 } 475 argv += opt_info.index; 476 if (error_info.errors) 477 error(ERROR_usage(2), "%s", optusage(NiL)); 478 memset(states, 0, sizeof(states)); 479 if (flags&V_FLAG) 480 { 481 memset(states, T_CONTROL, ' '); 482 states[RUBOUT] = T_CONTROL; 483 memset(states+0200, T_EIGHTBIT, 0200); 484 memset(states+0200, T_CNTL8BIT, ' '); 485 states[RUBOUT|0200] = T_CNTL8BIT; 486 states['\n'] = 0; 487 } 488 if (flags&T_FLAG) 489 states['\t'] = T_CONTROL; 490 states[0] = T_ENDBUF; 491 if (att) 492 { 493 if (flags&V_FLAG) 494 { 495 states['\n'|0200] = T_EIGHTBIT; 496 if (!(flags&T_FLAG)) 497 { 498 states['\t'] = states['\f'] = 0; 499 states['\t'|0200] = states['\f'|0200] = T_EIGHTBIT; 500 } 501 } 502 } 503 else if (flags) 504 { 505 if (!(flags&T_FLAG)) 506 states['\t'] = 0; 507 } 508 if (flags&(V_FLAG|T_FLAG|N_FLAG|E_FLAG|B_FLAG|S_FLAG)) 509 { 510 states['\n'] = T_NEWLINE; 511 dovcat = 1; 512 } 513 if (flags&d_FLAG) 514 sfopen(sfstdout, NiL, "wt"); 515 if (cp = *argv) 516 argv++; 517 do 518 { 519 if (!cp || streq(cp, "-")) 520 { 521 fp = sfstdin; 522 if (flags&D_FLAG) 523 sfopen(fp, NiL, mode); 524 } 525 else if (!(fp = sfopen(NiL, cp, mode))) 526 { 527 if (!(flags&F_FLAG)) 528 error(ERROR_system(0), "%s: cannot open", cp); 529 error_info.errors = 1; 530 continue; 531 } 532 if (flags&U_FLAG) 533 sfsetbuf(fp, (void*)fp, -1); 534 if (dovcat) 535 n = vcat(states, fp, sfstdout, reserve, flags); 536 else if (sfmove(fp, sfstdout, SF_UNBOUND, -1) >= 0 && sfeof(fp)) 537 n = 0; 538 else 539 n = -1; 540 if (fp != sfstdin) 541 sfclose(fp); 542 if (n < 0 && errno != EPIPE) 543 { 544 if (cp) 545 error(ERROR_system(0), "%s: read error", cp); 546 else 547 error(ERROR_system(0), "read error"); 548 } 549 if (sferror(sfstdout)) 550 break; 551 } while (cp = *argv++); 552 if (sfsync(sfstdout)) 553 error(ERROR_system(0), "write error"); 554 if (flags&d_FLAG) 555 sfopen(sfstdout, NiL, "w"); 556 return error_info.errors; 557 } 558