1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Michael Rendell of the Memorial University of Newfoundland. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #ifndef lint 34 static const char copyright[] = 35 "@(#) Copyright (c) 1990, 1993, 1994\n\ 36 The Regents of the University of California. All rights reserved.\n"; 37 #endif 38 39 #if 0 40 #ifndef lint 41 static char sccsid[] = "@(#)col.c 8.5 (Berkeley) 5/4/95"; 42 #endif 43 #endif 44 45 #include <sys/cdefs.h> 46 __FBSDID("$FreeBSD$"); 47 48 #include <sys/capsicum.h> 49 50 #include <err.h> 51 #include <errno.h> 52 #include <locale.h> 53 #include <stdio.h> 54 #include <stdlib.h> 55 #include <string.h> 56 #include <termios.h> 57 #include <unistd.h> 58 #include <wchar.h> 59 #include <wctype.h> 60 61 #define BS '\b' /* backspace */ 62 #define TAB '\t' /* tab */ 63 #define SPACE ' ' /* space */ 64 #define NL '\n' /* newline */ 65 #define CR '\r' /* carriage return */ 66 #define ESC '\033' /* escape */ 67 #define SI '\017' /* shift in to normal character set */ 68 #define SO '\016' /* shift out to alternate character set */ 69 #define VT '\013' /* vertical tab (aka reverse line feed) */ 70 #define RLF '7' /* ESC-7 reverse line feed */ 71 #define RHLF '8' /* ESC-8 reverse half-line feed */ 72 #define FHLF '9' /* ESC-9 forward half-line feed */ 73 74 /* build up at least this many lines before flushing them out */ 75 #define BUFFER_MARGIN 32 76 77 typedef char CSET; 78 79 typedef struct char_str { 80 #define CS_NORMAL 1 81 #define CS_ALTERNATE 2 82 short c_column; /* column character is in */ 83 CSET c_set; /* character set (currently only 2) */ 84 wchar_t c_char; /* character in question */ 85 int c_width; /* character width */ 86 } CHAR; 87 88 typedef struct line_str LINE; 89 struct line_str { 90 CHAR *l_line; /* characters on the line */ 91 LINE *l_prev; /* previous line */ 92 LINE *l_next; /* next line */ 93 int l_lsize; /* allocated sizeof l_line */ 94 int l_line_len; /* strlen(l_line) */ 95 int l_needs_sort; /* set if chars went in out of order */ 96 int l_max_col; /* max column in the line */ 97 }; 98 99 static void addto_lineno(int *, int); 100 static LINE *alloc_line(void); 101 static void dowarn(int); 102 static void flush_line(LINE *); 103 static void flush_lines(int); 104 static void flush_blanks(void); 105 static void free_line(LINE *); 106 static void usage(void); 107 108 static CSET last_set; /* char_set of last char printed */ 109 static LINE *lines; 110 static int compress_spaces; /* if doing space -> tab conversion */ 111 static int fine; /* if `fine' resolution (half lines) */ 112 static int max_bufd_lines; /* max # of half lines to keep in memory */ 113 static int nblank_lines; /* # blanks after last flushed line */ 114 static int no_backspaces; /* if not to output any backspaces */ 115 static int pass_unknown_seqs; /* pass unknown control sequences */ 116 117 #define PUTC(ch) \ 118 do { \ 119 if (putwchar(ch) == WEOF) \ 120 errx(1, "write error"); \ 121 } while (0) 122 123 int 124 main(int argc, char **argv) 125 { 126 wint_t ch; 127 CHAR *c; 128 CSET cur_set; /* current character set */ 129 LINE *l; /* current line */ 130 int extra_lines; /* # of lines above first line */ 131 int cur_col; /* current column */ 132 int cur_line; /* line number of current position */ 133 int max_line; /* max value of cur_line */ 134 int this_line; /* line l points to */ 135 int nflushd_lines; /* number of lines that were flushed */ 136 int adjust, opt, warned, width; 137 const char *errstr; 138 cap_rights_t rights; 139 unsigned long cmd; 140 141 (void)setlocale(LC_CTYPE, ""); 142 143 cap_rights_init(&rights, CAP_FSTAT, CAP_READ); 144 if (cap_rights_limit(STDIN_FILENO, &rights) < 0 && errno != ENOSYS) 145 err(1, "unable to limit rights for stdin"); 146 cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE, CAP_IOCTL); 147 if (cap_rights_limit(STDOUT_FILENO, &rights) < 0 && errno != ENOSYS) 148 err(1, "unable to limit rights for stdout"); 149 cmd = TIOCGETA; /* required by isatty(3) in printf(3) */ 150 if (cap_ioctls_limit(STDOUT_FILENO, &cmd, 1) < 0 && errno != ENOSYS) 151 err(1, "unable to limit ioctls for stdout"); 152 153 if (cap_enter() < 0 && errno != ENOSYS) 154 err(1, "unable to enter capability mode"); 155 156 max_bufd_lines = 256; 157 compress_spaces = 1; /* compress spaces into tabs */ 158 while ((opt = getopt(argc, argv, "bfhl:px")) != -1) 159 switch (opt) { 160 case 'b': /* do not output backspaces */ 161 no_backspaces = 1; 162 break; 163 case 'f': /* allow half forward line feeds */ 164 fine = 1; 165 break; 166 case 'h': /* compress spaces into tabs */ 167 compress_spaces = 1; 168 break; 169 case 'l': /* buffered line count */ 170 max_bufd_lines = strtonum(optarg, 1, 171 (INT_MAX - BUFFER_MARGIN) / 2, &errstr) * 2; 172 if (errstr != NULL) 173 errx(1, "bad -l argument, %s: %s", errstr, 174 optarg); 175 break; 176 case 'p': /* pass unknown control sequences */ 177 pass_unknown_seqs = 1; 178 break; 179 case 'x': /* do not compress spaces into tabs */ 180 compress_spaces = 0; 181 break; 182 case '?': 183 default: 184 usage(); 185 } 186 187 if (optind != argc) 188 usage(); 189 190 adjust = cur_col = extra_lines = warned = 0; 191 cur_line = max_line = nflushd_lines = this_line = 0; 192 cur_set = last_set = CS_NORMAL; 193 lines = l = alloc_line(); 194 195 while ((ch = getwchar()) != WEOF) { 196 if (!iswgraph(ch)) { 197 switch (ch) { 198 case BS: /* can't go back further */ 199 if (cur_col == 0) 200 continue; 201 --cur_col; 202 continue; 203 case CR: 204 cur_col = 0; 205 continue; 206 case ESC: /* just ignore EOF */ 207 switch(getwchar()) { 208 /* 209 * In the input stream, accept both the 210 * XPG5 sequences ESC-digit and the 211 * traditional BSD sequences ESC-ctrl. 212 */ 213 case '\007': 214 /* FALLTHROUGH */ 215 case RLF: 216 addto_lineno(&cur_line, -2); 217 break; 218 case '\010': 219 /* FALLTHROUGH */ 220 case RHLF: 221 addto_lineno(&cur_line, -1); 222 break; 223 case '\011': 224 /* FALLTHROUGH */ 225 case FHLF: 226 addto_lineno(&cur_line, 1); 227 if (cur_line > max_line) 228 max_line = cur_line; 229 } 230 continue; 231 case NL: 232 addto_lineno(&cur_line, 2); 233 if (cur_line > max_line) 234 max_line = cur_line; 235 cur_col = 0; 236 continue; 237 case SPACE: 238 ++cur_col; 239 continue; 240 case SI: 241 cur_set = CS_NORMAL; 242 continue; 243 case SO: 244 cur_set = CS_ALTERNATE; 245 continue; 246 case TAB: /* adjust column */ 247 cur_col |= 7; 248 ++cur_col; 249 continue; 250 case VT: 251 addto_lineno(&cur_line, -2); 252 continue; 253 } 254 if (iswspace(ch)) { 255 if ((width = wcwidth(ch)) > 0) 256 cur_col += width; 257 continue; 258 } 259 if (!pass_unknown_seqs) 260 continue; 261 } 262 263 /* Must stuff ch in a line - are we at the right one? */ 264 if (cur_line + adjust != this_line) { 265 LINE *lnew; 266 267 /* round up to next line */ 268 adjust = !fine && (cur_line & 1); 269 270 if (cur_line + adjust < this_line) { 271 while (cur_line + adjust < this_line && 272 l->l_prev != NULL) { 273 l = l->l_prev; 274 this_line--; 275 } 276 if (cur_line + adjust < this_line) { 277 if (nflushd_lines == 0) { 278 /* 279 * Allow backup past first 280 * line if nothing has been 281 * flushed yet. 282 */ 283 while (cur_line + adjust 284 < this_line) { 285 lnew = alloc_line(); 286 l->l_prev = lnew; 287 lnew->l_next = l; 288 l = lines = lnew; 289 extra_lines++; 290 this_line--; 291 } 292 } else { 293 if (!warned++) 294 dowarn(cur_line); 295 cur_line = this_line - adjust; 296 } 297 } 298 } else { 299 /* may need to allocate here */ 300 while (cur_line + adjust > this_line) { 301 if (l->l_next == NULL) { 302 l->l_next = alloc_line(); 303 l->l_next->l_prev = l; 304 } 305 l = l->l_next; 306 this_line++; 307 } 308 } 309 if (this_line > nflushd_lines && 310 this_line - nflushd_lines >= 311 max_bufd_lines + BUFFER_MARGIN) { 312 if (extra_lines) { 313 flush_lines(extra_lines); 314 extra_lines = 0; 315 } 316 flush_lines(this_line - nflushd_lines - 317 max_bufd_lines); 318 nflushd_lines = this_line - max_bufd_lines; 319 } 320 } 321 /* grow line's buffer? */ 322 if (l->l_line_len + 1 >= l->l_lsize) { 323 int need; 324 325 need = l->l_lsize ? l->l_lsize * 2 : 90; 326 if ((l->l_line = realloc(l->l_line, 327 (unsigned)need * sizeof(CHAR))) == NULL) 328 err(1, NULL); 329 l->l_lsize = need; 330 } 331 c = &l->l_line[l->l_line_len++]; 332 c->c_char = ch; 333 c->c_set = cur_set; 334 c->c_column = cur_col; 335 c->c_width = wcwidth(ch); 336 /* 337 * If things are put in out of order, they will need sorting 338 * when it is flushed. 339 */ 340 if (cur_col < l->l_max_col) 341 l->l_needs_sort = 1; 342 else 343 l->l_max_col = cur_col; 344 if (c->c_width > 0) 345 cur_col += c->c_width; 346 } 347 if (ferror(stdin)) 348 err(1, NULL); 349 if (extra_lines) 350 flush_lines(extra_lines); 351 352 /* goto the last line that had a character on it */ 353 for (; l->l_next; l = l->l_next) 354 this_line++; 355 flush_lines(this_line - nflushd_lines + 1); 356 357 /* make sure we leave things in a sane state */ 358 if (last_set != CS_NORMAL) 359 PUTC(SI); 360 361 /* flush out the last few blank lines */ 362 if (max_line > this_line) 363 nblank_lines = max_line - this_line; 364 if (max_line & 1) 365 nblank_lines++; 366 flush_blanks(); 367 exit(0); 368 } 369 370 static void 371 flush_lines(int nflush) 372 { 373 LINE *l; 374 375 while (--nflush >= 0) { 376 l = lines; 377 lines = l->l_next; 378 if (l->l_line) { 379 flush_blanks(); 380 flush_line(l); 381 } 382 if (l->l_line || l->l_next) 383 nblank_lines++; 384 if (l->l_line) 385 (void)free(l->l_line); 386 free_line(l); 387 } 388 if (lines) 389 lines->l_prev = NULL; 390 } 391 392 /* 393 * Print a number of newline/half newlines. If fine flag is set, nblank_lines 394 * is the number of half line feeds, otherwise it is the number of whole line 395 * feeds. 396 */ 397 static void 398 flush_blanks(void) 399 { 400 int half, i, nb; 401 402 half = 0; 403 nb = nblank_lines; 404 if (nb & 1) { 405 if (fine) 406 half = 1; 407 else 408 nb++; 409 } 410 nb /= 2; 411 for (i = nb; --i >= 0;) 412 PUTC('\n'); 413 if (half) { 414 PUTC(ESC); 415 PUTC(FHLF); 416 if (!nb) 417 PUTC('\r'); 418 } 419 nblank_lines = 0; 420 } 421 422 /* 423 * Write a line to stdout taking care of space to tab conversion (-h flag) 424 * and character set shifts. 425 */ 426 static void 427 flush_line(LINE *l) 428 { 429 CHAR *c, *endc; 430 int i, j, nchars, last_col, save, this_col, tot; 431 432 last_col = 0; 433 nchars = l->l_line_len; 434 435 if (l->l_needs_sort) { 436 static CHAR *sorted; 437 static int count_size, *count, sorted_size; 438 439 /* 440 * Do an O(n) sort on l->l_line by column being careful to 441 * preserve the order of characters in the same column. 442 */ 443 if (l->l_lsize > sorted_size) { 444 sorted_size = l->l_lsize; 445 if ((sorted = realloc(sorted, 446 (unsigned)sizeof(CHAR) * sorted_size)) == NULL) 447 err(1, NULL); 448 } 449 if (l->l_max_col >= count_size) { 450 count_size = l->l_max_col + 1; 451 if ((count = realloc(count, 452 (unsigned)sizeof(int) * count_size)) == NULL) 453 err(1, NULL); 454 } 455 memset(count, 0, sizeof(int) * l->l_max_col + 1); 456 for (i = nchars, c = l->l_line; --i >= 0; c++) 457 count[c->c_column]++; 458 459 /* 460 * calculate running total (shifted down by 1) to use as 461 * indices into new line. 462 */ 463 for (tot = 0, i = 0; i <= l->l_max_col; i++) { 464 save = count[i]; 465 count[i] = tot; 466 tot += save; 467 } 468 469 for (i = nchars, c = l->l_line; --i >= 0; c++) 470 sorted[count[c->c_column]++] = *c; 471 c = sorted; 472 } else 473 c = l->l_line; 474 while (nchars > 0) { 475 this_col = c->c_column; 476 endc = c; 477 do { 478 ++endc; 479 } while (--nchars > 0 && this_col == endc->c_column); 480 481 /* if -b only print last character */ 482 if (no_backspaces) { 483 c = endc - 1; 484 if (nchars > 0 && 485 this_col + c->c_width > endc->c_column) 486 continue; 487 } 488 489 if (this_col > last_col) { 490 int nspace = this_col - last_col; 491 492 if (compress_spaces && nspace > 1) { 493 while (1) { 494 int tab_col, tab_size; 495 496 tab_col = (last_col + 8) & ~7; 497 if (tab_col > this_col) 498 break; 499 tab_size = tab_col - last_col; 500 if (tab_size == 1) 501 PUTC(' '); 502 else 503 PUTC('\t'); 504 nspace -= tab_size; 505 last_col = tab_col; 506 } 507 } 508 while (--nspace >= 0) 509 PUTC(' '); 510 last_col = this_col; 511 } 512 513 for (;;) { 514 if (c->c_set != last_set) { 515 switch (c->c_set) { 516 case CS_NORMAL: 517 PUTC(SI); 518 break; 519 case CS_ALTERNATE: 520 PUTC(SO); 521 } 522 last_set = c->c_set; 523 } 524 PUTC(c->c_char); 525 if ((c + 1) < endc) 526 for (j = 0; j < c->c_width; j++) 527 PUTC('\b'); 528 if (++c >= endc) 529 break; 530 } 531 last_col += (c - 1)->c_width; 532 } 533 } 534 535 /* 536 * Increment or decrement a line number, checking for overflow. 537 * Stop one below INT_MAX such that the adjust variable is safe. 538 */ 539 void 540 addto_lineno(int *lno, int offset) 541 { 542 if (offset > 0) { 543 if (*lno >= INT_MAX - offset) 544 errx(1, "too many lines"); 545 } else { 546 if (*lno < INT_MIN - offset) 547 errx(1, "too many reverse line feeds"); 548 } 549 *lno += offset; 550 } 551 552 #define NALLOC 64 553 554 static LINE *line_freelist; 555 556 static LINE * 557 alloc_line(void) 558 { 559 LINE *l; 560 int i; 561 562 if (!line_freelist) { 563 if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL) 564 err(1, NULL); 565 line_freelist = l; 566 for (i = 1; i < NALLOC; i++, l++) 567 l->l_next = l + 1; 568 l->l_next = NULL; 569 } 570 l = line_freelist; 571 line_freelist = l->l_next; 572 573 memset(l, 0, sizeof(LINE)); 574 return (l); 575 } 576 577 static void 578 free_line(LINE *l) 579 { 580 581 l->l_next = line_freelist; 582 line_freelist = l; 583 } 584 585 static void 586 usage(void) 587 { 588 589 (void)fprintf(stderr, "usage: col [-bfhpx] [-l nline]\n"); 590 exit(1); 591 } 592 593 static void 594 dowarn(int line) 595 { 596 597 warnx("warning: can't back up %s", 598 line < 0 ? "past first line" : "-- line already flushed"); 599 } 600