1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1990, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Michael Rendell of the Memorial University of Newfoundland. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #ifndef lint 36 static const char copyright[] = 37 "@(#) Copyright (c) 1990, 1993, 1994\n\ 38 The Regents of the University of California. All rights reserved.\n"; 39 #endif 40 41 #if 0 42 #endif 43 44 #include <sys/cdefs.h> 45 #include <sys/capsicum.h> 46 47 #include <capsicum_helpers.h> 48 #include <err.h> 49 #include <errno.h> 50 #include <locale.h> 51 #include <stdio.h> 52 #include <stdlib.h> 53 #include <string.h> 54 #include <termios.h> 55 #include <unistd.h> 56 #include <wchar.h> 57 #include <wctype.h> 58 59 #define BS '\b' /* backspace */ 60 #define TAB '\t' /* tab */ 61 #define SPACE ' ' /* space */ 62 #define NL '\n' /* newline */ 63 #define CR '\r' /* carriage return */ 64 #define ESC '\033' /* escape */ 65 #define SI '\017' /* shift in to normal character set */ 66 #define SO '\016' /* shift out to alternate character set */ 67 #define VT '\013' /* vertical tab (aka reverse line feed) */ 68 #define RLF '7' /* ESC-7 reverse line feed */ 69 #define RHLF '8' /* ESC-8 reverse half-line feed */ 70 #define FHLF '9' /* ESC-9 forward half-line feed */ 71 72 /* build up at least this many lines before flushing them out */ 73 #define BUFFER_MARGIN 32 74 75 typedef char CSET; 76 77 typedef struct char_str { 78 #define CS_NORMAL 1 79 #define CS_ALTERNATE 2 80 short c_column; /* column character is in */ 81 CSET c_set; /* character set (currently only 2) */ 82 wchar_t c_char; /* character in question */ 83 int c_width; /* character width */ 84 } CHAR; 85 86 typedef struct line_str LINE; 87 struct line_str { 88 CHAR *l_line; /* characters on the line */ 89 LINE *l_prev; /* previous line */ 90 LINE *l_next; /* next line */ 91 int l_lsize; /* allocated sizeof l_line */ 92 int l_line_len; /* strlen(l_line) */ 93 int l_needs_sort; /* set if chars went in out of order */ 94 int l_max_col; /* max column in the line */ 95 }; 96 97 static void addto_lineno(int *, int); 98 static LINE *alloc_line(void); 99 static void dowarn(int); 100 static void flush_line(LINE *); 101 static void flush_lines(int); 102 static void flush_blanks(void); 103 static void free_line(LINE *); 104 static void usage(void); 105 106 static CSET last_set; /* char_set of last char printed */ 107 static LINE *lines; 108 static int compress_spaces; /* if doing space -> tab conversion */ 109 static int fine; /* if `fine' resolution (half lines) */ 110 static int max_bufd_lines; /* max # of half lines to keep in memory */ 111 static int nblank_lines; /* # blanks after last flushed line */ 112 static int no_backspaces; /* if not to output any backspaces */ 113 static int pass_unknown_seqs; /* pass unknown control sequences */ 114 115 #define PUTC(ch) \ 116 do { \ 117 if (putwchar(ch) == WEOF) \ 118 errx(1, "write error"); \ 119 } while (0) 120 121 int 122 main(int argc, char **argv) 123 { 124 wint_t ch; 125 CHAR *c; 126 CSET cur_set; /* current character set */ 127 LINE *l; /* current line */ 128 int extra_lines; /* # of lines above first line */ 129 int cur_col; /* current column */ 130 int cur_line; /* line number of current position */ 131 int max_line; /* max value of cur_line */ 132 int this_line; /* line l points to */ 133 int nflushd_lines; /* number of lines that were flushed */ 134 int adjust, opt, warned, width; 135 const char *errstr; 136 137 (void)setlocale(LC_CTYPE, ""); 138 139 if (caph_limit_stdio() == -1) 140 err(1, "unable to limit stdio"); 141 142 if (caph_enter() < 0) 143 err(1, "unable to enter capability mode"); 144 145 max_bufd_lines = 256; 146 compress_spaces = 1; /* compress spaces into tabs */ 147 while ((opt = getopt(argc, argv, "bfhl:px")) != -1) 148 switch (opt) { 149 case 'b': /* do not output backspaces */ 150 no_backspaces = 1; 151 break; 152 case 'f': /* allow half forward line feeds */ 153 fine = 1; 154 break; 155 case 'h': /* compress spaces into tabs */ 156 compress_spaces = 1; 157 break; 158 case 'l': /* buffered line count */ 159 max_bufd_lines = strtonum(optarg, 1, 160 (INT_MAX - BUFFER_MARGIN) / 2, &errstr) * 2; 161 if (errstr != NULL) 162 errx(1, "bad -l argument, %s: %s", errstr, 163 optarg); 164 break; 165 case 'p': /* pass unknown control sequences */ 166 pass_unknown_seqs = 1; 167 break; 168 case 'x': /* do not compress spaces into tabs */ 169 compress_spaces = 0; 170 break; 171 case '?': 172 default: 173 usage(); 174 } 175 176 if (optind != argc) 177 usage(); 178 179 adjust = cur_col = extra_lines = warned = 0; 180 cur_line = max_line = nflushd_lines = this_line = 0; 181 cur_set = last_set = CS_NORMAL; 182 lines = l = alloc_line(); 183 184 while ((ch = getwchar()) != WEOF) { 185 if (!iswgraph(ch)) { 186 switch (ch) { 187 case BS: /* can't go back further */ 188 if (cur_col == 0) 189 continue; 190 --cur_col; 191 continue; 192 case CR: 193 cur_col = 0; 194 continue; 195 case ESC: /* just ignore EOF */ 196 switch(getwchar()) { 197 /* 198 * In the input stream, accept both the 199 * XPG5 sequences ESC-digit and the 200 * traditional BSD sequences ESC-ctrl. 201 */ 202 case '\007': 203 /* FALLTHROUGH */ 204 case RLF: 205 addto_lineno(&cur_line, -2); 206 break; 207 case '\010': 208 /* FALLTHROUGH */ 209 case RHLF: 210 addto_lineno(&cur_line, -1); 211 break; 212 case '\011': 213 /* FALLTHROUGH */ 214 case FHLF: 215 addto_lineno(&cur_line, 1); 216 if (cur_line > max_line) 217 max_line = cur_line; 218 } 219 continue; 220 case NL: 221 addto_lineno(&cur_line, 2); 222 if (cur_line > max_line) 223 max_line = cur_line; 224 cur_col = 0; 225 continue; 226 case SPACE: 227 ++cur_col; 228 continue; 229 case SI: 230 cur_set = CS_NORMAL; 231 continue; 232 case SO: 233 cur_set = CS_ALTERNATE; 234 continue; 235 case TAB: /* adjust column */ 236 cur_col |= 7; 237 ++cur_col; 238 continue; 239 case VT: 240 addto_lineno(&cur_line, -2); 241 continue; 242 } 243 if (iswspace(ch)) { 244 if ((width = wcwidth(ch)) > 0) 245 cur_col += width; 246 continue; 247 } 248 if (!pass_unknown_seqs) 249 continue; 250 } 251 252 /* Must stuff ch in a line - are we at the right one? */ 253 if (cur_line + adjust != this_line) { 254 LINE *lnew; 255 256 /* round up to next line */ 257 adjust = !fine && (cur_line & 1); 258 259 if (cur_line + adjust < this_line) { 260 while (cur_line + adjust < this_line && 261 l->l_prev != NULL) { 262 l = l->l_prev; 263 this_line--; 264 } 265 if (cur_line + adjust < this_line) { 266 if (nflushd_lines == 0) { 267 /* 268 * Allow backup past first 269 * line if nothing has been 270 * flushed yet. 271 */ 272 while (cur_line + adjust 273 < this_line) { 274 lnew = alloc_line(); 275 l->l_prev = lnew; 276 lnew->l_next = l; 277 l = lines = lnew; 278 extra_lines++; 279 this_line--; 280 } 281 } else { 282 if (!warned++) 283 dowarn(cur_line); 284 cur_line = this_line - adjust; 285 } 286 } 287 } else { 288 /* may need to allocate here */ 289 while (cur_line + adjust > this_line) { 290 if (l->l_next == NULL) { 291 l->l_next = alloc_line(); 292 l->l_next->l_prev = l; 293 } 294 l = l->l_next; 295 this_line++; 296 } 297 } 298 if (this_line > nflushd_lines && 299 this_line - nflushd_lines >= 300 max_bufd_lines + BUFFER_MARGIN) { 301 if (extra_lines) { 302 flush_lines(extra_lines); 303 extra_lines = 0; 304 } 305 flush_lines(this_line - nflushd_lines - 306 max_bufd_lines); 307 nflushd_lines = this_line - max_bufd_lines; 308 } 309 } 310 /* grow line's buffer? */ 311 if (l->l_line_len + 1 >= l->l_lsize) { 312 int need; 313 314 need = l->l_lsize ? l->l_lsize * 2 : 90; 315 if ((l->l_line = realloc(l->l_line, 316 (unsigned)need * sizeof(CHAR))) == NULL) 317 err(1, NULL); 318 l->l_lsize = need; 319 } 320 c = &l->l_line[l->l_line_len++]; 321 c->c_char = ch; 322 c->c_set = cur_set; 323 c->c_column = cur_col; 324 c->c_width = wcwidth(ch); 325 /* 326 * If things are put in out of order, they will need sorting 327 * when it is flushed. 328 */ 329 if (cur_col < l->l_max_col) 330 l->l_needs_sort = 1; 331 else 332 l->l_max_col = cur_col; 333 if (c->c_width > 0) 334 cur_col += c->c_width; 335 } 336 if (ferror(stdin)) 337 err(1, NULL); 338 if (extra_lines) { 339 /* 340 * Extra lines only exist if no lines have been flushed 341 * yet. This means that 'lines' must point to line zero 342 * after we flush the extra lines. 343 */ 344 flush_lines(extra_lines); 345 l = lines; 346 this_line = 0; 347 } 348 349 /* goto the last line that had a character on it */ 350 for (; l->l_next; l = l->l_next) 351 this_line++; 352 flush_lines(this_line - nflushd_lines + 1); 353 354 /* make sure we leave things in a sane state */ 355 if (last_set != CS_NORMAL) 356 PUTC(SI); 357 358 /* flush out the last few blank lines */ 359 if (max_line >= this_line) 360 nblank_lines = max_line - this_line + (max_line & 1); 361 if (nblank_lines == 0) 362 /* end with a newline even if the source doesn't */ 363 nblank_lines = 2; 364 flush_blanks(); 365 exit(0); 366 } 367 368 /* 369 * Prints the first 'nflush' lines. Printed lines are freed. 370 * After this function returns, 'lines' points to the first 371 * of the remaining lines, and 'nblank_lines' will have the 372 * number of half line feeds between the final flushed line 373 * and the first remaining line. 374 */ 375 static void 376 flush_lines(int nflush) 377 { 378 LINE *l; 379 380 while (--nflush >= 0) { 381 l = lines; 382 lines = l->l_next; 383 if (l->l_line) { 384 flush_blanks(); 385 flush_line(l); 386 free(l->l_line); 387 } 388 if (l->l_next) 389 nblank_lines++; 390 free_line(l); 391 } 392 if (lines) 393 lines->l_prev = NULL; 394 } 395 396 /* 397 * Print a number of newline/half newlines. 398 * nblank_lines is the number of half line feeds. 399 */ 400 static void 401 flush_blanks(void) 402 { 403 int half, i, nb; 404 405 half = 0; 406 nb = nblank_lines; 407 if (nb & 1) { 408 if (fine) 409 half = 1; 410 else 411 nb++; 412 } 413 nb /= 2; 414 for (i = nb; --i >= 0;) 415 PUTC('\n'); 416 if (half) { 417 PUTC(ESC); 418 PUTC(FHLF); 419 if (!nb) 420 PUTC('\r'); 421 } 422 nblank_lines = 0; 423 } 424 425 /* 426 * Write a line to stdout taking care of space to tab conversion (-h flag) 427 * and character set shifts. 428 */ 429 static void 430 flush_line(LINE *l) 431 { 432 CHAR *c, *endc; 433 int i, j, nchars, last_col, save, this_col, tot; 434 435 last_col = 0; 436 nchars = l->l_line_len; 437 438 if (l->l_needs_sort) { 439 static CHAR *sorted; 440 static int count_size, *count, sorted_size; 441 442 /* 443 * Do an O(n) sort on l->l_line by column being careful to 444 * preserve the order of characters in the same column. 445 */ 446 if (l->l_lsize > sorted_size) { 447 sorted_size = l->l_lsize; 448 if ((sorted = realloc(sorted, 449 (unsigned)sizeof(CHAR) * sorted_size)) == NULL) 450 err(1, NULL); 451 } 452 if (l->l_max_col >= count_size) { 453 count_size = l->l_max_col + 1; 454 if ((count = realloc(count, 455 (unsigned)sizeof(int) * count_size)) == NULL) 456 err(1, NULL); 457 } 458 memset(count, 0, sizeof(int) * l->l_max_col + 1); 459 for (i = nchars, c = l->l_line; --i >= 0; c++) 460 count[c->c_column]++; 461 462 /* 463 * calculate running total (shifted down by 1) to use as 464 * indices into new line. 465 */ 466 for (tot = 0, i = 0; i <= l->l_max_col; i++) { 467 save = count[i]; 468 count[i] = tot; 469 tot += save; 470 } 471 472 for (i = nchars, c = l->l_line; --i >= 0; c++) 473 sorted[count[c->c_column]++] = *c; 474 c = sorted; 475 } else 476 c = l->l_line; 477 while (nchars > 0) { 478 this_col = c->c_column; 479 endc = c; 480 do { 481 ++endc; 482 } while (--nchars > 0 && this_col == endc->c_column); 483 484 /* if -b only print last character */ 485 if (no_backspaces) { 486 c = endc - 1; 487 if (nchars > 0 && 488 this_col + c->c_width > endc->c_column) 489 continue; 490 } 491 492 if (this_col > last_col) { 493 int nspace = this_col - last_col; 494 495 if (compress_spaces && nspace > 1) { 496 while (1) { 497 int tab_col, tab_size; 498 499 tab_col = (last_col + 8) & ~7; 500 if (tab_col > this_col) 501 break; 502 tab_size = tab_col - last_col; 503 if (tab_size == 1) 504 PUTC(' '); 505 else 506 PUTC('\t'); 507 nspace -= tab_size; 508 last_col = tab_col; 509 } 510 } 511 while (--nspace >= 0) 512 PUTC(' '); 513 last_col = this_col; 514 } 515 516 for (;;) { 517 if (c->c_set != last_set) { 518 switch (c->c_set) { 519 case CS_NORMAL: 520 PUTC(SI); 521 break; 522 case CS_ALTERNATE: 523 PUTC(SO); 524 } 525 last_set = c->c_set; 526 } 527 PUTC(c->c_char); 528 if ((c + 1) < endc) 529 for (j = 0; j < c->c_width; j++) 530 PUTC('\b'); 531 if (++c >= endc) 532 break; 533 } 534 last_col += (c - 1)->c_width; 535 } 536 } 537 538 /* 539 * Increment or decrement a line number, checking for overflow. 540 * Stop one below INT_MAX such that the adjust variable is safe. 541 */ 542 void 543 addto_lineno(int *lno, int offset) 544 { 545 if (offset > 0) { 546 if (*lno >= INT_MAX - offset) 547 errx(1, "too many lines"); 548 } else { 549 if (*lno < INT_MIN - offset) 550 errx(1, "too many reverse line feeds"); 551 } 552 *lno += offset; 553 } 554 555 #define NALLOC 64 556 557 static LINE *line_freelist; 558 559 static LINE * 560 alloc_line(void) 561 { 562 LINE *l; 563 int i; 564 565 if (!line_freelist) { 566 if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL) 567 err(1, NULL); 568 line_freelist = l; 569 for (i = 1; i < NALLOC; i++, l++) 570 l->l_next = l + 1; 571 l->l_next = NULL; 572 } 573 l = line_freelist; 574 line_freelist = l->l_next; 575 576 memset(l, 0, sizeof(LINE)); 577 return (l); 578 } 579 580 static void 581 free_line(LINE *l) 582 { 583 584 l->l_next = line_freelist; 585 line_freelist = l; 586 } 587 588 static void 589 usage(void) 590 { 591 592 (void)fprintf(stderr, "usage: col [-bfhpx] [-l nline]\n"); 593 exit(1); 594 } 595 596 static void 597 dowarn(int line) 598 { 599 600 warnx("warning: can't back up %s", 601 line < 0 ? "past first line" : "-- line already flushed"); 602 } 603