1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Michael Rendell of the Memorial University of Newfoundland. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #ifndef lint 34 static const char copyright[] = 35 "@(#) Copyright (c) 1990, 1993, 1994\n\ 36 The Regents of the University of California. All rights reserved.\n"; 37 #endif 38 39 #if 0 40 #ifndef lint 41 static char sccsid[] = "@(#)col.c 8.5 (Berkeley) 5/4/95"; 42 #endif 43 #endif 44 45 #include <sys/cdefs.h> 46 __FBSDID("$FreeBSD$"); 47 48 #include <err.h> 49 #include <locale.h> 50 #include <stdio.h> 51 #include <stdlib.h> 52 #include <string.h> 53 #include <unistd.h> 54 #include <wchar.h> 55 #include <wctype.h> 56 57 #define BS '\b' /* backspace */ 58 #define TAB '\t' /* tab */ 59 #define SPACE ' ' /* space */ 60 #define NL '\n' /* newline */ 61 #define CR '\r' /* carriage return */ 62 #define ESC '\033' /* escape */ 63 #define SI '\017' /* shift in to normal character set */ 64 #define SO '\016' /* shift out to alternate character set */ 65 #define VT '\013' /* vertical tab (aka reverse line feed) */ 66 #define RLF '\007' /* ESC-07 reverse line feed */ 67 #define RHLF '\010' /* ESC-010 reverse half-line feed */ 68 #define FHLF '\011' /* ESC-011 forward half-line feed */ 69 70 /* build up at least this many lines before flushing them out */ 71 #define BUFFER_MARGIN 32 72 73 typedef char CSET; 74 75 typedef struct char_str { 76 #define CS_NORMAL 1 77 #define CS_ALTERNATE 2 78 short c_column; /* column character is in */ 79 CSET c_set; /* character set (currently only 2) */ 80 wchar_t c_char; /* character in question */ 81 int c_width; /* character width */ 82 } CHAR; 83 84 typedef struct line_str LINE; 85 struct line_str { 86 CHAR *l_line; /* characters on the line */ 87 LINE *l_prev; /* previous line */ 88 LINE *l_next; /* next line */ 89 int l_lsize; /* allocated sizeof l_line */ 90 int l_line_len; /* strlen(l_line) */ 91 int l_needs_sort; /* set if chars went in out of order */ 92 int l_max_col; /* max column in the line */ 93 }; 94 95 static LINE *alloc_line(void); 96 static void dowarn(int); 97 static void flush_line(LINE *); 98 static void flush_lines(int); 99 static void flush_blanks(void); 100 static void free_line(LINE *); 101 static void usage(void); 102 103 static CSET last_set; /* char_set of last char printed */ 104 static LINE *lines; 105 static int compress_spaces; /* if doing space -> tab conversion */ 106 static int fine; /* if `fine' resolution (half lines) */ 107 static int max_bufd_lines; /* max # lines to keep in memory */ 108 static int nblank_lines; /* # blanks after last flushed line */ 109 static int no_backspaces; /* if not to output any backspaces */ 110 static int pass_unknown_seqs; /* pass unknown control sequences */ 111 112 #define PUTC(ch) \ 113 do { \ 114 if (putwchar(ch) == WEOF) \ 115 errx(1, "write error"); \ 116 } while (0) 117 118 int 119 main(int argc, char **argv) 120 { 121 wint_t ch; 122 CHAR *c; 123 CSET cur_set; /* current character set */ 124 LINE *l; /* current line */ 125 int extra_lines; /* # of lines above first line */ 126 int cur_col; /* current column */ 127 int cur_line; /* line number of current position */ 128 int max_line; /* max value of cur_line */ 129 int this_line; /* line l points to */ 130 int nflushd_lines; /* number of lines that were flushed */ 131 int adjust, opt, warned, width; 132 133 (void)setlocale(LC_CTYPE, ""); 134 135 max_bufd_lines = 128; 136 compress_spaces = 1; /* compress spaces into tabs */ 137 while ((opt = getopt(argc, argv, "bfhl:px")) != -1) 138 switch (opt) { 139 case 'b': /* do not output backspaces */ 140 no_backspaces = 1; 141 break; 142 case 'f': /* allow half forward line feeds */ 143 fine = 1; 144 break; 145 case 'h': /* compress spaces into tabs */ 146 compress_spaces = 1; 147 break; 148 case 'l': /* buffered line count */ 149 if ((max_bufd_lines = atoi(optarg)) <= 0) 150 errx(1, "bad -l argument %s", optarg); 151 break; 152 case 'p': /* pass unknown control sequences */ 153 pass_unknown_seqs = 1; 154 break; 155 case 'x': /* do not compress spaces into tabs */ 156 compress_spaces = 0; 157 break; 158 case '?': 159 default: 160 usage(); 161 } 162 163 if (optind != argc) 164 usage(); 165 166 /* this value is in half lines */ 167 max_bufd_lines *= 2; 168 169 adjust = cur_col = extra_lines = warned = 0; 170 cur_line = max_line = nflushd_lines = this_line = 0; 171 cur_set = last_set = CS_NORMAL; 172 lines = l = alloc_line(); 173 174 while ((ch = getwchar()) != WEOF) { 175 if (!iswgraph(ch)) { 176 switch (ch) { 177 case BS: /* can't go back further */ 178 if (cur_col == 0) 179 continue; 180 --cur_col; 181 continue; 182 case CR: 183 cur_col = 0; 184 continue; 185 case ESC: /* just ignore EOF */ 186 switch(getwchar()) { 187 case RLF: 188 cur_line -= 2; 189 break; 190 case RHLF: 191 cur_line--; 192 break; 193 case FHLF: 194 cur_line++; 195 if (cur_line > max_line) 196 max_line = cur_line; 197 } 198 continue; 199 case NL: 200 cur_line += 2; 201 if (cur_line > max_line) 202 max_line = cur_line; 203 cur_col = 0; 204 continue; 205 case SPACE: 206 ++cur_col; 207 continue; 208 case SI: 209 cur_set = CS_NORMAL; 210 continue; 211 case SO: 212 cur_set = CS_ALTERNATE; 213 continue; 214 case TAB: /* adjust column */ 215 cur_col |= 7; 216 ++cur_col; 217 continue; 218 case VT: 219 cur_line -= 2; 220 continue; 221 } 222 if (iswspace(ch)) { 223 if ((width = wcwidth(ch)) > 0) 224 cur_col += width; 225 continue; 226 } 227 if (!pass_unknown_seqs) 228 continue; 229 } 230 231 /* Must stuff ch in a line - are we at the right one? */ 232 if (cur_line != this_line - adjust) { 233 LINE *lnew; 234 int nmove; 235 236 adjust = 0; 237 nmove = cur_line - this_line; 238 if (!fine) { 239 /* round up to next line */ 240 if (cur_line & 1) { 241 adjust = 1; 242 nmove++; 243 } 244 } 245 if (nmove < 0) { 246 for (; nmove < 0 && l->l_prev; nmove++) 247 l = l->l_prev; 248 if (nmove) { 249 if (nflushd_lines == 0) { 250 /* 251 * Allow backup past first 252 * line if nothing has been 253 * flushed yet. 254 */ 255 for (; nmove < 0; nmove++) { 256 lnew = alloc_line(); 257 l->l_prev = lnew; 258 lnew->l_next = l; 259 l = lines = lnew; 260 extra_lines++; 261 } 262 } else { 263 if (!warned++) 264 dowarn(cur_line); 265 cur_line -= nmove; 266 } 267 } 268 } else { 269 /* may need to allocate here */ 270 for (; nmove > 0 && l->l_next; nmove--) 271 l = l->l_next; 272 for (; nmove > 0; nmove--) { 273 lnew = alloc_line(); 274 lnew->l_prev = l; 275 l->l_next = lnew; 276 l = lnew; 277 } 278 } 279 this_line = cur_line + adjust; 280 nmove = this_line - nflushd_lines; 281 if (nmove >= max_bufd_lines + BUFFER_MARGIN) { 282 nflushd_lines += nmove - max_bufd_lines; 283 flush_lines(nmove - max_bufd_lines); 284 } 285 } 286 /* grow line's buffer? */ 287 if (l->l_line_len + 1 >= l->l_lsize) { 288 int need; 289 290 need = l->l_lsize ? l->l_lsize * 2 : 90; 291 if ((l->l_line = realloc(l->l_line, 292 (unsigned)need * sizeof(CHAR))) == NULL) 293 err(1, (char *)NULL); 294 l->l_lsize = need; 295 } 296 c = &l->l_line[l->l_line_len++]; 297 c->c_char = ch; 298 c->c_set = cur_set; 299 c->c_column = cur_col; 300 c->c_width = wcwidth(ch); 301 /* 302 * If things are put in out of order, they will need sorting 303 * when it is flushed. 304 */ 305 if (cur_col < l->l_max_col) 306 l->l_needs_sort = 1; 307 else 308 l->l_max_col = cur_col; 309 if (c->c_width > 0) 310 cur_col += c->c_width; 311 } 312 if (ferror(stdin)) 313 err(1, NULL); 314 if (max_line == 0) 315 exit(0); /* no lines, so just exit */ 316 317 /* goto the last line that had a character on it */ 318 for (; l->l_next; l = l->l_next) 319 this_line++; 320 flush_lines(this_line - nflushd_lines + extra_lines + 1); 321 322 /* make sure we leave things in a sane state */ 323 if (last_set != CS_NORMAL) 324 PUTC('\017'); 325 326 /* flush out the last few blank lines */ 327 nblank_lines = max_line - this_line; 328 if (max_line & 1) 329 nblank_lines++; 330 else if (!nblank_lines) 331 /* missing a \n on the last line? */ 332 nblank_lines = 2; 333 flush_blanks(); 334 exit(0); 335 } 336 337 static void 338 flush_lines(int nflush) 339 { 340 LINE *l; 341 342 while (--nflush >= 0) { 343 l = lines; 344 lines = l->l_next; 345 if (l->l_line) { 346 flush_blanks(); 347 flush_line(l); 348 } 349 nblank_lines++; 350 if (l->l_line) 351 (void)free(l->l_line); 352 free_line(l); 353 } 354 if (lines) 355 lines->l_prev = NULL; 356 } 357 358 /* 359 * Print a number of newline/half newlines. If fine flag is set, nblank_lines 360 * is the number of half line feeds, otherwise it is the number of whole line 361 * feeds. 362 */ 363 static void 364 flush_blanks(void) 365 { 366 int half, i, nb; 367 368 half = 0; 369 nb = nblank_lines; 370 if (nb & 1) { 371 if (fine) 372 half = 1; 373 else 374 nb++; 375 } 376 nb /= 2; 377 for (i = nb; --i >= 0;) 378 PUTC('\n'); 379 if (half) { 380 PUTC('\033'); 381 PUTC('\011'); 382 if (!nb) 383 PUTC('\r'); 384 } 385 nblank_lines = 0; 386 } 387 388 /* 389 * Write a line to stdout taking care of space to tab conversion (-h flag) 390 * and character set shifts. 391 */ 392 static void 393 flush_line(LINE *l) 394 { 395 CHAR *c, *endc; 396 int i, j, nchars, last_col, save, this_col, tot; 397 398 last_col = 0; 399 nchars = l->l_line_len; 400 401 if (l->l_needs_sort) { 402 static CHAR *sorted; 403 static int count_size, *count, sorted_size; 404 405 /* 406 * Do an O(n) sort on l->l_line by column being careful to 407 * preserve the order of characters in the same column. 408 */ 409 if (l->l_lsize > sorted_size) { 410 sorted_size = l->l_lsize; 411 if ((sorted = realloc(sorted, 412 (unsigned)sizeof(CHAR) * sorted_size)) == NULL) 413 err(1, (char *)NULL); 414 } 415 if (l->l_max_col >= count_size) { 416 count_size = l->l_max_col + 1; 417 if ((count = realloc(count, 418 (unsigned)sizeof(int) * count_size)) == NULL) 419 err(1, (char *)NULL); 420 } 421 memset(count, 0, sizeof(int) * l->l_max_col + 1); 422 for (i = nchars, c = l->l_line; --i >= 0; c++) 423 count[c->c_column]++; 424 425 /* 426 * calculate running total (shifted down by 1) to use as 427 * indices into new line. 428 */ 429 for (tot = 0, i = 0; i <= l->l_max_col; i++) { 430 save = count[i]; 431 count[i] = tot; 432 tot += save; 433 } 434 435 for (i = nchars, c = l->l_line; --i >= 0; c++) 436 sorted[count[c->c_column]++] = *c; 437 c = sorted; 438 } else 439 c = l->l_line; 440 while (nchars > 0) { 441 this_col = c->c_column; 442 endc = c; 443 do { 444 ++endc; 445 } while (--nchars > 0 && this_col == endc->c_column); 446 447 /* if -b only print last character */ 448 if (no_backspaces) { 449 c = endc - 1; 450 if (nchars > 0 && 451 this_col + c->c_width > endc->c_column) 452 continue; 453 } 454 455 if (this_col > last_col) { 456 int nspace = this_col - last_col; 457 458 if (compress_spaces && nspace > 1) { 459 while (1) { 460 int tab_col, tab_size; 461 462 tab_col = (last_col + 8) & ~7; 463 if (tab_col > this_col) 464 break; 465 tab_size = tab_col - last_col; 466 if (tab_size == 1) 467 PUTC(' '); 468 else 469 PUTC('\t'); 470 nspace -= tab_size; 471 last_col = tab_col; 472 } 473 } 474 while (--nspace >= 0) 475 PUTC(' '); 476 last_col = this_col; 477 } 478 479 for (;;) { 480 if (c->c_set != last_set) { 481 switch (c->c_set) { 482 case CS_NORMAL: 483 PUTC('\017'); 484 break; 485 case CS_ALTERNATE: 486 PUTC('\016'); 487 } 488 last_set = c->c_set; 489 } 490 PUTC(c->c_char); 491 if ((c + 1) < endc) 492 for (j = 0; j < c->c_width; j++) 493 PUTC('\b'); 494 if (++c >= endc) 495 break; 496 } 497 last_col += (c - 1)->c_width; 498 } 499 } 500 501 #define NALLOC 64 502 503 static LINE *line_freelist; 504 505 static LINE * 506 alloc_line(void) 507 { 508 LINE *l; 509 int i; 510 511 if (!line_freelist) { 512 if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL) 513 err(1, (char *)NULL); 514 line_freelist = l; 515 for (i = 1; i < NALLOC; i++, l++) 516 l->l_next = l + 1; 517 l->l_next = NULL; 518 } 519 l = line_freelist; 520 line_freelist = l->l_next; 521 522 memset(l, 0, sizeof(LINE)); 523 return (l); 524 } 525 526 static void 527 free_line(LINE *l) 528 { 529 530 l->l_next = line_freelist; 531 line_freelist = l; 532 } 533 534 static void 535 usage(void) 536 { 537 538 (void)fprintf(stderr, "usage: col [-bfhpx] [-l nline]\n"); 539 exit(1); 540 } 541 542 static void 543 dowarn(int line) 544 { 545 546 warnx("warning: can't back up %s", 547 line < 0 ? "past first line" : "-- line already flushed"); 548 } 549