1 /*- 2 * Copyright (c) 1990, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Michael Rendell of the Memorial University of Newfoundland. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #ifndef lint 38 static const char copyright[] = 39 "@(#) Copyright (c) 1990, 1993, 1994\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41 #endif 42 43 #if 0 44 #ifndef lint 45 static char sccsid[] = "@(#)col.c 8.5 (Berkeley) 5/4/95"; 46 #endif 47 #endif 48 49 #include <sys/cdefs.h> 50 __FBSDID("$FreeBSD$"); 51 52 #include <err.h> 53 #include <locale.h> 54 #include <stdio.h> 55 #include <stdlib.h> 56 #include <string.h> 57 #include <unistd.h> 58 #include <wchar.h> 59 #include <wctype.h> 60 61 #define BS '\b' /* backspace */ 62 #define TAB '\t' /* tab */ 63 #define SPACE ' ' /* space */ 64 #define NL '\n' /* newline */ 65 #define CR '\r' /* carriage return */ 66 #define ESC '\033' /* escape */ 67 #define SI '\017' /* shift in to normal character set */ 68 #define SO '\016' /* shift out to alternate character set */ 69 #define VT '\013' /* vertical tab (aka reverse line feed) */ 70 #define RLF '\007' /* ESC-07 reverse line feed */ 71 #define RHLF '\010' /* ESC-010 reverse half-line feed */ 72 #define FHLF '\011' /* ESC-011 forward half-line feed */ 73 74 /* build up at least this many lines before flushing them out */ 75 #define BUFFER_MARGIN 32 76 77 typedef char CSET; 78 79 typedef struct char_str { 80 #define CS_NORMAL 1 81 #define CS_ALTERNATE 2 82 short c_column; /* column character is in */ 83 CSET c_set; /* character set (currently only 2) */ 84 wchar_t c_char; /* character in question */ 85 int c_width; /* character width */ 86 } CHAR; 87 88 typedef struct line_str LINE; 89 struct line_str { 90 CHAR *l_line; /* characters on the line */ 91 LINE *l_prev; /* previous line */ 92 LINE *l_next; /* next line */ 93 int l_lsize; /* allocated sizeof l_line */ 94 int l_line_len; /* strlen(l_line) */ 95 int l_needs_sort; /* set if chars went in out of order */ 96 int l_max_col; /* max column in the line */ 97 }; 98 99 LINE *alloc_line(void); 100 void dowarn(int); 101 void flush_line(LINE *); 102 void flush_lines(int); 103 void flush_blanks(void); 104 void free_line(LINE *); 105 void usage(void); 106 107 CSET last_set; /* char_set of last char printed */ 108 LINE *lines; 109 int compress_spaces; /* if doing space -> tab conversion */ 110 int fine; /* if `fine' resolution (half lines) */ 111 int max_bufd_lines; /* max # lines to keep in memory */ 112 int nblank_lines; /* # blanks after last flushed line */ 113 int no_backspaces; /* if not to output any backspaces */ 114 int pass_unknown_seqs; /* pass unknown control sequences */ 115 116 #define PUTC(ch) \ 117 do { \ 118 if (putwchar(ch) == WEOF) \ 119 errx(1, "write error"); \ 120 } while (0) 121 122 int 123 main(int argc, char **argv) 124 { 125 wint_t ch; 126 CHAR *c; 127 CSET cur_set; /* current character set */ 128 LINE *l; /* current line */ 129 int extra_lines; /* # of lines above first line */ 130 int cur_col; /* current column */ 131 int cur_line; /* line number of current position */ 132 int max_line; /* max value of cur_line */ 133 int this_line; /* line l points to */ 134 int nflushd_lines; /* number of lines that were flushed */ 135 int adjust, opt, warned, width; 136 137 (void)setlocale(LC_CTYPE, ""); 138 139 max_bufd_lines = 128; 140 compress_spaces = 1; /* compress spaces into tabs */ 141 while ((opt = getopt(argc, argv, "bfhl:px")) != -1) 142 switch (opt) { 143 case 'b': /* do not output backspaces */ 144 no_backspaces = 1; 145 break; 146 case 'f': /* allow half forward line feeds */ 147 fine = 1; 148 break; 149 case 'h': /* compress spaces into tabs */ 150 compress_spaces = 1; 151 break; 152 case 'l': /* buffered line count */ 153 if ((max_bufd_lines = atoi(optarg)) <= 0) 154 errx(1, "bad -l argument %s", optarg); 155 break; 156 case 'p': /* pass unknown control sequences */ 157 pass_unknown_seqs = 1; 158 break; 159 case 'x': /* do not compress spaces into tabs */ 160 compress_spaces = 0; 161 break; 162 case '?': 163 default: 164 usage(); 165 } 166 167 if (optind != argc) 168 usage(); 169 170 /* this value is in half lines */ 171 max_bufd_lines *= 2; 172 173 adjust = cur_col = extra_lines = warned = 0; 174 cur_line = max_line = nflushd_lines = this_line = 0; 175 cur_set = last_set = CS_NORMAL; 176 lines = l = alloc_line(); 177 178 while ((ch = getwchar()) != WEOF) { 179 if (!iswgraph(ch)) { 180 switch (ch) { 181 case BS: /* can't go back further */ 182 if (cur_col == 0) 183 continue; 184 --cur_col; 185 continue; 186 case CR: 187 cur_col = 0; 188 continue; 189 case ESC: /* just ignore EOF */ 190 switch(getwchar()) { 191 case RLF: 192 cur_line -= 2; 193 break; 194 case RHLF: 195 cur_line--; 196 break; 197 case FHLF: 198 cur_line++; 199 if (cur_line > max_line) 200 max_line = cur_line; 201 } 202 continue; 203 case NL: 204 cur_line += 2; 205 if (cur_line > max_line) 206 max_line = cur_line; 207 cur_col = 0; 208 continue; 209 case SPACE: 210 ++cur_col; 211 continue; 212 case SI: 213 cur_set = CS_NORMAL; 214 continue; 215 case SO: 216 cur_set = CS_ALTERNATE; 217 continue; 218 case TAB: /* adjust column */ 219 cur_col |= 7; 220 ++cur_col; 221 continue; 222 case VT: 223 cur_line -= 2; 224 continue; 225 } 226 if (iswspace(ch)) { 227 if ((width = wcwidth(ch)) > 0) 228 cur_col += width; 229 continue; 230 } 231 if (!pass_unknown_seqs) 232 continue; 233 } 234 235 /* Must stuff ch in a line - are we at the right one? */ 236 if (cur_line != this_line - adjust) { 237 LINE *lnew; 238 int nmove; 239 240 adjust = 0; 241 nmove = cur_line - this_line; 242 if (!fine) { 243 /* round up to next line */ 244 if (cur_line & 1) { 245 adjust = 1; 246 nmove++; 247 } 248 } 249 if (nmove < 0) { 250 for (; nmove < 0 && l->l_prev; nmove++) 251 l = l->l_prev; 252 if (nmove) { 253 if (nflushd_lines == 0) { 254 /* 255 * Allow backup past first 256 * line if nothing has been 257 * flushed yet. 258 */ 259 for (; nmove < 0; nmove++) { 260 lnew = alloc_line(); 261 l->l_prev = lnew; 262 lnew->l_next = l; 263 l = lines = lnew; 264 extra_lines++; 265 } 266 } else { 267 if (!warned++) 268 dowarn(cur_line); 269 cur_line -= nmove; 270 } 271 } 272 } else { 273 /* may need to allocate here */ 274 for (; nmove > 0 && l->l_next; nmove--) 275 l = l->l_next; 276 for (; nmove > 0; nmove--) { 277 lnew = alloc_line(); 278 lnew->l_prev = l; 279 l->l_next = lnew; 280 l = lnew; 281 } 282 } 283 this_line = cur_line + adjust; 284 nmove = this_line - nflushd_lines; 285 if (nmove >= max_bufd_lines + BUFFER_MARGIN) { 286 nflushd_lines += nmove - max_bufd_lines; 287 flush_lines(nmove - max_bufd_lines); 288 } 289 } 290 /* grow line's buffer? */ 291 if (l->l_line_len + 1 >= l->l_lsize) { 292 int need; 293 294 need = l->l_lsize ? l->l_lsize * 2 : 90; 295 if ((l->l_line = realloc(l->l_line, 296 (unsigned)need * sizeof(CHAR))) == NULL) 297 err(1, (char *)NULL); 298 l->l_lsize = need; 299 } 300 c = &l->l_line[l->l_line_len++]; 301 c->c_char = ch; 302 c->c_set = cur_set; 303 c->c_column = cur_col; 304 c->c_width = wcwidth(ch); 305 /* 306 * If things are put in out of order, they will need sorting 307 * when it is flushed. 308 */ 309 if (cur_col < l->l_max_col) 310 l->l_needs_sort = 1; 311 else 312 l->l_max_col = cur_col; 313 if (c->c_width > 0) 314 cur_col += c->c_width; 315 } 316 if (ferror(stdin)) 317 err(1, NULL); 318 if (max_line == 0) 319 exit(0); /* no lines, so just exit */ 320 321 /* goto the last line that had a character on it */ 322 for (; l->l_next; l = l->l_next) 323 this_line++; 324 flush_lines(this_line - nflushd_lines + extra_lines + 1); 325 326 /* make sure we leave things in a sane state */ 327 if (last_set != CS_NORMAL) 328 PUTC('\017'); 329 330 /* flush out the last few blank lines */ 331 nblank_lines = max_line - this_line; 332 if (max_line & 1) 333 nblank_lines++; 334 else if (!nblank_lines) 335 /* missing a \n on the last line? */ 336 nblank_lines = 2; 337 flush_blanks(); 338 exit(0); 339 } 340 341 void 342 flush_lines(int nflush) 343 { 344 LINE *l; 345 346 while (--nflush >= 0) { 347 l = lines; 348 lines = l->l_next; 349 if (l->l_line) { 350 flush_blanks(); 351 flush_line(l); 352 } 353 nblank_lines++; 354 if (l->l_line) 355 (void)free(l->l_line); 356 free_line(l); 357 } 358 if (lines) 359 lines->l_prev = NULL; 360 } 361 362 /* 363 * Print a number of newline/half newlines. If fine flag is set, nblank_lines 364 * is the number of half line feeds, otherwise it is the number of whole line 365 * feeds. 366 */ 367 void 368 flush_blanks(void) 369 { 370 int half, i, nb; 371 372 half = 0; 373 nb = nblank_lines; 374 if (nb & 1) { 375 if (fine) 376 half = 1; 377 else 378 nb++; 379 } 380 nb /= 2; 381 for (i = nb; --i >= 0;) 382 PUTC('\n'); 383 if (half) { 384 PUTC('\033'); 385 PUTC('9'); 386 if (!nb) 387 PUTC('\r'); 388 } 389 nblank_lines = 0; 390 } 391 392 /* 393 * Write a line to stdout taking care of space to tab conversion (-h flag) 394 * and character set shifts. 395 */ 396 void 397 flush_line(LINE *l) 398 { 399 CHAR *c, *endc; 400 int i, nchars, last_col, this_col; 401 402 last_col = 0; 403 nchars = l->l_line_len; 404 405 if (l->l_needs_sort) { 406 static CHAR *sorted; 407 static int count_size, *count, i, save, sorted_size, tot; 408 409 /* 410 * Do an O(n) sort on l->l_line by column being careful to 411 * preserve the order of characters in the same column. 412 */ 413 if (l->l_lsize > sorted_size) { 414 sorted_size = l->l_lsize; 415 if ((sorted = realloc(sorted, 416 (unsigned)sizeof(CHAR) * sorted_size)) == NULL) 417 err(1, (char *)NULL); 418 } 419 if (l->l_max_col >= count_size) { 420 count_size = l->l_max_col + 1; 421 if ((count = realloc(count, 422 (unsigned)sizeof(int) * count_size)) == NULL) 423 err(1, (char *)NULL); 424 } 425 memset(count, 0, sizeof(int) * l->l_max_col + 1); 426 for (i = nchars, c = l->l_line; --i >= 0; c++) 427 count[c->c_column]++; 428 429 /* 430 * calculate running total (shifted down by 1) to use as 431 * indices into new line. 432 */ 433 for (tot = 0, i = 0; i <= l->l_max_col; i++) { 434 save = count[i]; 435 count[i] = tot; 436 tot += save; 437 } 438 439 for (i = nchars, c = l->l_line; --i >= 0; c++) 440 sorted[count[c->c_column]++] = *c; 441 c = sorted; 442 } else 443 c = l->l_line; 444 while (nchars > 0) { 445 this_col = c->c_column; 446 endc = c; 447 do { 448 ++endc; 449 } while (--nchars > 0 && this_col == endc->c_column); 450 451 /* if -b only print last character */ 452 if (no_backspaces) { 453 c = endc - 1; 454 if (nchars > 0 && 455 this_col + c->c_width > endc->c_column) 456 continue; 457 } 458 459 if (this_col > last_col) { 460 int nspace = this_col - last_col; 461 462 if (compress_spaces && nspace > 1) { 463 while (1) { 464 int tab_col, tab_size;; 465 466 tab_col = (last_col + 8) & ~7; 467 if (tab_col > this_col) 468 break; 469 tab_size = tab_col - last_col; 470 if (tab_size == 1) 471 PUTC(' '); 472 else 473 PUTC('\t'); 474 nspace -= tab_size; 475 last_col = tab_col; 476 } 477 } 478 while (--nspace >= 0) 479 PUTC(' '); 480 last_col = this_col; 481 } 482 483 for (;;) { 484 if (c->c_set != last_set) { 485 switch (c->c_set) { 486 case CS_NORMAL: 487 PUTC('\017'); 488 break; 489 case CS_ALTERNATE: 490 PUTC('\016'); 491 } 492 last_set = c->c_set; 493 } 494 PUTC(c->c_char); 495 if ((c + 1) < endc) 496 for (i = 0; i < c->c_width; i++) 497 PUTC('\b'); 498 if (++c >= endc) 499 break; 500 } 501 last_col += (c - 1)->c_width; 502 } 503 } 504 505 #define NALLOC 64 506 507 static LINE *line_freelist; 508 509 LINE * 510 alloc_line(void) 511 { 512 LINE *l; 513 int i; 514 515 if (!line_freelist) { 516 if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL) 517 err(1, (char *)NULL); 518 line_freelist = l; 519 for (i = 1; i < NALLOC; i++, l++) 520 l->l_next = l + 1; 521 l->l_next = NULL; 522 } 523 l = line_freelist; 524 line_freelist = l->l_next; 525 526 memset(l, 0, sizeof(LINE)); 527 return (l); 528 } 529 530 void 531 free_line(LINE *l) 532 { 533 534 l->l_next = line_freelist; 535 line_freelist = l; 536 } 537 538 void 539 usage(void) 540 { 541 542 (void)fprintf(stderr, "usage: col [-bfhpx] [-l nline]\n"); 543 exit(1); 544 } 545 546 void 547 dowarn(int line) 548 { 549 550 warnx("warning: can't back up %s", 551 line < 0 ? "past first line" : "-- line already flushed"); 552 } 553