1 /* $Id: term.c,v 1.291 2023/04/28 19:11:04 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2010-2022 Ingo Schwarze <schwarze@openbsd.org> 4 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <stdint.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 29 #include "mandoc.h" 30 #include "mandoc_aux.h" 31 #include "out.h" 32 #include "term.h" 33 #include "main.h" 34 35 static size_t cond_width(const struct termp *, int, int *); 36 static void adjbuf(struct termp_col *, size_t); 37 static void bufferc(struct termp *, char); 38 static void encode(struct termp *, const char *, size_t); 39 static void encode1(struct termp *, int); 40 static void endline(struct termp *); 41 static void term_field(struct termp *, size_t, size_t); 42 static void term_fill(struct termp *, size_t *, size_t *, 43 size_t); 44 45 46 void 47 term_setcol(struct termp *p, size_t maxtcol) 48 { 49 if (maxtcol > p->maxtcol) { 50 p->tcols = mandoc_recallocarray(p->tcols, 51 p->maxtcol, maxtcol, sizeof(*p->tcols)); 52 p->maxtcol = maxtcol; 53 } 54 p->lasttcol = maxtcol - 1; 55 p->tcol = p->tcols; 56 } 57 58 void 59 term_free(struct termp *p) 60 { 61 term_tab_free(); 62 for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++) 63 free(p->tcol->buf); 64 free(p->tcols); 65 free(p->fontq); 66 free(p); 67 } 68 69 void 70 term_begin(struct termp *p, term_margin head, 71 term_margin foot, const struct roff_meta *arg) 72 { 73 74 p->headf = head; 75 p->footf = foot; 76 p->argf = arg; 77 (*p->begin)(p); 78 } 79 80 void 81 term_end(struct termp *p) 82 { 83 84 (*p->end)(p); 85 } 86 87 /* 88 * Flush a chunk of text. By default, break the output line each time 89 * the right margin is reached, and continue output on the next line 90 * at the same offset as the chunk itself. By default, also break the 91 * output line at the end of the chunk. There are many flags modifying 92 * this behaviour, see the comments in the body of the function. 93 */ 94 void 95 term_flushln(struct termp *p) 96 { 97 size_t vbl; /* Number of blanks to prepend to the output. */ 98 size_t vbr; /* Actual visual position of the end of field. */ 99 size_t vfield; /* Desired visual field width. */ 100 size_t vtarget; /* Desired visual position of the right margin. */ 101 size_t ic; /* Character position in the input buffer. */ 102 size_t nbr; /* Number of characters to print in this field. */ 103 104 /* 105 * Normally, start writing at the left margin, but with the 106 * NOPAD flag, start writing at the current position instead. 107 */ 108 109 vbl = (p->flags & TERMP_NOPAD) || p->tcol->offset < p->viscol ? 110 0 : p->tcol->offset - p->viscol; 111 if (p->minbl && vbl < p->minbl) 112 vbl = p->minbl; 113 114 if ((p->flags & TERMP_MULTICOL) == 0) 115 p->tcol->col = 0; 116 117 /* Loop over output lines. */ 118 119 for (;;) { 120 vfield = p->tcol->rmargin > p->viscol + vbl ? 121 p->tcol->rmargin - p->viscol - vbl : 0; 122 123 /* 124 * Normally, break the line at the the right margin 125 * of the field, but with the NOBREAK flag, only 126 * break it at the max right margin of the screen, 127 * and with the BRNEVER flag, never break it at all. 128 */ 129 130 vtarget = (p->flags & TERMP_NOBREAK) == 0 ? vfield : 131 p->maxrmargin > p->viscol + vbl ? 132 p->maxrmargin - p->viscol - vbl : 0; 133 134 /* 135 * Figure out how much text will fit in the field. 136 * If there is whitespace only, print nothing. 137 */ 138 139 term_fill(p, &nbr, &vbr, 140 p->flags & TERMP_BRNEVER ? SIZE_MAX : vtarget); 141 if (nbr == 0) 142 break; 143 144 /* 145 * With the CENTER or RIGHT flag, increase the indentation 146 * to center the text between the left and right margins 147 * or to adjust it to the right margin, respectively. 148 */ 149 150 if (vbr < vtarget) { 151 if (p->flags & TERMP_CENTER) 152 vbl += (vtarget - vbr) / 2; 153 else if (p->flags & TERMP_RIGHT) 154 vbl += vtarget - vbr; 155 } 156 157 /* Finally, print the field content. */ 158 159 term_field(p, vbl, nbr); 160 if (vbr < vtarget) 161 p->tcol->taboff += vbr; 162 else 163 p->tcol->taboff += vtarget; 164 p->tcol->taboff += (*p->width)(p, ' '); 165 166 /* 167 * If there is no text left in the field, exit the loop. 168 * If the BRTRSP flag is set, consider trailing 169 * whitespace significant when deciding whether 170 * the field fits or not. 171 */ 172 173 for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) { 174 switch (p->tcol->buf[ic]) { 175 case '\t': 176 if (p->flags & TERMP_BRTRSP) 177 vbr = term_tab_next(vbr); 178 continue; 179 case ' ': 180 if (p->flags & TERMP_BRTRSP) 181 vbr += (*p->width)(p, ' '); 182 continue; 183 case '\n': 184 case ASCII_NBRZW: 185 case ASCII_BREAK: 186 case ASCII_TABREF: 187 continue; 188 default: 189 break; 190 } 191 break; 192 } 193 if (ic == p->tcol->lastcol) 194 break; 195 196 /* 197 * At the location of an automatic line break, input 198 * space characters are consumed by the line break. 199 */ 200 201 while (p->tcol->col < p->tcol->lastcol && 202 p->tcol->buf[p->tcol->col] == ' ') 203 p->tcol->col++; 204 205 /* 206 * In multi-column mode, leave the rest of the text 207 * in the buffer to be handled by a subsequent 208 * invocation, such that the other columns of the 209 * table can be handled first. 210 * In single-column mode, simply break the line. 211 */ 212 213 if (p->flags & TERMP_MULTICOL) 214 return; 215 216 endline(p); 217 218 /* 219 * Normally, start the next line at the same indentation 220 * as this one, but with the BRIND flag, start it at the 221 * right margin instead. This is used together with 222 * NOBREAK for the tags in various kinds of tagged lists. 223 */ 224 225 vbl = p->flags & TERMP_BRIND ? 226 p->tcol->rmargin : p->tcol->offset; 227 } 228 229 /* Reset output state in preparation for the next field. */ 230 231 p->col = p->tcol->col = p->tcol->lastcol = 0; 232 p->minbl = p->trailspace; 233 p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE | TERMP_NOPAD); 234 235 if (p->flags & TERMP_MULTICOL) 236 return; 237 238 /* 239 * The HANG flag means that the next field 240 * always follows on the same line. 241 * The NOBREAK flag means that the next field 242 * follows on the same line unless the field was overrun. 243 * Normally, break the line at the end of each field. 244 */ 245 246 if ((p->flags & TERMP_HANG) == 0 && 247 ((p->flags & TERMP_NOBREAK) == 0 || 248 vbr + term_len(p, p->trailspace) > vfield)) 249 endline(p); 250 } 251 252 /* 253 * Store the number of input characters to print in this field in *nbr 254 * and their total visual width to print in *vbr. 255 * If there is only whitespace in the field, both remain zero. 256 * The desired visual width of the field is provided by vtarget. 257 * If the first word is longer, the field will be overrun. 258 */ 259 static void 260 term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget) 261 { 262 size_t ic; /* Character position in the input buffer. */ 263 size_t vis; /* Visual position of the current character. */ 264 size_t vn; /* Visual position of the next character. */ 265 int breakline; /* Break at the end of this word. */ 266 int graph; /* Last character was non-blank. */ 267 int taboff; /* Temporary offset for literal tabs. */ 268 269 *nbr = *vbr = vis = 0; 270 breakline = graph = 0; 271 taboff = p->tcol->taboff; 272 for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) { 273 switch (p->tcol->buf[ic]) { 274 case '\b': /* Escape \o (overstrike) or backspace markup. */ 275 assert(ic > 0); 276 vis -= (*p->width)(p, p->tcol->buf[ic - 1]); 277 continue; 278 279 case ' ': 280 case ASCII_BREAK: /* Escape \: (breakpoint). */ 281 vn = vis; 282 if (p->tcol->buf[ic] == ' ') 283 vn += (*p->width)(p, ' '); 284 /* Can break at the end of a word. */ 285 if (breakline || vn > vtarget) 286 break; 287 if (graph) { 288 *nbr = ic; 289 *vbr = vis; 290 graph = 0; 291 } 292 vis = vn; 293 continue; 294 295 case '\n': /* Escape \p (break at the end of the word). */ 296 breakline = 1; 297 continue; 298 299 case ASCII_HYPH: /* Breakable hyphen. */ 300 graph = 1; 301 /* 302 * We are about to decide whether to break the 303 * line or not, so we no longer need this hyphen 304 * to be marked as breakable. Put back a real 305 * hyphen such that we get the correct width. 306 */ 307 p->tcol->buf[ic] = '-'; 308 vis += (*p->width)(p, '-'); 309 if (vis > vtarget) { 310 ic++; 311 break; 312 } 313 *nbr = ic + 1; 314 *vbr = vis; 315 continue; 316 317 case ASCII_TABREF: 318 taboff = -vis - (*p->width)(p, ' '); 319 continue; 320 321 default: 322 switch (p->tcol->buf[ic]) { 323 case '\t': 324 if (taboff < 0 && (size_t)-taboff > vis) 325 vis = 0; 326 else 327 vis += taboff; 328 vis = term_tab_next(vis); 329 vis -= taboff; 330 break; 331 case ASCII_NBRZW: /* Non-breakable zero-width. */ 332 break; 333 case ASCII_NBRSP: /* Non-breakable space. */ 334 p->tcol->buf[ic] = ' '; 335 /* FALLTHROUGH */ 336 default: /* Printable character. */ 337 vis += (*p->width)(p, p->tcol->buf[ic]); 338 break; 339 } 340 graph = 1; 341 if (vis > vtarget && *nbr > 0) 342 return; 343 continue; 344 } 345 break; 346 } 347 348 /* 349 * If the last word extends to the end of the field without any 350 * trailing whitespace, the loop could not check yet whether it 351 * can remain on this line. So do the check now. 352 */ 353 354 if (graph && (vis <= vtarget || *nbr == 0)) { 355 *nbr = ic; 356 *vbr = vis; 357 } 358 } 359 360 /* 361 * Print the contents of one field 362 * with an indentation of vbl visual columns, 363 * and an input string length of nbr characters. 364 */ 365 static void 366 term_field(struct termp *p, size_t vbl, size_t nbr) 367 { 368 size_t ic; /* Character position in the input buffer. */ 369 size_t vis; /* Visual position of the current character. */ 370 size_t vt; /* Visual position including tab offset. */ 371 size_t dv; /* Visual width of the current character. */ 372 int taboff; /* Temporary offset for literal tabs. */ 373 374 vis = 0; 375 taboff = p->tcol->taboff; 376 for (ic = p->tcol->col; ic < nbr; ic++) { 377 378 /* 379 * To avoid the printing of trailing whitespace, 380 * do not print whitespace right away, only count it. 381 */ 382 383 switch (p->tcol->buf[ic]) { 384 case '\n': 385 case ASCII_BREAK: 386 case ASCII_NBRZW: 387 continue; 388 case ASCII_TABREF: 389 taboff = -vis - (*p->width)(p, ' '); 390 continue; 391 case '\t': 392 case ' ': 393 case ASCII_NBRSP: 394 if (p->tcol->buf[ic] == '\t') { 395 if (taboff < 0 && (size_t)-taboff > vis) 396 vt = 0; 397 else 398 vt = vis + taboff; 399 dv = term_tab_next(vt) - vt; 400 } else 401 dv = (*p->width)(p, ' '); 402 vbl += dv; 403 vis += dv; 404 continue; 405 default: 406 break; 407 } 408 409 /* 410 * We found a non-blank character to print, 411 * so write preceding white space now. 412 */ 413 414 if (vbl > 0) { 415 (*p->advance)(p, vbl); 416 p->viscol += vbl; 417 vbl = 0; 418 } 419 420 /* Print the character and adjust the visual position. */ 421 422 (*p->letter)(p, p->tcol->buf[ic]); 423 if (p->tcol->buf[ic] == '\b') { 424 dv = (*p->width)(p, p->tcol->buf[ic - 1]); 425 p->viscol -= dv; 426 vis -= dv; 427 } else { 428 dv = (*p->width)(p, p->tcol->buf[ic]); 429 p->viscol += dv; 430 vis += dv; 431 } 432 } 433 p->tcol->col = nbr; 434 } 435 436 static void 437 endline(struct termp *p) 438 { 439 if ((p->flags & (TERMP_NEWMC | TERMP_ENDMC)) == TERMP_ENDMC) { 440 p->mc = NULL; 441 p->flags &= ~TERMP_ENDMC; 442 } 443 if (p->mc != NULL) { 444 if (p->viscol && p->maxrmargin >= p->viscol) 445 (*p->advance)(p, p->maxrmargin - p->viscol + 1); 446 p->flags |= TERMP_NOBUF | TERMP_NOSPACE; 447 term_word(p, p->mc); 448 p->flags &= ~(TERMP_NOBUF | TERMP_NEWMC); 449 } 450 p->viscol = 0; 451 p->minbl = 0; 452 (*p->endline)(p); 453 } 454 455 /* 456 * A newline only breaks an existing line; it won't assert vertical 457 * space. All data in the output buffer is flushed prior to the newline 458 * assertion. 459 */ 460 void 461 term_newln(struct termp *p) 462 { 463 p->flags |= TERMP_NOSPACE; 464 if (p->tcol->lastcol || p->viscol) 465 term_flushln(p); 466 p->tcol->taboff = 0; 467 } 468 469 /* 470 * Asserts a vertical space (a full, empty line-break between lines). 471 * Note that if used twice, this will cause two blank spaces and so on. 472 * All data in the output buffer is flushed prior to the newline 473 * assertion. 474 */ 475 void 476 term_vspace(struct termp *p) 477 { 478 479 term_newln(p); 480 p->viscol = 0; 481 p->minbl = 0; 482 if (0 < p->skipvsp) 483 p->skipvsp--; 484 else 485 (*p->endline)(p); 486 } 487 488 /* Swap current and previous font; for \fP and .ft P */ 489 void 490 term_fontlast(struct termp *p) 491 { 492 enum termfont f; 493 494 f = p->fontl; 495 p->fontl = p->fontq[p->fonti]; 496 p->fontq[p->fonti] = f; 497 } 498 499 /* Set font, save current, discard previous; for \f, .ft, .B etc. */ 500 void 501 term_fontrepl(struct termp *p, enum termfont f) 502 { 503 504 p->fontl = p->fontq[p->fonti]; 505 p->fontq[p->fonti] = f; 506 } 507 508 /* Set font, save previous. */ 509 void 510 term_fontpush(struct termp *p, enum termfont f) 511 { 512 513 p->fontl = p->fontq[p->fonti]; 514 if (++p->fonti == p->fontsz) { 515 p->fontsz += 8; 516 p->fontq = mandoc_reallocarray(p->fontq, 517 p->fontsz, sizeof(*p->fontq)); 518 } 519 p->fontq[p->fonti] = f; 520 } 521 522 /* Flush to make the saved pointer current again. */ 523 void 524 term_fontpopq(struct termp *p, int i) 525 { 526 527 assert(i >= 0); 528 if (p->fonti > i) 529 p->fonti = i; 530 } 531 532 /* Pop one font off the stack. */ 533 void 534 term_fontpop(struct termp *p) 535 { 536 537 assert(p->fonti); 538 p->fonti--; 539 } 540 541 /* 542 * Handle pwords, partial words, which may be either a single word or a 543 * phrase that cannot be broken down (such as a literal string). This 544 * handles word styling. 545 */ 546 void 547 term_word(struct termp *p, const char *word) 548 { 549 struct roffsu su; 550 const char nbrsp[2] = { ASCII_NBRSP, 0 }; 551 const char *seq, *cp; 552 int sz, uc; 553 size_t csz, lsz, ssz; 554 enum mandoc_esc esc; 555 556 if ((p->flags & TERMP_NOBUF) == 0) { 557 if ((p->flags & TERMP_NOSPACE) == 0) { 558 if ((p->flags & TERMP_KEEP) == 0) { 559 bufferc(p, ' '); 560 if (p->flags & TERMP_SENTENCE) 561 bufferc(p, ' '); 562 } else 563 bufferc(p, ASCII_NBRSP); 564 } 565 if (p->flags & TERMP_PREKEEP) 566 p->flags |= TERMP_KEEP; 567 if (p->flags & TERMP_NONOSPACE) 568 p->flags |= TERMP_NOSPACE; 569 else 570 p->flags &= ~TERMP_NOSPACE; 571 p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE); 572 p->skipvsp = 0; 573 } 574 575 while ('\0' != *word) { 576 if ('\\' != *word) { 577 if (TERMP_NBRWORD & p->flags) { 578 if (' ' == *word) { 579 encode(p, nbrsp, 1); 580 word++; 581 continue; 582 } 583 ssz = strcspn(word, "\\ "); 584 } else 585 ssz = strcspn(word, "\\"); 586 encode(p, word, ssz); 587 word += (int)ssz; 588 continue; 589 } 590 591 word++; 592 esc = mandoc_escape(&word, &seq, &sz); 593 switch (esc) { 594 case ESCAPE_UNICODE: 595 uc = mchars_num2uc(seq + 1, sz - 1); 596 break; 597 case ESCAPE_NUMBERED: 598 uc = mchars_num2char(seq, sz); 599 if (uc >= 0) 600 break; 601 bufferc(p, ASCII_NBRZW); 602 continue; 603 case ESCAPE_SPECIAL: 604 if (p->enc == TERMENC_ASCII) { 605 cp = mchars_spec2str(seq, sz, &ssz); 606 if (cp != NULL) 607 encode(p, cp, ssz); 608 else 609 bufferc(p, ASCII_NBRZW); 610 } else { 611 uc = mchars_spec2cp(seq, sz); 612 if (uc > 0) 613 encode1(p, uc); 614 else 615 bufferc(p, ASCII_NBRZW); 616 } 617 continue; 618 case ESCAPE_UNDEF: 619 uc = *seq; 620 break; 621 case ESCAPE_FONTBOLD: 622 case ESCAPE_FONTCB: 623 term_fontrepl(p, TERMFONT_BOLD); 624 continue; 625 case ESCAPE_FONTITALIC: 626 case ESCAPE_FONTCI: 627 term_fontrepl(p, TERMFONT_UNDER); 628 continue; 629 case ESCAPE_FONTBI: 630 term_fontrepl(p, TERMFONT_BI); 631 continue; 632 case ESCAPE_FONT: 633 case ESCAPE_FONTCR: 634 case ESCAPE_FONTROMAN: 635 term_fontrepl(p, TERMFONT_NONE); 636 continue; 637 case ESCAPE_FONTPREV: 638 term_fontlast(p); 639 continue; 640 case ESCAPE_BREAK: 641 bufferc(p, '\n'); 642 continue; 643 case ESCAPE_NOSPACE: 644 if (p->flags & TERMP_BACKAFTER) 645 p->flags &= ~TERMP_BACKAFTER; 646 else if (*word == '\0') 647 p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE); 648 continue; 649 case ESCAPE_DEVICE: 650 if (p->type == TERMTYPE_PDF) 651 encode(p, "pdf", 3); 652 else if (p->type == TERMTYPE_PS) 653 encode(p, "ps", 2); 654 else if (p->enc == TERMENC_ASCII) 655 encode(p, "ascii", 5); 656 else 657 encode(p, "utf8", 4); 658 continue; 659 case ESCAPE_HORIZ: 660 if (p->flags & TERMP_BACKAFTER) { 661 p->flags &= ~TERMP_BACKAFTER; 662 continue; 663 } 664 if (*seq == '|') { 665 seq++; 666 uc = -p->col; 667 } else 668 uc = 0; 669 if (a2roffsu(seq, &su, SCALE_EM) == NULL) 670 continue; 671 uc += term_hen(p, &su); 672 if (uc >= 0) { 673 while (uc > 0) { 674 uc -= term_len(p, 1); 675 if (p->flags & TERMP_BACKBEFORE) 676 p->flags &= ~TERMP_BACKBEFORE; 677 else 678 bufferc(p, ASCII_NBRSP); 679 } 680 continue; 681 } 682 if (p->flags & TERMP_BACKBEFORE) { 683 p->flags &= ~TERMP_BACKBEFORE; 684 assert(p->col > 0); 685 p->col--; 686 } 687 if (p->col >= (size_t)(-uc)) { 688 p->col += uc; 689 } else { 690 uc += p->col; 691 p->col = 0; 692 if (p->tcol->offset > (size_t)(-uc)) { 693 p->ti += uc; 694 p->tcol->offset += uc; 695 } else { 696 p->ti -= p->tcol->offset; 697 p->tcol->offset = 0; 698 } 699 } 700 continue; 701 case ESCAPE_HLINE: 702 if ((cp = a2roffsu(seq, &su, SCALE_EM)) == NULL) 703 continue; 704 uc = term_hen(p, &su); 705 if (uc <= 0) { 706 if (p->tcol->rmargin <= p->tcol->offset) 707 continue; 708 lsz = p->tcol->rmargin - p->tcol->offset; 709 } else 710 lsz = uc; 711 if (*cp == seq[-1]) 712 uc = -1; 713 else if (*cp == '\\') { 714 seq = cp + 1; 715 esc = mandoc_escape(&seq, &cp, &sz); 716 switch (esc) { 717 case ESCAPE_UNICODE: 718 uc = mchars_num2uc(cp + 1, sz - 1); 719 break; 720 case ESCAPE_NUMBERED: 721 uc = mchars_num2char(cp, sz); 722 break; 723 case ESCAPE_SPECIAL: 724 uc = mchars_spec2cp(cp, sz); 725 break; 726 case ESCAPE_UNDEF: 727 uc = *seq; 728 break; 729 default: 730 uc = -1; 731 break; 732 } 733 } else 734 uc = *cp; 735 if (uc < 0x20 || (uc > 0x7E && uc < 0xA0)) 736 uc = '_'; 737 if (p->enc == TERMENC_ASCII) { 738 cp = ascii_uc2str(uc); 739 csz = term_strlen(p, cp); 740 ssz = strlen(cp); 741 } else 742 csz = (*p->width)(p, uc); 743 while (lsz >= csz) { 744 if (p->enc == TERMENC_ASCII) 745 encode(p, cp, ssz); 746 else 747 encode1(p, uc); 748 lsz -= csz; 749 } 750 continue; 751 case ESCAPE_SKIPCHAR: 752 p->flags |= TERMP_BACKAFTER; 753 continue; 754 case ESCAPE_OVERSTRIKE: 755 cp = seq + sz; 756 while (seq < cp) { 757 if (*seq == '\\') { 758 mandoc_escape(&seq, NULL, NULL); 759 continue; 760 } 761 encode1(p, *seq++); 762 if (seq < cp) { 763 if (p->flags & TERMP_BACKBEFORE) 764 p->flags |= TERMP_BACKAFTER; 765 else 766 p->flags |= TERMP_BACKBEFORE; 767 } 768 } 769 /* Trim trailing backspace/blank pair. */ 770 if (p->tcol->lastcol > 2 && 771 (p->tcol->buf[p->tcol->lastcol - 1] == ' ' || 772 p->tcol->buf[p->tcol->lastcol - 1] == '\t')) 773 p->tcol->lastcol -= 2; 774 if (p->col > p->tcol->lastcol) 775 p->col = p->tcol->lastcol; 776 continue; 777 case ESCAPE_IGNORE: 778 bufferc(p, ASCII_NBRZW); 779 continue; 780 default: 781 continue; 782 } 783 784 /* 785 * Common handling for Unicode and numbered 786 * character escape sequences. 787 */ 788 789 if (p->enc == TERMENC_ASCII) { 790 cp = ascii_uc2str(uc); 791 encode(p, cp, strlen(cp)); 792 } else { 793 if ((uc < 0x20 && uc != 0x09) || 794 (uc > 0x7E && uc < 0xA0)) 795 uc = 0xFFFD; 796 encode1(p, uc); 797 } 798 } 799 p->flags &= ~TERMP_NBRWORD; 800 } 801 802 static void 803 adjbuf(struct termp_col *c, size_t sz) 804 { 805 if (c->maxcols == 0) 806 c->maxcols = 1024; 807 while (c->maxcols <= sz) 808 c->maxcols <<= 2; 809 c->buf = mandoc_reallocarray(c->buf, c->maxcols, sizeof(*c->buf)); 810 } 811 812 static void 813 bufferc(struct termp *p, char c) 814 { 815 if (p->flags & TERMP_NOBUF) { 816 (*p->letter)(p, c); 817 return; 818 } 819 if (p->col + 1 >= p->tcol->maxcols) 820 adjbuf(p->tcol, p->col + 1); 821 if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP)) 822 p->tcol->buf[p->col] = c; 823 if (p->tcol->lastcol < ++p->col) 824 p->tcol->lastcol = p->col; 825 } 826 827 void 828 term_tab_ref(struct termp *p) 829 { 830 if (p->tcol->lastcol && p->tcol->lastcol <= p->col && 831 (p->flags & TERMP_NOBUF) == 0) 832 bufferc(p, ASCII_TABREF); 833 } 834 835 /* 836 * See encode(). 837 * Do this for a single (probably unicode) value. 838 * Does not check for non-decorated glyphs. 839 */ 840 static void 841 encode1(struct termp *p, int c) 842 { 843 enum termfont f; 844 845 if (p->flags & TERMP_NOBUF) { 846 (*p->letter)(p, c); 847 return; 848 } 849 850 if (p->col + 7 >= p->tcol->maxcols) 851 adjbuf(p->tcol, p->col + 7); 852 853 f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ? 854 p->fontq[p->fonti] : TERMFONT_NONE; 855 856 if (p->flags & TERMP_BACKBEFORE) { 857 if (p->tcol->buf[p->col - 1] == ' ' || 858 p->tcol->buf[p->col - 1] == '\t') 859 p->col--; 860 else 861 p->tcol->buf[p->col++] = '\b'; 862 p->flags &= ~TERMP_BACKBEFORE; 863 } 864 if (f == TERMFONT_UNDER || f == TERMFONT_BI) { 865 p->tcol->buf[p->col++] = '_'; 866 p->tcol->buf[p->col++] = '\b'; 867 } 868 if (f == TERMFONT_BOLD || f == TERMFONT_BI) { 869 if (c == ASCII_HYPH) 870 p->tcol->buf[p->col++] = '-'; 871 else 872 p->tcol->buf[p->col++] = c; 873 p->tcol->buf[p->col++] = '\b'; 874 } 875 if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP)) 876 p->tcol->buf[p->col] = c; 877 if (p->tcol->lastcol < ++p->col) 878 p->tcol->lastcol = p->col; 879 if (p->flags & TERMP_BACKAFTER) { 880 p->flags |= TERMP_BACKBEFORE; 881 p->flags &= ~TERMP_BACKAFTER; 882 } 883 } 884 885 static void 886 encode(struct termp *p, const char *word, size_t sz) 887 { 888 size_t i; 889 890 if (p->flags & TERMP_NOBUF) { 891 for (i = 0; i < sz; i++) 892 (*p->letter)(p, word[i]); 893 return; 894 } 895 896 if (p->col + 2 + (sz * 5) >= p->tcol->maxcols) 897 adjbuf(p->tcol, p->col + 2 + (sz * 5)); 898 899 for (i = 0; i < sz; i++) { 900 if (ASCII_HYPH == word[i] || 901 isgraph((unsigned char)word[i])) 902 encode1(p, word[i]); 903 else { 904 if (p->tcol->lastcol <= p->col || 905 (word[i] != ' ' && word[i] != ASCII_NBRSP)) 906 p->tcol->buf[p->col] = word[i]; 907 p->col++; 908 909 /* 910 * Postpone the effect of \z while handling 911 * an overstrike sequence from ascii_uc2str(). 912 */ 913 914 if (word[i] == '\b' && 915 (p->flags & TERMP_BACKBEFORE)) { 916 p->flags &= ~TERMP_BACKBEFORE; 917 p->flags |= TERMP_BACKAFTER; 918 } 919 } 920 } 921 if (p->tcol->lastcol < p->col) 922 p->tcol->lastcol = p->col; 923 } 924 925 void 926 term_setwidth(struct termp *p, const char *wstr) 927 { 928 struct roffsu su; 929 int iop, width; 930 931 iop = 0; 932 width = 0; 933 if (NULL != wstr) { 934 switch (*wstr) { 935 case '+': 936 iop = 1; 937 wstr++; 938 break; 939 case '-': 940 iop = -1; 941 wstr++; 942 break; 943 default: 944 break; 945 } 946 if (a2roffsu(wstr, &su, SCALE_MAX) != NULL) 947 width = term_hspan(p, &su); 948 else 949 iop = 0; 950 } 951 (*p->setwidth)(p, iop, width); 952 } 953 954 size_t 955 term_len(const struct termp *p, size_t sz) 956 { 957 958 return (*p->width)(p, ' ') * sz; 959 } 960 961 static size_t 962 cond_width(const struct termp *p, int c, int *skip) 963 { 964 965 if (*skip) { 966 (*skip) = 0; 967 return 0; 968 } else 969 return (*p->width)(p, c); 970 } 971 972 size_t 973 term_strlen(const struct termp *p, const char *cp) 974 { 975 size_t sz, rsz, i; 976 int ssz, skip, uc; 977 const char *seq, *rhs; 978 enum mandoc_esc esc; 979 static const char rej[] = { '\\', ASCII_NBRSP, ASCII_NBRZW, 980 ASCII_BREAK, ASCII_HYPH, ASCII_TABREF, '\0' }; 981 982 /* 983 * Account for escaped sequences within string length 984 * calculations. This follows the logic in term_word() as we 985 * must calculate the width of produced strings. 986 */ 987 988 sz = 0; 989 skip = 0; 990 while ('\0' != *cp) { 991 rsz = strcspn(cp, rej); 992 for (i = 0; i < rsz; i++) 993 sz += cond_width(p, *cp++, &skip); 994 995 switch (*cp) { 996 case '\\': 997 cp++; 998 rhs = NULL; 999 esc = mandoc_escape(&cp, &seq, &ssz); 1000 switch (esc) { 1001 case ESCAPE_UNICODE: 1002 uc = mchars_num2uc(seq + 1, ssz - 1); 1003 break; 1004 case ESCAPE_NUMBERED: 1005 uc = mchars_num2char(seq, ssz); 1006 if (uc < 0) 1007 continue; 1008 break; 1009 case ESCAPE_SPECIAL: 1010 if (p->enc == TERMENC_ASCII) { 1011 rhs = mchars_spec2str(seq, ssz, &rsz); 1012 if (rhs != NULL) 1013 break; 1014 } else { 1015 uc = mchars_spec2cp(seq, ssz); 1016 if (uc > 0) 1017 sz += cond_width(p, uc, &skip); 1018 } 1019 continue; 1020 case ESCAPE_UNDEF: 1021 uc = *seq; 1022 break; 1023 case ESCAPE_DEVICE: 1024 if (p->type == TERMTYPE_PDF) { 1025 rhs = "pdf"; 1026 rsz = 3; 1027 } else if (p->type == TERMTYPE_PS) { 1028 rhs = "ps"; 1029 rsz = 2; 1030 } else if (p->enc == TERMENC_ASCII) { 1031 rhs = "ascii"; 1032 rsz = 5; 1033 } else { 1034 rhs = "utf8"; 1035 rsz = 4; 1036 } 1037 break; 1038 case ESCAPE_SKIPCHAR: 1039 skip = 1; 1040 continue; 1041 case ESCAPE_OVERSTRIKE: 1042 rsz = 0; 1043 rhs = seq + ssz; 1044 while (seq < rhs) { 1045 if (*seq == '\\') { 1046 mandoc_escape(&seq, NULL, NULL); 1047 continue; 1048 } 1049 i = (*p->width)(p, *seq++); 1050 if (rsz < i) 1051 rsz = i; 1052 } 1053 sz += rsz; 1054 continue; 1055 default: 1056 continue; 1057 } 1058 1059 /* 1060 * Common handling for Unicode and numbered 1061 * character escape sequences. 1062 */ 1063 1064 if (rhs == NULL) { 1065 if (p->enc == TERMENC_ASCII) { 1066 rhs = ascii_uc2str(uc); 1067 rsz = strlen(rhs); 1068 } else { 1069 if ((uc < 0x20 && uc != 0x09) || 1070 (uc > 0x7E && uc < 0xA0)) 1071 uc = 0xFFFD; 1072 sz += cond_width(p, uc, &skip); 1073 continue; 1074 } 1075 } 1076 1077 if (skip) { 1078 skip = 0; 1079 break; 1080 } 1081 1082 /* 1083 * Common handling for all escape sequences 1084 * printing more than one character. 1085 */ 1086 1087 for (i = 0; i < rsz; i++) 1088 sz += (*p->width)(p, *rhs++); 1089 break; 1090 case ASCII_NBRSP: 1091 sz += cond_width(p, ' ', &skip); 1092 cp++; 1093 break; 1094 case ASCII_HYPH: 1095 sz += cond_width(p, '-', &skip); 1096 cp++; 1097 break; 1098 default: 1099 break; 1100 } 1101 } 1102 1103 return sz; 1104 } 1105 1106 int 1107 term_vspan(const struct termp *p, const struct roffsu *su) 1108 { 1109 double r; 1110 int ri; 1111 1112 switch (su->unit) { 1113 case SCALE_BU: 1114 r = su->scale / 40.0; 1115 break; 1116 case SCALE_CM: 1117 r = su->scale * 6.0 / 2.54; 1118 break; 1119 case SCALE_FS: 1120 r = su->scale * 65536.0 / 40.0; 1121 break; 1122 case SCALE_IN: 1123 r = su->scale * 6.0; 1124 break; 1125 case SCALE_MM: 1126 r = su->scale * 0.006; 1127 break; 1128 case SCALE_PC: 1129 r = su->scale; 1130 break; 1131 case SCALE_PT: 1132 r = su->scale / 12.0; 1133 break; 1134 case SCALE_EN: 1135 case SCALE_EM: 1136 r = su->scale * 0.6; 1137 break; 1138 case SCALE_VS: 1139 r = su->scale; 1140 break; 1141 default: 1142 abort(); 1143 } 1144 ri = r > 0.0 ? r + 0.4995 : r - 0.4995; 1145 return ri < 66 ? ri : 1; 1146 } 1147 1148 /* 1149 * Convert a scaling width to basic units, rounding towards 0. 1150 */ 1151 int 1152 term_hspan(const struct termp *p, const struct roffsu *su) 1153 { 1154 1155 return (*p->hspan)(p, su); 1156 } 1157 1158 /* 1159 * Convert a scaling width to basic units, rounding to closest. 1160 */ 1161 int 1162 term_hen(const struct termp *p, const struct roffsu *su) 1163 { 1164 int bu; 1165 1166 if ((bu = (*p->hspan)(p, su)) >= 0) 1167 return (bu + 11) / 24; 1168 else 1169 return -((-bu + 11) / 24); 1170 } 1171