1 /* $Id: term.c,v 1.294 2025/08/01 14:59:39 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2010-2022, 2025 Ingo Schwarze <schwarze@openbsd.org> 4 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <stdint.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 29 #include "mandoc.h" 30 #include "mandoc_aux.h" 31 #include "out.h" 32 #include "term.h" 33 #include "main.h" 34 35 static size_t cond_width(const struct termp *, int, int *); 36 static void adjbuf(struct termp_col *, size_t); 37 static void bufferc(struct termp *, char); 38 static void encode(struct termp *, const char *, size_t); 39 static void encode1(struct termp *, int); 40 static void endline(struct termp *); 41 static void term_field(struct termp *, size_t, size_t); 42 static void term_fill(struct termp *, size_t *, size_t *, 43 size_t); 44 45 46 void 47 term_setcol(struct termp *p, size_t maxtcol) 48 { 49 if (maxtcol > p->maxtcol) { 50 p->tcols = mandoc_recallocarray(p->tcols, 51 p->maxtcol, maxtcol, sizeof(*p->tcols)); 52 p->maxtcol = maxtcol; 53 } 54 p->lasttcol = maxtcol - 1; 55 p->tcol = p->tcols; 56 } 57 58 void 59 term_free(struct termp *p) 60 { 61 term_tab_free(); 62 for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++) 63 free(p->tcol->buf); 64 free(p->tcols); 65 free(p->fontq); 66 free(p); 67 } 68 69 void 70 term_begin(struct termp *p, term_margin head, 71 term_margin foot, const struct roff_meta *arg) 72 { 73 74 p->headf = head; 75 p->footf = foot; 76 p->argf = arg; 77 (*p->begin)(p); 78 } 79 80 void 81 term_end(struct termp *p) 82 { 83 84 (*p->end)(p); 85 } 86 87 /* 88 * Flush a chunk of text. By default, break the output line each time 89 * the right margin is reached, and continue output on the next line 90 * at the same offset as the chunk itself. By default, also break the 91 * output line at the end of the chunk. There are many flags modifying 92 * this behaviour, see the comments in the body of the function. 93 */ 94 void 95 term_flushln(struct termp *p) 96 { 97 /* Widths in basic units. */ 98 size_t vbl; /* Whitespace to prepend to the output. */ 99 size_t vbr; /* Actual visual position of the end of field. */ 100 size_t vfield; /* Desired visual field width. */ 101 size_t vtarget; /* Desired visual position of the right margin. */ 102 103 /* Bytes. */ 104 size_t ic; /* Byte index in the input buffer. */ 105 size_t nbr; /* Number of bytes to print in this field. */ 106 107 /* 108 * Normally, start writing at the left margin, but with the 109 * NOPAD flag, start writing at the current position instead. 110 */ 111 112 vbl = (p->flags & TERMP_NOPAD) || p->tcol->offset < p->viscol ? 113 0 : p->tcol->offset - p->viscol; 114 if (p->minbl > 0 && vbl < term_len(p, p->minbl)) 115 vbl = term_len(p, p->minbl); 116 117 if ((p->flags & TERMP_MULTICOL) == 0) 118 p->tcol->col = 0; 119 120 /* Loop over output lines. */ 121 122 for (;;) { 123 vfield = p->tcol->rmargin > p->viscol + vbl ? 124 p->tcol->rmargin - p->viscol - vbl : 0; 125 126 /* 127 * Normally, break the line at the the right margin 128 * of the field, but with the NOBREAK flag, only 129 * break it at the max right margin of the screen, 130 * and with the BRNEVER flag, never break it at all. 131 */ 132 133 vtarget = (p->flags & TERMP_NOBREAK) == 0 ? vfield : 134 p->maxrmargin > p->viscol + vbl ? 135 p->maxrmargin - p->viscol - vbl : 0; 136 137 /* 138 * Figure out how much text will fit in the field. 139 * If there is whitespace only, print nothing. 140 */ 141 142 term_fill(p, &nbr, &vbr, 143 p->flags & TERMP_BRNEVER ? SIZE_MAX / 2 : vtarget); 144 if (nbr == 0) 145 break; 146 147 /* 148 * With the CENTER or RIGHT flag, increase the indentation 149 * to center the text between the left and right margins 150 * or to adjust it to the right margin, respectively. 151 */ 152 153 if (vbr < vtarget) { 154 if (p->flags & TERMP_CENTER) 155 vbl += (vtarget - vbr) / 2; 156 else if (p->flags & TERMP_RIGHT) 157 vbl += vtarget - vbr; 158 } 159 160 /* Finally, print the field content. */ 161 162 term_field(p, vbl, nbr); 163 if (vbr < vtarget) 164 p->tcol->taboff += vbr; 165 else 166 p->tcol->taboff += vtarget; 167 p->tcol->taboff += term_len(p, 1); 168 169 /* 170 * If there is no text left in the field, exit the loop. 171 * If the BRTRSP flag is set, consider trailing 172 * whitespace significant when deciding whether 173 * the field fits or not. 174 */ 175 176 for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) { 177 switch (p->tcol->buf[ic]) { 178 case '\t': 179 if (p->flags & TERMP_BRTRSP) 180 vbr = term_tab_next(vbr); 181 continue; 182 case ' ': 183 if (p->flags & TERMP_BRTRSP) 184 vbr += term_len(p, 1); 185 continue; 186 case '\n': 187 case ASCII_NBRZW: 188 case ASCII_BREAK: 189 case ASCII_TABREF: 190 continue; 191 default: 192 break; 193 } 194 break; 195 } 196 if (ic == p->tcol->lastcol) 197 break; 198 199 /* 200 * At the location of an automatic line break, input 201 * space characters are consumed by the line break. 202 */ 203 204 while (p->tcol->col < p->tcol->lastcol && 205 p->tcol->buf[p->tcol->col] == ' ') 206 p->tcol->col++; 207 208 /* 209 * In multi-column mode, leave the rest of the text 210 * in the buffer to be handled by a subsequent 211 * invocation, such that the other columns of the 212 * table can be handled first. 213 * In single-column mode, simply break the line. 214 */ 215 216 if (p->flags & TERMP_MULTICOL) 217 return; 218 219 endline(p); 220 221 /* 222 * Normally, start the next line at the same indentation 223 * as this one, but with the BRIND flag, start it at the 224 * right margin instead. This is used together with 225 * NOBREAK for the tags in various kinds of tagged lists. 226 */ 227 228 vbl = p->flags & TERMP_BRIND ? 229 p->tcol->rmargin : p->tcol->offset; 230 } 231 232 /* Reset output state in preparation for the next field. */ 233 234 p->col = p->tcol->col = p->tcol->lastcol = 0; 235 p->minbl = p->trailspace; 236 p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE | TERMP_NOPAD); 237 238 if (p->flags & TERMP_MULTICOL) 239 return; 240 241 /* 242 * The HANG flag means that the next field 243 * always follows on the same line. 244 * The NOBREAK flag means that the next field 245 * follows on the same line unless the field was overrun. 246 * Normally, break the line at the end of each field. 247 */ 248 249 if ((p->flags & TERMP_HANG) == 0 && 250 ((p->flags & TERMP_NOBREAK) == 0 || 251 vbr + term_len(p, p->trailspace) > vfield + term_len(p, 1) / 2)) 252 endline(p); 253 } 254 255 /* 256 * Store the number of input bytes to print in this field in *nbr 257 * and their total visual width in basic units in *vbr. 258 * If there is only whitespace in the field, both remain zero. 259 * The desired visual width of the field is provided by vtarget. 260 * If the first word is longer, the field will be overrun. 261 */ 262 static void 263 term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget) 264 { 265 /* Widths in basic units. */ 266 size_t vis; /* Visual position of the current character. */ 267 size_t vn; /* Visual position of the next character. */ 268 size_t enw; /* Width of an EN unit. */ 269 int taboff; /* Temporary offset for literal tabs. */ 270 271 size_t ic; /* Byte index in the input buffer. */ 272 int breakline; /* Break at the end of this word. */ 273 int graph; /* Last character was non-blank. */ 274 275 *nbr = *vbr = vis = 0; 276 breakline = graph = 0; 277 taboff = p->tcol->taboff; 278 enw = (*p->getwidth)(p, ' '); 279 vtarget += enw / 2; 280 for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) { 281 switch (p->tcol->buf[ic]) { 282 case '\b': /* Escape \o (overstrike) or backspace markup. */ 283 assert(ic > 0); 284 vis -= (*p->getwidth)(p, p->tcol->buf[ic - 1]); 285 continue; 286 287 case ' ': 288 case ASCII_BREAK: /* Escape \: (breakpoint). */ 289 vn = vis; 290 if (p->tcol->buf[ic] == ' ') 291 vn += enw; 292 /* Can break at the end of a word. */ 293 if (breakline || vn > vtarget) 294 break; 295 if (graph) { 296 *nbr = ic; 297 *vbr = vis; 298 graph = 0; 299 } 300 vis = vn; 301 continue; 302 303 case '\n': /* Escape \p (break at the end of the word). */ 304 breakline = 1; 305 continue; 306 307 case ASCII_HYPH: /* Breakable hyphen. */ 308 graph = 1; 309 /* 310 * We are about to decide whether to break the 311 * line or not, so we no longer need this hyphen 312 * to be marked as breakable. Put back a real 313 * hyphen such that we get the correct width. 314 */ 315 p->tcol->buf[ic] = '-'; 316 vis += (*p->getwidth)(p, '-'); 317 if (vis > vtarget) { 318 ic++; 319 break; 320 } 321 *nbr = ic + 1; 322 *vbr = vis; 323 continue; 324 325 case ASCII_TABREF: 326 taboff = -vis - enw; 327 continue; 328 329 default: 330 switch (p->tcol->buf[ic]) { 331 case '\t': 332 if (taboff < 0 && (size_t)-taboff > vis) 333 vis = 0; 334 else 335 vis += taboff; 336 vis = term_tab_next(vis); 337 vis -= taboff; 338 break; 339 case ASCII_NBRZW: /* Non-breakable zero-width. */ 340 break; 341 case ASCII_NBRSP: /* Non-breakable space. */ 342 p->tcol->buf[ic] = ' '; 343 /* FALLTHROUGH */ 344 default: /* Printable character. */ 345 vis += (*p->getwidth)(p, p->tcol->buf[ic]); 346 break; 347 } 348 graph = 1; 349 if (vis > vtarget && *nbr > 0) 350 return; 351 continue; 352 } 353 break; 354 } 355 356 /* 357 * If the last word extends to the end of the field without any 358 * trailing whitespace, the loop could not check yet whether it 359 * can remain on this line. So do the check now. 360 */ 361 362 if (graph && (vis <= vtarget || *nbr == 0)) { 363 *nbr = ic; 364 *vbr = vis; 365 } 366 } 367 368 /* 369 * Print the contents of one field 370 * with an indentation of vbl basic units 371 * and an input string length of nbr bytes. 372 */ 373 static void 374 term_field(struct termp *p, size_t vbl, size_t nbr) 375 { 376 /* Widths in basic units. */ 377 size_t vis; /* Visual position of the current character. */ 378 size_t vt; /* Visual position including tab offset. */ 379 size_t dv; /* Visual width of the current character. */ 380 int taboff; /* Temporary offset for literal tabs. */ 381 382 size_t ic; /* Byte position in the input buffer. */ 383 384 vis = 0; 385 taboff = p->tcol->taboff; 386 for (ic = p->tcol->col; ic < nbr; ic++) { 387 388 /* 389 * To avoid the printing of trailing whitespace, 390 * do not print whitespace right away, only count it. 391 */ 392 393 switch (p->tcol->buf[ic]) { 394 case '\n': 395 case ASCII_BREAK: 396 case ASCII_NBRZW: 397 continue; 398 case ASCII_TABREF: 399 taboff = -vis - (*p->getwidth)(p, ' '); 400 continue; 401 case '\t': 402 case ' ': 403 case ASCII_NBRSP: 404 if (p->tcol->buf[ic] == '\t') { 405 if (taboff < 0 && (size_t)-taboff > vis) 406 vt = 0; 407 else 408 vt = vis + taboff; 409 dv = term_tab_next(vt) - vt; 410 } else 411 dv = (*p->getwidth)(p, ' '); 412 vbl += dv; 413 vis += dv; 414 continue; 415 default: 416 break; 417 } 418 419 /* 420 * We found a non-blank character to print, 421 * so write preceding white space now. 422 */ 423 424 if (vbl > 0) { 425 (*p->advance)(p, vbl); 426 vbl = 0; 427 } 428 429 /* Print the character and adjust the visual position. */ 430 431 (*p->letter)(p, p->tcol->buf[ic]); 432 if (p->tcol->buf[ic] == '\b') { 433 dv = (*p->getwidth)(p, p->tcol->buf[ic - 1]); 434 p->viscol -= dv; 435 vis -= dv; 436 } else { 437 dv = (*p->getwidth)(p, p->tcol->buf[ic]); 438 p->viscol += dv; 439 vis += dv; 440 } 441 } 442 p->tcol->col = nbr; 443 } 444 445 /* 446 * Print the margin character, if one is configured, 447 * and end the output line. 448 */ 449 static void 450 endline(struct termp *p) 451 { 452 if ((p->flags & (TERMP_NEWMC | TERMP_ENDMC)) == TERMP_ENDMC) { 453 p->mc = NULL; 454 p->flags &= ~TERMP_ENDMC; 455 } 456 if (p->mc != NULL) { 457 if (p->viscol > 0 && p->viscol <= p->maxrmargin) 458 (*p->advance)(p, 459 p->maxrmargin - p->viscol + term_len(p, 1)); 460 p->flags |= TERMP_NOBUF | TERMP_NOSPACE; 461 term_word(p, p->mc); 462 p->flags &= ~(TERMP_NOBUF | TERMP_NEWMC); 463 } 464 (*p->endline)(p); 465 } 466 467 /* 468 * A newline only breaks an existing line; it won't assert vertical 469 * space. All data in the output buffer is flushed prior to the newline 470 * assertion. 471 */ 472 void 473 term_newln(struct termp *p) 474 { 475 p->flags |= TERMP_NOSPACE; 476 if (p->tcol->lastcol || p->viscol) 477 term_flushln(p); 478 p->tcol->taboff = 0; 479 } 480 481 /* 482 * Asserts a vertical space (a full, empty line-break between lines). 483 * Note that if used twice, this will cause two blank spaces and so on. 484 * All data in the output buffer is flushed prior to the newline 485 * assertion. 486 */ 487 void 488 term_vspace(struct termp *p) 489 { 490 491 term_newln(p); 492 if (0 < p->skipvsp) 493 p->skipvsp--; 494 else 495 (*p->endline)(p); 496 } 497 498 /* Swap current and previous font; for \fP and .ft P */ 499 void 500 term_fontlast(struct termp *p) 501 { 502 enum termfont f; 503 504 f = p->fontl; 505 p->fontl = p->fontq[p->fonti]; 506 p->fontq[p->fonti] = f; 507 } 508 509 /* Set font, save current, discard previous; for \f, .ft, and man(7). */ 510 void 511 term_fontrepl(struct termp *p, enum termfont f) 512 { 513 p->fontl = p->fontq[p->fonti]; 514 if (p->fontibi && f == TERMFONT_UNDER) 515 f = TERMFONT_BI; 516 p->fontq[p->fonti] = f; 517 } 518 519 /* Set font, save previous; for mdoc(7), eqn(7), and tbl(7). */ 520 void 521 term_fontpush(struct termp *p, enum termfont f) 522 { 523 enum termfont fl; 524 525 fl = p->fontq[p->fonti]; 526 if (++p->fonti == p->fontsz) { 527 p->fontsz += 8; 528 p->fontq = mandoc_reallocarray(p->fontq, 529 p->fontsz, sizeof(*p->fontq)); 530 } 531 p->fontq[p->fonti] = fl; 532 term_fontrepl(p, f); 533 } 534 535 /* Flush to make the saved pointer current again. */ 536 void 537 term_fontpopq(struct termp *p, int i) 538 { 539 assert(i >= 0); 540 if (p->fonti > i) 541 p->fonti = i; 542 } 543 544 /* Pop one font off the stack. */ 545 void 546 term_fontpop(struct termp *p) 547 { 548 assert(p->fonti > 0); 549 p->fonti--; 550 } 551 552 /* 553 * Handle pwords, partial words, which may be either a single word or a 554 * phrase that cannot be broken down (such as a literal string). This 555 * handles word styling. 556 */ 557 void 558 term_word(struct termp *p, const char *word) 559 { 560 struct roffsu su; 561 const char nbrsp[2] = { ASCII_NBRSP, 0 }; 562 const char *seq; /* Escape sequence argument. */ 563 const char *cp; /* String to be printed. */ 564 size_t csz; /* String length in basic units. */ 565 size_t lsz; /* Line width in basic units. */ 566 size_t ssz; /* Substring length in bytes. */ 567 int sz; /* Argument length in bytes. */ 568 int uc; /* Unicode codepoint number. */ 569 int bu; /* Width in basic units. */ 570 enum mandoc_esc esc; 571 572 if ((p->flags & TERMP_NOBUF) == 0) { 573 if ((p->flags & TERMP_NOSPACE) == 0) { 574 if ((p->flags & TERMP_KEEP) == 0) { 575 bufferc(p, ' '); 576 if (p->flags & TERMP_SENTENCE) 577 bufferc(p, ' '); 578 } else 579 bufferc(p, ASCII_NBRSP); 580 } 581 if (p->flags & TERMP_PREKEEP) 582 p->flags |= TERMP_KEEP; 583 if (p->flags & TERMP_NONOSPACE) 584 p->flags |= TERMP_NOSPACE; 585 else 586 p->flags &= ~TERMP_NOSPACE; 587 p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE); 588 p->skipvsp = 0; 589 } 590 591 while ('\0' != *word) { 592 if ('\\' != *word) { 593 if (TERMP_NBRWORD & p->flags) { 594 if (' ' == *word) { 595 encode(p, nbrsp, 1); 596 word++; 597 continue; 598 } 599 ssz = strcspn(word, "\\ "); 600 } else 601 ssz = strcspn(word, "\\"); 602 encode(p, word, ssz); 603 word += (int)ssz; 604 continue; 605 } 606 607 word++; 608 esc = mandoc_escape(&word, &seq, &sz); 609 switch (esc) { 610 case ESCAPE_UNICODE: 611 uc = mchars_num2uc(seq + 1, sz - 1); 612 break; 613 case ESCAPE_NUMBERED: 614 uc = mchars_num2char(seq, sz); 615 if (uc >= 0) 616 break; 617 bufferc(p, ASCII_NBRZW); 618 continue; 619 case ESCAPE_SPECIAL: 620 if (p->enc == TERMENC_ASCII) { 621 cp = mchars_spec2str(seq, sz, &ssz); 622 if (cp != NULL) 623 encode(p, cp, ssz); 624 else 625 bufferc(p, ASCII_NBRZW); 626 } else { 627 uc = mchars_spec2cp(seq, sz); 628 if (uc > 0) 629 encode1(p, uc); 630 else 631 bufferc(p, ASCII_NBRZW); 632 } 633 continue; 634 case ESCAPE_UNDEF: 635 uc = *seq; 636 break; 637 case ESCAPE_FONTBOLD: 638 case ESCAPE_FONTCB: 639 term_fontrepl(p, TERMFONT_BOLD); 640 continue; 641 case ESCAPE_FONTITALIC: 642 case ESCAPE_FONTCI: 643 term_fontrepl(p, TERMFONT_UNDER); 644 continue; 645 case ESCAPE_FONTBI: 646 term_fontrepl(p, TERMFONT_BI); 647 continue; 648 case ESCAPE_FONT: 649 case ESCAPE_FONTCR: 650 case ESCAPE_FONTROMAN: 651 term_fontrepl(p, TERMFONT_NONE); 652 continue; 653 case ESCAPE_FONTPREV: 654 term_fontlast(p); 655 continue; 656 case ESCAPE_BREAK: 657 bufferc(p, '\n'); 658 continue; 659 case ESCAPE_NOSPACE: 660 if (p->flags & TERMP_BACKAFTER) 661 p->flags &= ~TERMP_BACKAFTER; 662 else if (*word == '\0') 663 p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE); 664 continue; 665 case ESCAPE_DEVICE: 666 if (p->type == TERMTYPE_PDF) 667 encode(p, "pdf", 3); 668 else if (p->type == TERMTYPE_PS) 669 encode(p, "ps", 2); 670 else if (p->enc == TERMENC_ASCII) 671 encode(p, "ascii", 5); 672 else 673 encode(p, "utf8", 4); 674 continue; 675 case ESCAPE_HORIZ: 676 if (p->flags & TERMP_BACKAFTER) { 677 p->flags &= ~TERMP_BACKAFTER; 678 continue; 679 } 680 if (*seq == '|') { 681 seq++; 682 bu = -term_len(p, p->col); 683 } else 684 bu = 0; 685 if (a2roffsu(seq, &su, SCALE_EM) == NULL) 686 continue; 687 bu += term_hspan(p, &su); 688 if (bu >= 0) { 689 while (bu > 0) { 690 bu -= term_len(p, 1); 691 if (p->flags & TERMP_BACKBEFORE) 692 p->flags &= ~TERMP_BACKBEFORE; 693 else 694 bufferc(p, ASCII_NBRSP); 695 } 696 continue; 697 } 698 if (p->flags & TERMP_BACKBEFORE) { 699 p->flags &= ~TERMP_BACKBEFORE; 700 assert(p->col > 1); 701 p->col--; 702 } 703 if (term_len(p, p->col) >= (size_t)(-bu)) { 704 p->col -= -bu / term_len(p, 1); 705 } else { 706 bu += term_len(p, p->col); 707 p->col = 0; 708 if (p->tcol->offset > (size_t)(-bu)) { 709 p->ti += bu; 710 p->tcol->offset += bu; 711 } else { 712 p->ti -= p->tcol->offset; 713 p->tcol->offset = 0; 714 } 715 } 716 continue; 717 case ESCAPE_HLINE: 718 if ((cp = a2roffsu(seq, &su, SCALE_EM)) == NULL) 719 continue; 720 bu = term_hspan(p, &su); 721 if (bu <= 0) { 722 if (p->tcol->rmargin <= p->tcol->offset) 723 continue; 724 lsz = p->tcol->rmargin - p->tcol->offset; 725 } else 726 lsz = bu; 727 if (*cp == seq[-1]) 728 uc = -1; 729 else if (*cp == '\\') { 730 seq = cp + 1; 731 esc = mandoc_escape(&seq, &cp, &sz); 732 switch (esc) { 733 case ESCAPE_UNICODE: 734 uc = mchars_num2uc(cp + 1, sz - 1); 735 break; 736 case ESCAPE_NUMBERED: 737 uc = mchars_num2char(cp, sz); 738 break; 739 case ESCAPE_SPECIAL: 740 uc = mchars_spec2cp(cp, sz); 741 break; 742 case ESCAPE_UNDEF: 743 uc = *seq; 744 break; 745 default: 746 uc = -1; 747 break; 748 } 749 } else 750 uc = *cp; 751 if (uc < 0x20 || (uc > 0x7E && uc < 0xA0)) 752 uc = '_'; 753 if (p->enc == TERMENC_ASCII) { 754 cp = ascii_uc2str(uc); 755 csz = term_strlen(p, cp); 756 ssz = strlen(cp); 757 } else 758 csz = (*p->getwidth)(p, uc); 759 while (lsz > 0) { 760 if (p->enc == TERMENC_ASCII) 761 encode(p, cp, ssz); 762 else 763 encode1(p, uc); 764 if (lsz > csz) 765 lsz -= csz; 766 else 767 lsz = 0; 768 } 769 continue; 770 case ESCAPE_SKIPCHAR: 771 p->flags |= TERMP_BACKAFTER; 772 continue; 773 case ESCAPE_OVERSTRIKE: 774 cp = seq + sz; 775 while (seq < cp) { 776 if (*seq == '\\') { 777 mandoc_escape(&seq, NULL, NULL); 778 continue; 779 } 780 encode1(p, *seq++); 781 if (seq < cp) { 782 if (p->flags & TERMP_BACKBEFORE) 783 p->flags |= TERMP_BACKAFTER; 784 else 785 p->flags |= TERMP_BACKBEFORE; 786 } 787 } 788 /* Trim trailing backspace/blank pair. */ 789 if (p->tcol->lastcol > 2 && 790 (p->tcol->buf[p->tcol->lastcol - 1] == ' ' || 791 p->tcol->buf[p->tcol->lastcol - 1] == '\t')) 792 p->tcol->lastcol -= 2; 793 if (p->col > p->tcol->lastcol) 794 p->col = p->tcol->lastcol; 795 continue; 796 case ESCAPE_IGNORE: 797 bufferc(p, ASCII_NBRZW); 798 continue; 799 default: 800 continue; 801 } 802 803 /* 804 * Common handling for Unicode and numbered 805 * character escape sequences. 806 */ 807 808 if (p->enc == TERMENC_ASCII) { 809 cp = ascii_uc2str(uc); 810 encode(p, cp, strlen(cp)); 811 } else { 812 if ((uc < 0x20 && uc != 0x09) || 813 (uc > 0x7E && uc < 0xA0)) 814 uc = 0xFFFD; 815 encode1(p, uc); 816 } 817 } 818 p->flags &= ~TERMP_NBRWORD; 819 } 820 821 static void 822 adjbuf(struct termp_col *c, size_t sz) 823 { 824 if (c->maxcols == 0) 825 c->maxcols = 1024; 826 while (c->maxcols <= sz) 827 c->maxcols <<= 2; 828 c->buf = mandoc_reallocarray(c->buf, c->maxcols, sizeof(*c->buf)); 829 } 830 831 static void 832 bufferc(struct termp *p, char c) 833 { 834 if (p->flags & TERMP_NOBUF) { 835 (*p->letter)(p, c); 836 return; 837 } 838 if (p->col + 1 >= p->tcol->maxcols) 839 adjbuf(p->tcol, p->col + 1); 840 if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP)) 841 p->tcol->buf[p->col] = c; 842 if (p->tcol->lastcol < ++p->col) 843 p->tcol->lastcol = p->col; 844 } 845 846 void 847 term_tab_ref(struct termp *p) 848 { 849 if (p->tcol->lastcol && p->tcol->lastcol <= p->col && 850 (p->flags & TERMP_NOBUF) == 0) 851 bufferc(p, ASCII_TABREF); 852 } 853 854 /* 855 * See encode(). 856 * Do this for a single (probably unicode) value. 857 * Does not check for non-decorated glyphs. 858 */ 859 static void 860 encode1(struct termp *p, int c) 861 { 862 enum termfont f; 863 864 if (p->flags & TERMP_NOBUF) { 865 (*p->letter)(p, c); 866 return; 867 } 868 869 if (p->col + 7 >= p->tcol->maxcols) 870 adjbuf(p->tcol, p->col + 7); 871 872 f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ? 873 p->fontq[p->fonti] : TERMFONT_NONE; 874 875 if (p->flags & TERMP_BACKBEFORE) { 876 if (p->tcol->buf[p->col - 1] == ' ' || 877 p->tcol->buf[p->col - 1] == '\t') 878 p->col--; 879 else 880 p->tcol->buf[p->col++] = '\b'; 881 p->flags &= ~TERMP_BACKBEFORE; 882 } 883 if (f == TERMFONT_UNDER || f == TERMFONT_BI) { 884 p->tcol->buf[p->col++] = '_'; 885 p->tcol->buf[p->col++] = '\b'; 886 } 887 if (f == TERMFONT_BOLD || f == TERMFONT_BI) { 888 if (c == ASCII_HYPH) 889 p->tcol->buf[p->col++] = '-'; 890 else 891 p->tcol->buf[p->col++] = c; 892 p->tcol->buf[p->col++] = '\b'; 893 } 894 if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP)) 895 p->tcol->buf[p->col] = c; 896 if (p->tcol->lastcol < ++p->col) 897 p->tcol->lastcol = p->col; 898 if (p->flags & TERMP_BACKAFTER) { 899 p->flags |= TERMP_BACKBEFORE; 900 p->flags &= ~TERMP_BACKAFTER; 901 } 902 } 903 904 static void 905 encode(struct termp *p, const char *word, size_t sz) 906 { 907 size_t i; 908 909 if (p->flags & TERMP_NOBUF) { 910 for (i = 0; i < sz; i++) 911 (*p->letter)(p, word[i]); 912 return; 913 } 914 915 if (p->col + 2 + (sz * 5) >= p->tcol->maxcols) 916 adjbuf(p->tcol, p->col + 2 + (sz * 5)); 917 918 for (i = 0; i < sz; i++) { 919 if (ASCII_HYPH == word[i] || 920 isgraph((unsigned char)word[i])) 921 encode1(p, word[i]); 922 else { 923 if (p->tcol->lastcol <= p->col || 924 (word[i] != ' ' && word[i] != ASCII_NBRSP)) 925 p->tcol->buf[p->col] = word[i]; 926 p->col++; 927 928 /* 929 * Postpone the effect of \z while handling 930 * an overstrike sequence from ascii_uc2str(). 931 */ 932 933 if (word[i] == '\b' && 934 (p->flags & TERMP_BACKBEFORE)) { 935 p->flags &= ~TERMP_BACKBEFORE; 936 p->flags |= TERMP_BACKAFTER; 937 } 938 } 939 } 940 if (p->tcol->lastcol < p->col) 941 p->tcol->lastcol = p->col; 942 } 943 944 void 945 term_setwidth(struct termp *p, const char *wstr) 946 { 947 struct roffsu su; 948 int iop, width; 949 950 iop = 0; 951 width = 0; 952 if (NULL != wstr) { 953 switch (*wstr) { 954 case '+': 955 iop = 1; 956 wstr++; 957 break; 958 case '-': 959 iop = -1; 960 wstr++; 961 break; 962 default: 963 break; 964 } 965 if (a2roffsu(wstr, &su, SCALE_MAX) != NULL) 966 width = term_hspan(p, &su); 967 else 968 iop = 0; 969 } 970 (*p->setwidth)(p, iop, width); 971 } 972 973 size_t 974 term_len(const struct termp *p, size_t sz) 975 { 976 return (*p->getwidth)(p, ' ') * sz; 977 } 978 979 static size_t 980 cond_width(const struct termp *p, int c, int *skip) 981 { 982 if (*skip) { 983 (*skip) = 0; 984 return 0; 985 } else 986 return (*p->getwidth)(p, c); 987 } 988 989 size_t 990 term_strlen(const struct termp *p, const char *cp) 991 { 992 const char *seq; /* Escape sequence argument. */ 993 const char *rhs; /* String to be printed. */ 994 995 /* Widths in basic units. */ 996 size_t sz; /* Return value. */ 997 size_t this_sz; /* Individual char for overstrike. */ 998 size_t max_sz; /* Result of overstrike. */ 999 1000 /* Numbers of bytes. */ 1001 size_t rsz; /* Substring length in bytes. */ 1002 size_t i; /* Byte index in substring. */ 1003 int ssz; /* Argument length in bytes. */ 1004 int skip; /* Number of bytes to skip. */ 1005 1006 int uc; /* Unicode codepoint number. */ 1007 enum mandoc_esc esc; 1008 1009 static const char rej[] = { '\\', ASCII_NBRSP, ASCII_NBRZW, 1010 ASCII_BREAK, ASCII_HYPH, ASCII_TABREF, '\0' }; 1011 1012 /* 1013 * Account for escaped sequences within string length 1014 * calculations. This follows the logic in term_word() as we 1015 * must calculate the width of produced strings. 1016 */ 1017 1018 sz = 0; 1019 skip = 0; 1020 while ('\0' != *cp) { 1021 rsz = strcspn(cp, rej); 1022 for (i = 0; i < rsz; i++) 1023 sz += cond_width(p, *cp++, &skip); 1024 1025 switch (*cp) { 1026 case '\\': 1027 cp++; 1028 rhs = NULL; 1029 esc = mandoc_escape(&cp, &seq, &ssz); 1030 switch (esc) { 1031 case ESCAPE_UNICODE: 1032 uc = mchars_num2uc(seq + 1, ssz - 1); 1033 break; 1034 case ESCAPE_NUMBERED: 1035 uc = mchars_num2char(seq, ssz); 1036 if (uc < 0) 1037 continue; 1038 break; 1039 case ESCAPE_SPECIAL: 1040 if (p->enc == TERMENC_ASCII) { 1041 rhs = mchars_spec2str(seq, ssz, &rsz); 1042 if (rhs != NULL) 1043 break; 1044 } else { 1045 uc = mchars_spec2cp(seq, ssz); 1046 if (uc > 0) 1047 sz += cond_width(p, uc, &skip); 1048 } 1049 continue; 1050 case ESCAPE_UNDEF: 1051 uc = *seq; 1052 break; 1053 case ESCAPE_DEVICE: 1054 if (p->type == TERMTYPE_PDF) { 1055 rhs = "pdf"; 1056 rsz = 3; 1057 } else if (p->type == TERMTYPE_PS) { 1058 rhs = "ps"; 1059 rsz = 2; 1060 } else if (p->enc == TERMENC_ASCII) { 1061 rhs = "ascii"; 1062 rsz = 5; 1063 } else { 1064 rhs = "utf8"; 1065 rsz = 4; 1066 } 1067 break; 1068 case ESCAPE_SKIPCHAR: 1069 skip = 1; 1070 continue; 1071 case ESCAPE_OVERSTRIKE: 1072 max_sz = 0; 1073 rhs = seq + ssz; 1074 while (seq < rhs) { 1075 if (*seq == '\\') { 1076 mandoc_escape(&seq, NULL, NULL); 1077 continue; 1078 } 1079 this_sz = (*p->getwidth)(p, *seq++); 1080 if (max_sz < this_sz) 1081 max_sz = this_sz; 1082 } 1083 sz += max_sz; 1084 continue; 1085 default: 1086 continue; 1087 } 1088 1089 /* 1090 * Common handling for Unicode and numbered 1091 * character escape sequences. 1092 */ 1093 1094 if (rhs == NULL) { 1095 if (p->enc == TERMENC_ASCII) { 1096 rhs = ascii_uc2str(uc); 1097 rsz = strlen(rhs); 1098 } else { 1099 if ((uc < 0x20 && uc != 0x09) || 1100 (uc > 0x7E && uc < 0xA0)) 1101 uc = 0xFFFD; 1102 sz += cond_width(p, uc, &skip); 1103 continue; 1104 } 1105 } 1106 1107 if (skip) { 1108 skip = 0; 1109 break; 1110 } 1111 1112 /* 1113 * Common handling for all escape sequences 1114 * printing more than one character. 1115 */ 1116 1117 for (i = 0; i < rsz; i++) 1118 sz += (*p->getwidth)(p, *rhs++); 1119 break; 1120 case ASCII_NBRSP: 1121 sz += cond_width(p, ' ', &skip); 1122 cp++; 1123 break; 1124 case ASCII_HYPH: 1125 sz += cond_width(p, '-', &skip); 1126 cp++; 1127 break; 1128 default: 1129 break; 1130 } 1131 } 1132 1133 return sz; 1134 } 1135 1136 int 1137 term_vspan(const struct termp *p, const struct roffsu *su) 1138 { 1139 double r; 1140 int ri; 1141 1142 switch (su->unit) { 1143 case SCALE_BU: 1144 r = su->scale / 40.0; 1145 break; 1146 case SCALE_CM: 1147 r = su->scale * 6.0 / 2.54; 1148 break; 1149 case SCALE_FS: 1150 r = su->scale * 65536.0 / 40.0; 1151 break; 1152 case SCALE_IN: 1153 r = su->scale * 6.0; 1154 break; 1155 case SCALE_MM: 1156 r = su->scale * 0.006; 1157 break; 1158 case SCALE_PC: 1159 r = su->scale; 1160 break; 1161 case SCALE_PT: 1162 r = su->scale / 12.0; 1163 break; 1164 case SCALE_EN: 1165 case SCALE_EM: 1166 r = su->scale * 0.6; 1167 break; 1168 case SCALE_VS: 1169 r = su->scale; 1170 break; 1171 default: 1172 abort(); 1173 } 1174 ri = r > 0.0 ? r + 0.4995 : r - 0.4995; 1175 return ri < 66 ? ri : 1; 1176 } 1177 1178 /* 1179 * Convert a scaling width to basic units. 1180 */ 1181 int 1182 term_hspan(const struct termp *p, const struct roffsu *su) 1183 { 1184 return (*p->hspan)(p, su); 1185 } 1186