1 /* $Id: term.c,v 1.291 2023/04/28 19:11:04 schwarze Exp $ */
2 /*
3 * Copyright (c) 2010-2022 Ingo Schwarze <schwarze@openbsd.org>
4 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdint.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "out.h"
32 #include "term.h"
33 #include "main.h"
34
/* Forward declarations of the helpers private to this file. */
static void	 adjbuf(struct termp_col *c, size_t sz);
static void	 bufferc(struct termp *p, char c);
static size_t	 cond_width(const struct termp *p, int c, int *skip);
static void	 encode(struct termp *p, const char *word, size_t sz);
static void	 encode1(struct termp *p, int c);
static void	 endline(struct termp *p);
static void	 term_field(struct termp *p, size_t vbl, size_t nbr);
static void	 term_fill(struct termp *p, size_t *nbr, size_t *vbr,
		    size_t vtarget);
44
45
46 void
term_setcol(struct termp * p,size_t maxtcol)47 term_setcol(struct termp *p, size_t maxtcol)
48 {
49 if (maxtcol > p->maxtcol) {
50 p->tcols = mandoc_recallocarray(p->tcols,
51 p->maxtcol, maxtcol, sizeof(*p->tcols));
52 p->maxtcol = maxtcol;
53 }
54 p->lasttcol = maxtcol - 1;
55 p->tcol = p->tcols;
56 }
57
58 void
term_free(struct termp * p)59 term_free(struct termp *p)
60 {
61 term_tab_free();
62 for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++)
63 free(p->tcol->buf);
64 free(p->tcols);
65 free(p->fontq);
66 free(p);
67 }
68
69 void
term_begin(struct termp * p,term_margin head,term_margin foot,const struct roff_meta * arg)70 term_begin(struct termp *p, term_margin head,
71 term_margin foot, const struct roff_meta *arg)
72 {
73
74 p->headf = head;
75 p->footf = foot;
76 p->argf = arg;
77 (*p->begin)(p);
78 }
79
80 void
term_end(struct termp * p)81 term_end(struct termp *p)
82 {
83
84 (*p->end)(p);
85 }
86
87 /*
88 * Flush a chunk of text. By default, break the output line each time
89 * the right margin is reached, and continue output on the next line
90 * at the same offset as the chunk itself. By default, also break the
91 * output line at the end of the chunk. There are many flags modifying
92 * this behaviour, see the comments in the body of the function.
93 */
94 void
term_flushln(struct termp * p)95 term_flushln(struct termp *p)
96 {
97 size_t vbl; /* Number of blanks to prepend to the output. */
98 size_t vbr; /* Actual visual position of the end of field. */
99 size_t vfield; /* Desired visual field width. */
100 size_t vtarget; /* Desired visual position of the right margin. */
101 size_t ic; /* Character position in the input buffer. */
102 size_t nbr; /* Number of characters to print in this field. */
103
104 /*
105 * Normally, start writing at the left margin, but with the
106 * NOPAD flag, start writing at the current position instead.
107 */
108
109 vbl = (p->flags & TERMP_NOPAD) || p->tcol->offset < p->viscol ?
110 0 : p->tcol->offset - p->viscol;
111 if (p->minbl && vbl < p->minbl)
112 vbl = p->minbl;
113
114 if ((p->flags & TERMP_MULTICOL) == 0)
115 p->tcol->col = 0;
116
117 /* Loop over output lines. */
118
119 for (;;) {
120 vfield = p->tcol->rmargin > p->viscol + vbl ?
121 p->tcol->rmargin - p->viscol - vbl : 0;
122
123 /*
124 * Normally, break the line at the the right margin
125 * of the field, but with the NOBREAK flag, only
126 * break it at the max right margin of the screen,
127 * and with the BRNEVER flag, never break it at all.
128 */
129
130 vtarget = (p->flags & TERMP_NOBREAK) == 0 ? vfield :
131 p->maxrmargin > p->viscol + vbl ?
132 p->maxrmargin - p->viscol - vbl : 0;
133
134 /*
135 * Figure out how much text will fit in the field.
136 * If there is whitespace only, print nothing.
137 */
138
139 term_fill(p, &nbr, &vbr,
140 p->flags & TERMP_BRNEVER ? SIZE_MAX : vtarget);
141 if (nbr == 0)
142 break;
143
144 /*
145 * With the CENTER or RIGHT flag, increase the indentation
146 * to center the text between the left and right margins
147 * or to adjust it to the right margin, respectively.
148 */
149
150 if (vbr < vtarget) {
151 if (p->flags & TERMP_CENTER)
152 vbl += (vtarget - vbr) / 2;
153 else if (p->flags & TERMP_RIGHT)
154 vbl += vtarget - vbr;
155 }
156
157 /* Finally, print the field content. */
158
159 term_field(p, vbl, nbr);
160 if (vbr < vtarget)
161 p->tcol->taboff += vbr;
162 else
163 p->tcol->taboff += vtarget;
164 p->tcol->taboff += (*p->width)(p, ' ');
165
166 /*
167 * If there is no text left in the field, exit the loop.
168 * If the BRTRSP flag is set, consider trailing
169 * whitespace significant when deciding whether
170 * the field fits or not.
171 */
172
173 for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
174 switch (p->tcol->buf[ic]) {
175 case '\t':
176 if (p->flags & TERMP_BRTRSP)
177 vbr = term_tab_next(vbr);
178 continue;
179 case ' ':
180 if (p->flags & TERMP_BRTRSP)
181 vbr += (*p->width)(p, ' ');
182 continue;
183 case '\n':
184 case ASCII_NBRZW:
185 case ASCII_BREAK:
186 case ASCII_TABREF:
187 continue;
188 default:
189 break;
190 }
191 break;
192 }
193 if (ic == p->tcol->lastcol)
194 break;
195
196 /*
197 * At the location of an automatic line break, input
198 * space characters are consumed by the line break.
199 */
200
201 while (p->tcol->col < p->tcol->lastcol &&
202 p->tcol->buf[p->tcol->col] == ' ')
203 p->tcol->col++;
204
205 /*
206 * In multi-column mode, leave the rest of the text
207 * in the buffer to be handled by a subsequent
208 * invocation, such that the other columns of the
209 * table can be handled first.
210 * In single-column mode, simply break the line.
211 */
212
213 if (p->flags & TERMP_MULTICOL)
214 return;
215
216 endline(p);
217
218 /*
219 * Normally, start the next line at the same indentation
220 * as this one, but with the BRIND flag, start it at the
221 * right margin instead. This is used together with
222 * NOBREAK for the tags in various kinds of tagged lists.
223 */
224
225 vbl = p->flags & TERMP_BRIND ?
226 p->tcol->rmargin : p->tcol->offset;
227 }
228
229 /* Reset output state in preparation for the next field. */
230
231 p->col = p->tcol->col = p->tcol->lastcol = 0;
232 p->minbl = p->trailspace;
233 p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE | TERMP_NOPAD);
234
235 if (p->flags & TERMP_MULTICOL)
236 return;
237
238 /*
239 * The HANG flag means that the next field
240 * always follows on the same line.
241 * The NOBREAK flag means that the next field
242 * follows on the same line unless the field was overrun.
243 * Normally, break the line at the end of each field.
244 */
245
246 if ((p->flags & TERMP_HANG) == 0 &&
247 ((p->flags & TERMP_NOBREAK) == 0 ||
248 vbr + term_len(p, p->trailspace) > vfield))
249 endline(p);
250 }
251
252 /*
253 * Store the number of input characters to print in this field in *nbr
254 * and their total visual width to print in *vbr.
255 * If there is only whitespace in the field, both remain zero.
256 * The desired visual width of the field is provided by vtarget.
257 * If the first word is longer, the field will be overrun.
258 */
259 static void
term_fill(struct termp * p,size_t * nbr,size_t * vbr,size_t vtarget)260 term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget)
261 {
262 size_t ic; /* Character position in the input buffer. */
263 size_t vis; /* Visual position of the current character. */
264 size_t vn; /* Visual position of the next character. */
265 int breakline; /* Break at the end of this word. */
266 int graph; /* Last character was non-blank. */
267 int taboff; /* Temporary offset for literal tabs. */
268
269 *nbr = *vbr = vis = 0;
270 breakline = graph = 0;
271 taboff = p->tcol->taboff;
272 for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
273 switch (p->tcol->buf[ic]) {
274 case '\b': /* Escape \o (overstrike) or backspace markup. */
275 assert(ic > 0);
276 vis -= (*p->width)(p, p->tcol->buf[ic - 1]);
277 continue;
278
279 case ' ':
280 case ASCII_BREAK: /* Escape \: (breakpoint). */
281 vn = vis;
282 if (p->tcol->buf[ic] == ' ')
283 vn += (*p->width)(p, ' ');
284 /* Can break at the end of a word. */
285 if (breakline || vn > vtarget)
286 break;
287 if (graph) {
288 *nbr = ic;
289 *vbr = vis;
290 graph = 0;
291 }
292 vis = vn;
293 continue;
294
295 case '\n': /* Escape \p (break at the end of the word). */
296 breakline = 1;
297 continue;
298
299 case ASCII_HYPH: /* Breakable hyphen. */
300 graph = 1;
301 /*
302 * We are about to decide whether to break the
303 * line or not, so we no longer need this hyphen
304 * to be marked as breakable. Put back a real
305 * hyphen such that we get the correct width.
306 */
307 p->tcol->buf[ic] = '-';
308 vis += (*p->width)(p, '-');
309 if (vis > vtarget) {
310 ic++;
311 break;
312 }
313 *nbr = ic + 1;
314 *vbr = vis;
315 continue;
316
317 case ASCII_TABREF:
318 taboff = -vis - (*p->width)(p, ' ');
319 continue;
320
321 default:
322 switch (p->tcol->buf[ic]) {
323 case '\t':
324 if (taboff < 0 && (size_t)-taboff > vis)
325 vis = 0;
326 else
327 vis += taboff;
328 vis = term_tab_next(vis);
329 vis -= taboff;
330 break;
331 case ASCII_NBRZW: /* Non-breakable zero-width. */
332 break;
333 case ASCII_NBRSP: /* Non-breakable space. */
334 p->tcol->buf[ic] = ' ';
335 /* FALLTHROUGH */
336 default: /* Printable character. */
337 vis += (*p->width)(p, p->tcol->buf[ic]);
338 break;
339 }
340 graph = 1;
341 if (vis > vtarget && *nbr > 0)
342 return;
343 continue;
344 }
345 break;
346 }
347
348 /*
349 * If the last word extends to the end of the field without any
350 * trailing whitespace, the loop could not check yet whether it
351 * can remain on this line. So do the check now.
352 */
353
354 if (graph && (vis <= vtarget || *nbr == 0)) {
355 *nbr = ic;
356 *vbr = vis;
357 }
358 }
359
360 /*
361 * Print the contents of one field
362 * with an indentation of vbl visual columns,
363 * and an input string length of nbr characters.
364 */
365 static void
term_field(struct termp * p,size_t vbl,size_t nbr)366 term_field(struct termp *p, size_t vbl, size_t nbr)
367 {
368 size_t ic; /* Character position in the input buffer. */
369 size_t vis; /* Visual position of the current character. */
370 size_t vt; /* Visual position including tab offset. */
371 size_t dv; /* Visual width of the current character. */
372 int taboff; /* Temporary offset for literal tabs. */
373
374 vis = 0;
375 taboff = p->tcol->taboff;
376 for (ic = p->tcol->col; ic < nbr; ic++) {
377
378 /*
379 * To avoid the printing of trailing whitespace,
380 * do not print whitespace right away, only count it.
381 */
382
383 switch (p->tcol->buf[ic]) {
384 case '\n':
385 case ASCII_BREAK:
386 case ASCII_NBRZW:
387 continue;
388 case ASCII_TABREF:
389 taboff = -vis - (*p->width)(p, ' ');
390 continue;
391 case '\t':
392 case ' ':
393 case ASCII_NBRSP:
394 if (p->tcol->buf[ic] == '\t') {
395 if (taboff < 0 && (size_t)-taboff > vis)
396 vt = 0;
397 else
398 vt = vis + taboff;
399 dv = term_tab_next(vt) - vt;
400 } else
401 dv = (*p->width)(p, ' ');
402 vbl += dv;
403 vis += dv;
404 continue;
405 default:
406 break;
407 }
408
409 /*
410 * We found a non-blank character to print,
411 * so write preceding white space now.
412 */
413
414 if (vbl > 0) {
415 (*p->advance)(p, vbl);
416 p->viscol += vbl;
417 vbl = 0;
418 }
419
420 /* Print the character and adjust the visual position. */
421
422 (*p->letter)(p, p->tcol->buf[ic]);
423 if (p->tcol->buf[ic] == '\b') {
424 dv = (*p->width)(p, p->tcol->buf[ic - 1]);
425 p->viscol -= dv;
426 vis -= dv;
427 } else {
428 dv = (*p->width)(p, p->tcol->buf[ic]);
429 p->viscol += dv;
430 vis += dv;
431 }
432 }
433 p->tcol->col = nbr;
434 }
435
436 static void
endline(struct termp * p)437 endline(struct termp *p)
438 {
439 if ((p->flags & (TERMP_NEWMC | TERMP_ENDMC)) == TERMP_ENDMC) {
440 p->mc = NULL;
441 p->flags &= ~TERMP_ENDMC;
442 }
443 if (p->mc != NULL) {
444 if (p->viscol && p->maxrmargin >= p->viscol)
445 (*p->advance)(p, p->maxrmargin - p->viscol + 1);
446 p->flags |= TERMP_NOBUF | TERMP_NOSPACE;
447 term_word(p, p->mc);
448 p->flags &= ~(TERMP_NOBUF | TERMP_NEWMC);
449 }
450 p->viscol = 0;
451 p->minbl = 0;
452 (*p->endline)(p);
453 }
454
455 /*
456 * A newline only breaks an existing line; it won't assert vertical
457 * space. All data in the output buffer is flushed prior to the newline
458 * assertion.
459 */
460 void
term_newln(struct termp * p)461 term_newln(struct termp *p)
462 {
463 p->flags |= TERMP_NOSPACE;
464 if (p->tcol->lastcol || p->viscol)
465 term_flushln(p);
466 p->tcol->taboff = 0;
467 }
468
469 /*
470 * Asserts a vertical space (a full, empty line-break between lines).
471 * Note that if used twice, this will cause two blank spaces and so on.
472 * All data in the output buffer is flushed prior to the newline
473 * assertion.
474 */
475 void
term_vspace(struct termp * p)476 term_vspace(struct termp *p)
477 {
478
479 term_newln(p);
480 p->viscol = 0;
481 p->minbl = 0;
482 if (0 < p->skipvsp)
483 p->skipvsp--;
484 else
485 (*p->endline)(p);
486 }
487
488 /* Swap current and previous font; for \fP and .ft P */
489 void
term_fontlast(struct termp * p)490 term_fontlast(struct termp *p)
491 {
492 enum termfont f;
493
494 f = p->fontl;
495 p->fontl = p->fontq[p->fonti];
496 p->fontq[p->fonti] = f;
497 }
498
499 /* Set font, save current, discard previous; for \f, .ft, .B etc. */
500 void
term_fontrepl(struct termp * p,enum termfont f)501 term_fontrepl(struct termp *p, enum termfont f)
502 {
503
504 p->fontl = p->fontq[p->fonti];
505 p->fontq[p->fonti] = f;
506 }
507
508 /* Set font, save previous. */
509 void
term_fontpush(struct termp * p,enum termfont f)510 term_fontpush(struct termp *p, enum termfont f)
511 {
512
513 p->fontl = p->fontq[p->fonti];
514 if (++p->fonti == p->fontsz) {
515 p->fontsz += 8;
516 p->fontq = mandoc_reallocarray(p->fontq,
517 p->fontsz, sizeof(*p->fontq));
518 }
519 p->fontq[p->fonti] = f;
520 }
521
522 /* Flush to make the saved pointer current again. */
523 void
term_fontpopq(struct termp * p,int i)524 term_fontpopq(struct termp *p, int i)
525 {
526
527 assert(i >= 0);
528 if (p->fonti > i)
529 p->fonti = i;
530 }
531
532 /* Pop one font off the stack. */
533 void
term_fontpop(struct termp * p)534 term_fontpop(struct termp *p)
535 {
536
537 assert(p->fonti);
538 p->fonti--;
539 }
540
541 /*
542 * Handle pwords, partial words, which may be either a single word or a
543 * phrase that cannot be broken down (such as a literal string). This
544 * handles word styling.
545 */
546 void
term_word(struct termp * p,const char * word)547 term_word(struct termp *p, const char *word)
548 {
549 struct roffsu su;
550 const char nbrsp[2] = { ASCII_NBRSP, 0 };
551 const char *seq, *cp;
552 int sz, uc;
553 size_t csz, lsz, ssz;
554 enum mandoc_esc esc;
555
556 if ((p->flags & TERMP_NOBUF) == 0) {
557 if ((p->flags & TERMP_NOSPACE) == 0) {
558 if ((p->flags & TERMP_KEEP) == 0) {
559 bufferc(p, ' ');
560 if (p->flags & TERMP_SENTENCE)
561 bufferc(p, ' ');
562 } else
563 bufferc(p, ASCII_NBRSP);
564 }
565 if (p->flags & TERMP_PREKEEP)
566 p->flags |= TERMP_KEEP;
567 if (p->flags & TERMP_NONOSPACE)
568 p->flags |= TERMP_NOSPACE;
569 else
570 p->flags &= ~TERMP_NOSPACE;
571 p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE);
572 p->skipvsp = 0;
573 }
574
575 while ('\0' != *word) {
576 if ('\\' != *word) {
577 if (TERMP_NBRWORD & p->flags) {
578 if (' ' == *word) {
579 encode(p, nbrsp, 1);
580 word++;
581 continue;
582 }
583 ssz = strcspn(word, "\\ ");
584 } else
585 ssz = strcspn(word, "\\");
586 encode(p, word, ssz);
587 word += (int)ssz;
588 continue;
589 }
590
591 word++;
592 esc = mandoc_escape(&word, &seq, &sz);
593 switch (esc) {
594 case ESCAPE_UNICODE:
595 uc = mchars_num2uc(seq + 1, sz - 1);
596 break;
597 case ESCAPE_NUMBERED:
598 uc = mchars_num2char(seq, sz);
599 if (uc >= 0)
600 break;
601 bufferc(p, ASCII_NBRZW);
602 continue;
603 case ESCAPE_SPECIAL:
604 if (p->enc == TERMENC_ASCII) {
605 cp = mchars_spec2str(seq, sz, &ssz);
606 if (cp != NULL)
607 encode(p, cp, ssz);
608 else
609 bufferc(p, ASCII_NBRZW);
610 } else {
611 uc = mchars_spec2cp(seq, sz);
612 if (uc > 0)
613 encode1(p, uc);
614 else
615 bufferc(p, ASCII_NBRZW);
616 }
617 continue;
618 case ESCAPE_UNDEF:
619 uc = *seq;
620 break;
621 case ESCAPE_FONTBOLD:
622 case ESCAPE_FONTCB:
623 term_fontrepl(p, TERMFONT_BOLD);
624 continue;
625 case ESCAPE_FONTITALIC:
626 case ESCAPE_FONTCI:
627 term_fontrepl(p, TERMFONT_UNDER);
628 continue;
629 case ESCAPE_FONTBI:
630 term_fontrepl(p, TERMFONT_BI);
631 continue;
632 case ESCAPE_FONT:
633 case ESCAPE_FONTCR:
634 case ESCAPE_FONTROMAN:
635 term_fontrepl(p, TERMFONT_NONE);
636 continue;
637 case ESCAPE_FONTPREV:
638 term_fontlast(p);
639 continue;
640 case ESCAPE_BREAK:
641 bufferc(p, '\n');
642 continue;
643 case ESCAPE_NOSPACE:
644 if (p->flags & TERMP_BACKAFTER)
645 p->flags &= ~TERMP_BACKAFTER;
646 else if (*word == '\0')
647 p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE);
648 continue;
649 case ESCAPE_DEVICE:
650 if (p->type == TERMTYPE_PDF)
651 encode(p, "pdf", 3);
652 else if (p->type == TERMTYPE_PS)
653 encode(p, "ps", 2);
654 else if (p->enc == TERMENC_ASCII)
655 encode(p, "ascii", 5);
656 else
657 encode(p, "utf8", 4);
658 continue;
659 case ESCAPE_HORIZ:
660 if (p->flags & TERMP_BACKAFTER) {
661 p->flags &= ~TERMP_BACKAFTER;
662 continue;
663 }
664 if (*seq == '|') {
665 seq++;
666 uc = -p->col;
667 } else
668 uc = 0;
669 if (a2roffsu(seq, &su, SCALE_EM) == NULL)
670 continue;
671 uc += term_hen(p, &su);
672 if (uc >= 0) {
673 while (uc > 0) {
674 uc -= term_len(p, 1);
675 if (p->flags & TERMP_BACKBEFORE)
676 p->flags &= ~TERMP_BACKBEFORE;
677 else
678 bufferc(p, ASCII_NBRSP);
679 }
680 continue;
681 }
682 if (p->flags & TERMP_BACKBEFORE) {
683 p->flags &= ~TERMP_BACKBEFORE;
684 assert(p->col > 0);
685 p->col--;
686 }
687 if (p->col >= (size_t)(-uc)) {
688 p->col += uc;
689 } else {
690 uc += p->col;
691 p->col = 0;
692 if (p->tcol->offset > (size_t)(-uc)) {
693 p->ti += uc;
694 p->tcol->offset += uc;
695 } else {
696 p->ti -= p->tcol->offset;
697 p->tcol->offset = 0;
698 }
699 }
700 continue;
701 case ESCAPE_HLINE:
702 if ((cp = a2roffsu(seq, &su, SCALE_EM)) == NULL)
703 continue;
704 uc = term_hen(p, &su);
705 if (uc <= 0) {
706 if (p->tcol->rmargin <= p->tcol->offset)
707 continue;
708 lsz = p->tcol->rmargin - p->tcol->offset;
709 } else
710 lsz = uc;
711 if (*cp == seq[-1])
712 uc = -1;
713 else if (*cp == '\\') {
714 seq = cp + 1;
715 esc = mandoc_escape(&seq, &cp, &sz);
716 switch (esc) {
717 case ESCAPE_UNICODE:
718 uc = mchars_num2uc(cp + 1, sz - 1);
719 break;
720 case ESCAPE_NUMBERED:
721 uc = mchars_num2char(cp, sz);
722 break;
723 case ESCAPE_SPECIAL:
724 uc = mchars_spec2cp(cp, sz);
725 break;
726 case ESCAPE_UNDEF:
727 uc = *seq;
728 break;
729 default:
730 uc = -1;
731 break;
732 }
733 } else
734 uc = *cp;
735 if (uc < 0x20 || (uc > 0x7E && uc < 0xA0))
736 uc = '_';
737 if (p->enc == TERMENC_ASCII) {
738 cp = ascii_uc2str(uc);
739 csz = term_strlen(p, cp);
740 ssz = strlen(cp);
741 } else
742 csz = (*p->width)(p, uc);
743 while (lsz >= csz) {
744 if (p->enc == TERMENC_ASCII)
745 encode(p, cp, ssz);
746 else
747 encode1(p, uc);
748 lsz -= csz;
749 }
750 continue;
751 case ESCAPE_SKIPCHAR:
752 p->flags |= TERMP_BACKAFTER;
753 continue;
754 case ESCAPE_OVERSTRIKE:
755 cp = seq + sz;
756 while (seq < cp) {
757 if (*seq == '\\') {
758 mandoc_escape(&seq, NULL, NULL);
759 continue;
760 }
761 encode1(p, *seq++);
762 if (seq < cp) {
763 if (p->flags & TERMP_BACKBEFORE)
764 p->flags |= TERMP_BACKAFTER;
765 else
766 p->flags |= TERMP_BACKBEFORE;
767 }
768 }
769 /* Trim trailing backspace/blank pair. */
770 if (p->tcol->lastcol > 2 &&
771 (p->tcol->buf[p->tcol->lastcol - 1] == ' ' ||
772 p->tcol->buf[p->tcol->lastcol - 1] == '\t'))
773 p->tcol->lastcol -= 2;
774 if (p->col > p->tcol->lastcol)
775 p->col = p->tcol->lastcol;
776 continue;
777 case ESCAPE_IGNORE:
778 bufferc(p, ASCII_NBRZW);
779 continue;
780 default:
781 continue;
782 }
783
784 /*
785 * Common handling for Unicode and numbered
786 * character escape sequences.
787 */
788
789 if (p->enc == TERMENC_ASCII) {
790 cp = ascii_uc2str(uc);
791 encode(p, cp, strlen(cp));
792 } else {
793 if ((uc < 0x20 && uc != 0x09) ||
794 (uc > 0x7E && uc < 0xA0))
795 uc = 0xFFFD;
796 encode1(p, uc);
797 }
798 }
799 p->flags &= ~TERMP_NBRWORD;
800 }
801
802 static void
adjbuf(struct termp_col * c,size_t sz)803 adjbuf(struct termp_col *c, size_t sz)
804 {
805 if (c->maxcols == 0)
806 c->maxcols = 1024;
807 while (c->maxcols <= sz)
808 c->maxcols <<= 2;
809 c->buf = mandoc_reallocarray(c->buf, c->maxcols, sizeof(*c->buf));
810 }
811
812 static void
bufferc(struct termp * p,char c)813 bufferc(struct termp *p, char c)
814 {
815 if (p->flags & TERMP_NOBUF) {
816 (*p->letter)(p, c);
817 return;
818 }
819 if (p->col + 1 >= p->tcol->maxcols)
820 adjbuf(p->tcol, p->col + 1);
821 if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
822 p->tcol->buf[p->col] = c;
823 if (p->tcol->lastcol < ++p->col)
824 p->tcol->lastcol = p->col;
825 }
826
827 void
term_tab_ref(struct termp * p)828 term_tab_ref(struct termp *p)
829 {
830 if (p->tcol->lastcol && p->tcol->lastcol <= p->col &&
831 (p->flags & TERMP_NOBUF) == 0)
832 bufferc(p, ASCII_TABREF);
833 }
834
835 /*
836 * See encode().
837 * Do this for a single (probably unicode) value.
838 * Does not check for non-decorated glyphs.
839 */
840 static void
encode1(struct termp * p,int c)841 encode1(struct termp *p, int c)
842 {
843 enum termfont f;
844
845 if (p->flags & TERMP_NOBUF) {
846 (*p->letter)(p, c);
847 return;
848 }
849
850 if (p->col + 7 >= p->tcol->maxcols)
851 adjbuf(p->tcol, p->col + 7);
852
853 f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ?
854 p->fontq[p->fonti] : TERMFONT_NONE;
855
856 if (p->flags & TERMP_BACKBEFORE) {
857 if (p->tcol->buf[p->col - 1] == ' ' ||
858 p->tcol->buf[p->col - 1] == '\t')
859 p->col--;
860 else
861 p->tcol->buf[p->col++] = '\b';
862 p->flags &= ~TERMP_BACKBEFORE;
863 }
864 if (f == TERMFONT_UNDER || f == TERMFONT_BI) {
865 p->tcol->buf[p->col++] = '_';
866 p->tcol->buf[p->col++] = '\b';
867 }
868 if (f == TERMFONT_BOLD || f == TERMFONT_BI) {
869 if (c == ASCII_HYPH)
870 p->tcol->buf[p->col++] = '-';
871 else
872 p->tcol->buf[p->col++] = c;
873 p->tcol->buf[p->col++] = '\b';
874 }
875 if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
876 p->tcol->buf[p->col] = c;
877 if (p->tcol->lastcol < ++p->col)
878 p->tcol->lastcol = p->col;
879 if (p->flags & TERMP_BACKAFTER) {
880 p->flags |= TERMP_BACKBEFORE;
881 p->flags &= ~TERMP_BACKAFTER;
882 }
883 }
884
885 static void
encode(struct termp * p,const char * word,size_t sz)886 encode(struct termp *p, const char *word, size_t sz)
887 {
888 size_t i;
889
890 if (p->flags & TERMP_NOBUF) {
891 for (i = 0; i < sz; i++)
892 (*p->letter)(p, word[i]);
893 return;
894 }
895
896 if (p->col + 2 + (sz * 5) >= p->tcol->maxcols)
897 adjbuf(p->tcol, p->col + 2 + (sz * 5));
898
899 for (i = 0; i < sz; i++) {
900 if (ASCII_HYPH == word[i] ||
901 isgraph((unsigned char)word[i]))
902 encode1(p, word[i]);
903 else {
904 if (p->tcol->lastcol <= p->col ||
905 (word[i] != ' ' && word[i] != ASCII_NBRSP))
906 p->tcol->buf[p->col] = word[i];
907 p->col++;
908
909 /*
910 * Postpone the effect of \z while handling
911 * an overstrike sequence from ascii_uc2str().
912 */
913
914 if (word[i] == '\b' &&
915 (p->flags & TERMP_BACKBEFORE)) {
916 p->flags &= ~TERMP_BACKBEFORE;
917 p->flags |= TERMP_BACKAFTER;
918 }
919 }
920 }
921 if (p->tcol->lastcol < p->col)
922 p->tcol->lastcol = p->col;
923 }
924
925 void
term_setwidth(struct termp * p,const char * wstr)926 term_setwidth(struct termp *p, const char *wstr)
927 {
928 struct roffsu su;
929 int iop, width;
930
931 iop = 0;
932 width = 0;
933 if (NULL != wstr) {
934 switch (*wstr) {
935 case '+':
936 iop = 1;
937 wstr++;
938 break;
939 case '-':
940 iop = -1;
941 wstr++;
942 break;
943 default:
944 break;
945 }
946 if (a2roffsu(wstr, &su, SCALE_MAX) != NULL)
947 width = term_hspan(p, &su);
948 else
949 iop = 0;
950 }
951 (*p->setwidth)(p, iop, width);
952 }
953
954 size_t
term_len(const struct termp * p,size_t sz)955 term_len(const struct termp *p, size_t sz)
956 {
957
958 return (*p->width)(p, ' ') * sz;
959 }
960
961 static size_t
cond_width(const struct termp * p,int c,int * skip)962 cond_width(const struct termp *p, int c, int *skip)
963 {
964
965 if (*skip) {
966 (*skip) = 0;
967 return 0;
968 } else
969 return (*p->width)(p, c);
970 }
971
972 size_t
term_strlen(const struct termp * p,const char * cp)973 term_strlen(const struct termp *p, const char *cp)
974 {
975 size_t sz, rsz, i;
976 int ssz, skip, uc;
977 const char *seq, *rhs;
978 enum mandoc_esc esc;
979 static const char rej[] = { '\\', ASCII_NBRSP, ASCII_NBRZW,
980 ASCII_BREAK, ASCII_HYPH, ASCII_TABREF, '\0' };
981
982 /*
983 * Account for escaped sequences within string length
984 * calculations. This follows the logic in term_word() as we
985 * must calculate the width of produced strings.
986 */
987
988 sz = 0;
989 skip = 0;
990 while ('\0' != *cp) {
991 rsz = strcspn(cp, rej);
992 for (i = 0; i < rsz; i++)
993 sz += cond_width(p, *cp++, &skip);
994
995 switch (*cp) {
996 case '\\':
997 cp++;
998 rhs = NULL;
999 esc = mandoc_escape(&cp, &seq, &ssz);
1000 switch (esc) {
1001 case ESCAPE_UNICODE:
1002 uc = mchars_num2uc(seq + 1, ssz - 1);
1003 break;
1004 case ESCAPE_NUMBERED:
1005 uc = mchars_num2char(seq, ssz);
1006 if (uc < 0)
1007 continue;
1008 break;
1009 case ESCAPE_SPECIAL:
1010 if (p->enc == TERMENC_ASCII) {
1011 rhs = mchars_spec2str(seq, ssz, &rsz);
1012 if (rhs != NULL)
1013 break;
1014 } else {
1015 uc = mchars_spec2cp(seq, ssz);
1016 if (uc > 0)
1017 sz += cond_width(p, uc, &skip);
1018 }
1019 continue;
1020 case ESCAPE_UNDEF:
1021 uc = *seq;
1022 break;
1023 case ESCAPE_DEVICE:
1024 if (p->type == TERMTYPE_PDF) {
1025 rhs = "pdf";
1026 rsz = 3;
1027 } else if (p->type == TERMTYPE_PS) {
1028 rhs = "ps";
1029 rsz = 2;
1030 } else if (p->enc == TERMENC_ASCII) {
1031 rhs = "ascii";
1032 rsz = 5;
1033 } else {
1034 rhs = "utf8";
1035 rsz = 4;
1036 }
1037 break;
1038 case ESCAPE_SKIPCHAR:
1039 skip = 1;
1040 continue;
1041 case ESCAPE_OVERSTRIKE:
1042 rsz = 0;
1043 rhs = seq + ssz;
1044 while (seq < rhs) {
1045 if (*seq == '\\') {
1046 mandoc_escape(&seq, NULL, NULL);
1047 continue;
1048 }
1049 i = (*p->width)(p, *seq++);
1050 if (rsz < i)
1051 rsz = i;
1052 }
1053 sz += rsz;
1054 continue;
1055 default:
1056 continue;
1057 }
1058
1059 /*
1060 * Common handling for Unicode and numbered
1061 * character escape sequences.
1062 */
1063
1064 if (rhs == NULL) {
1065 if (p->enc == TERMENC_ASCII) {
1066 rhs = ascii_uc2str(uc);
1067 rsz = strlen(rhs);
1068 } else {
1069 if ((uc < 0x20 && uc != 0x09) ||
1070 (uc > 0x7E && uc < 0xA0))
1071 uc = 0xFFFD;
1072 sz += cond_width(p, uc, &skip);
1073 continue;
1074 }
1075 }
1076
1077 if (skip) {
1078 skip = 0;
1079 break;
1080 }
1081
1082 /*
1083 * Common handling for all escape sequences
1084 * printing more than one character.
1085 */
1086
1087 for (i = 0; i < rsz; i++)
1088 sz += (*p->width)(p, *rhs++);
1089 break;
1090 case ASCII_NBRSP:
1091 sz += cond_width(p, ' ', &skip);
1092 cp++;
1093 break;
1094 case ASCII_HYPH:
1095 sz += cond_width(p, '-', &skip);
1096 cp++;
1097 break;
1098 default:
1099 break;
1100 }
1101 }
1102
1103 return sz;
1104 }
1105
1106 int
term_vspan(const struct termp * p,const struct roffsu * su)1107 term_vspan(const struct termp *p, const struct roffsu *su)
1108 {
1109 double r;
1110 int ri;
1111
1112 switch (su->unit) {
1113 case SCALE_BU:
1114 r = su->scale / 40.0;
1115 break;
1116 case SCALE_CM:
1117 r = su->scale * 6.0 / 2.54;
1118 break;
1119 case SCALE_FS:
1120 r = su->scale * 65536.0 / 40.0;
1121 break;
1122 case SCALE_IN:
1123 r = su->scale * 6.0;
1124 break;
1125 case SCALE_MM:
1126 r = su->scale * 0.006;
1127 break;
1128 case SCALE_PC:
1129 r = su->scale;
1130 break;
1131 case SCALE_PT:
1132 r = su->scale / 12.0;
1133 break;
1134 case SCALE_EN:
1135 case SCALE_EM:
1136 r = su->scale * 0.6;
1137 break;
1138 case SCALE_VS:
1139 r = su->scale;
1140 break;
1141 default:
1142 abort();
1143 }
1144 ri = r > 0.0 ? r + 0.4995 : r - 0.4995;
1145 return ri < 66 ? ri : 1;
1146 }
1147
1148 /*
1149 * Convert a scaling width to basic units, rounding towards 0.
1150 */
1151 int
term_hspan(const struct termp * p,const struct roffsu * su)1152 term_hspan(const struct termp *p, const struct roffsu *su)
1153 {
1154
1155 return (*p->hspan)(p, su);
1156 }
1157
1158 /*
1159 * Convert a scaling width to basic units, rounding to closest.
1160 */
1161 int
term_hen(const struct termp * p,const struct roffsu * su)1162 term_hen(const struct termp *p, const struct roffsu *su)
1163 {
1164 int bu;
1165
1166 if ((bu = (*p->hspan)(p, su)) >= 0)
1167 return (bu + 11) / 24;
1168 else
1169 return -((-bu + 11) / 24);
1170 }
1171