xref: /freebsd/contrib/mandoc/term.c (revision 06410c1b51637e5e1f392d553b5008948af58014)
1 /* $Id: term.c,v 1.294 2025/08/01 14:59:39 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010-2022, 2025 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdint.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "out.h"
32 #include "term.h"
33 #include "main.h"
34 
35 static	size_t		 cond_width(const struct termp *, int, int *);
36 static	void		 adjbuf(struct termp_col *, size_t);
37 static	void		 bufferc(struct termp *, char);
38 static	void		 encode(struct termp *, const char *, size_t);
39 static	void		 encode1(struct termp *, int);
40 static	void		 endline(struct termp *);
41 static	void		 term_field(struct termp *, size_t, size_t);
42 static	void		 term_fill(struct termp *, size_t *, size_t *,
43 				size_t);
44 
45 
46 void
47 term_setcol(struct termp *p, size_t maxtcol)
48 {
49 	if (maxtcol > p->maxtcol) {
50 		p->tcols = mandoc_recallocarray(p->tcols,
51 		    p->maxtcol, maxtcol, sizeof(*p->tcols));
52 		p->maxtcol = maxtcol;
53 	}
54 	p->lasttcol = maxtcol - 1;
55 	p->tcol = p->tcols;
56 }
57 
58 void
59 term_free(struct termp *p)
60 {
61 	term_tab_free();
62 	for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++)
63 		free(p->tcol->buf);
64 	free(p->tcols);
65 	free(p->fontq);
66 	free(p);
67 }
68 
69 void
70 term_begin(struct termp *p, term_margin head,
71 		term_margin foot, const struct roff_meta *arg)
72 {
73 
74 	p->headf = head;
75 	p->footf = foot;
76 	p->argf = arg;
77 	(*p->begin)(p);
78 }
79 
80 void
81 term_end(struct termp *p)
82 {
83 
84 	(*p->end)(p);
85 }
86 
87 /*
88  * Flush a chunk of text.  By default, break the output line each time
89  * the right margin is reached, and continue output on the next line
90  * at the same offset as the chunk itself.  By default, also break the
91  * output line at the end of the chunk.  There are many flags modifying
92  * this behaviour, see the comments in the body of the function.
93  */
94 void
95 term_flushln(struct termp *p)
96 {
97 	/* Widths in basic units. */
98 	size_t	 vbl;      /* Whitespace to prepend to the output. */
99 	size_t	 vbr;      /* Actual visual position of the end of field. */
100 	size_t	 vfield;   /* Desired visual field width. */
101 	size_t	 vtarget;  /* Desired visual position of the right margin. */
102 
103 	/* Bytes. */
104 	size_t	 ic;       /* Byte index in the input buffer. */
105 	size_t	 nbr;      /* Number of bytes to print in this field. */
106 
107 	/*
108 	 * Normally, start writing at the left margin, but with the
109 	 * NOPAD flag, start writing at the current position instead.
110 	 */
111 
112 	vbl = (p->flags & TERMP_NOPAD) || p->tcol->offset < p->viscol ?
113 	    0 : p->tcol->offset - p->viscol;
114 	if (p->minbl > 0 && vbl < term_len(p, p->minbl))
115 		vbl = term_len(p, p->minbl);
116 
117 	if ((p->flags & TERMP_MULTICOL) == 0)
118 		p->tcol->col = 0;
119 
120 	/* Loop over output lines. */
121 
122 	for (;;) {
123 		vfield = p->tcol->rmargin > p->viscol + vbl ?
124 		    p->tcol->rmargin - p->viscol - vbl : 0;
125 
126 		/*
127 		 * Normally, break the line at the the right margin
128 		 * of the field, but with the NOBREAK flag, only
129 		 * break it at the max right margin of the screen,
130 		 * and with the BRNEVER flag, never break it at all.
131 		 */
132 
133 		vtarget = (p->flags & TERMP_NOBREAK) == 0 ? vfield :
134 		    p->maxrmargin > p->viscol + vbl ?
135 		    p->maxrmargin - p->viscol - vbl : 0;
136 
137 		/*
138 		 * Figure out how much text will fit in the field.
139 		 * If there is whitespace only, print nothing.
140 		 */
141 
142 		term_fill(p, &nbr, &vbr,
143 		    p->flags & TERMP_BRNEVER ? SIZE_MAX / 2 : vtarget);
144 		if (nbr == 0)
145 			break;
146 
147 		/*
148 		 * With the CENTER or RIGHT flag, increase the indentation
149 		 * to center the text between the left and right margins
150 		 * or to adjust it to the right margin, respectively.
151 		 */
152 
153 		if (vbr < vtarget) {
154 			if (p->flags & TERMP_CENTER)
155 				vbl += (vtarget - vbr) / 2;
156 			else if (p->flags & TERMP_RIGHT)
157 				vbl += vtarget - vbr;
158 		}
159 
160 		/* Finally, print the field content. */
161 
162 		term_field(p, vbl, nbr);
163 		if (vbr < vtarget)
164 			p->tcol->taboff += vbr;
165 		else
166 			p->tcol->taboff += vtarget;
167 		p->tcol->taboff += term_len(p, 1);
168 
169 		/*
170 		 * If there is no text left in the field, exit the loop.
171 		 * If the BRTRSP flag is set, consider trailing
172 		 * whitespace significant when deciding whether
173 		 * the field fits or not.
174 		 */
175 
176 		for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
177 			switch (p->tcol->buf[ic]) {
178 			case '\t':
179 				if (p->flags & TERMP_BRTRSP)
180 					vbr = term_tab_next(vbr);
181 				continue;
182 			case ' ':
183 				if (p->flags & TERMP_BRTRSP)
184 					vbr += term_len(p, 1);
185 				continue;
186 			case '\n':
187 			case ASCII_NBRZW:
188 			case ASCII_BREAK:
189 			case ASCII_TABREF:
190 				continue;
191 			default:
192 				break;
193 			}
194 			break;
195 		}
196 		if (ic == p->tcol->lastcol)
197 			break;
198 
199 		/*
200 		 * At the location of an automatic line break, input
201 		 * space characters are consumed by the line break.
202 		 */
203 
204 		while (p->tcol->col < p->tcol->lastcol &&
205 		    p->tcol->buf[p->tcol->col] == ' ')
206 			p->tcol->col++;
207 
208 		/*
209 		 * In multi-column mode, leave the rest of the text
210 		 * in the buffer to be handled by a subsequent
211 		 * invocation, such that the other columns of the
212 		 * table can be handled first.
213 		 * In single-column mode, simply break the line.
214 		 */
215 
216 		if (p->flags & TERMP_MULTICOL)
217 			return;
218 
219 		endline(p);
220 
221 		/*
222 		 * Normally, start the next line at the same indentation
223 		 * as this one, but with the BRIND flag, start it at the
224 		 * right margin instead.  This is used together with
225 		 * NOBREAK for the tags in various kinds of tagged lists.
226 		 */
227 
228 		vbl = p->flags & TERMP_BRIND ?
229 		    p->tcol->rmargin : p->tcol->offset;
230 	}
231 
232 	/* Reset output state in preparation for the next field. */
233 
234 	p->col = p->tcol->col = p->tcol->lastcol = 0;
235 	p->minbl = p->trailspace;
236 	p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE | TERMP_NOPAD);
237 
238 	if (p->flags & TERMP_MULTICOL)
239 		return;
240 
241 	/*
242 	 * The HANG flag means that the next field
243 	 * always follows on the same line.
244 	 * The NOBREAK flag means that the next field
245 	 * follows on the same line unless the field was overrun.
246 	 * Normally, break the line at the end of each field.
247 	 */
248 
249 	if ((p->flags & TERMP_HANG) == 0 &&
250 	    ((p->flags & TERMP_NOBREAK) == 0 ||
251 	     vbr + term_len(p, p->trailspace) > vfield + term_len(p, 1) / 2))
252 		endline(p);
253 }
254 
255 /*
256  * Store the number of input bytes to print in this field in *nbr
257  * and their total visual width in basic units in *vbr.
258  * If there is only whitespace in the field, both remain zero.
259  * The desired visual width of the field is provided by vtarget.
260  * If the first word is longer, the field will be overrun.
261  */
262 static void
263 term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget)
264 {
265 	/* Widths in basic units. */
266 	size_t	 vis;       /* Visual position of the current character. */
267 	size_t	 vn;        /* Visual position of the next character. */
268 	size_t	 enw;       /* Width of an EN unit. */
269 	int	 taboff;    /* Temporary offset for literal tabs. */
270 
271 	size_t	 ic;        /* Byte index in the input buffer. */
272 	int	 breakline; /* Break at the end of this word. */
273 	int	 graph;     /* Last character was non-blank. */
274 
275 	*nbr = *vbr = vis = 0;
276 	breakline = graph = 0;
277 	taboff = p->tcol->taboff;
278 	enw = (*p->getwidth)(p, ' ');
279 	vtarget += enw / 2;
280 	for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
281 		switch (p->tcol->buf[ic]) {
282 		case '\b':  /* Escape \o (overstrike) or backspace markup. */
283 			assert(ic > 0);
284 			vis -= (*p->getwidth)(p, p->tcol->buf[ic - 1]);
285 			continue;
286 
287 		case ' ':
288 		case ASCII_BREAK:  /* Escape \: (breakpoint). */
289 			vn = vis;
290 			if (p->tcol->buf[ic] == ' ')
291 				vn += enw;
292 			/* Can break at the end of a word. */
293 			if (breakline || vn > vtarget)
294 				break;
295 			if (graph) {
296 				*nbr = ic;
297 				*vbr = vis;
298 				graph = 0;
299 			}
300 			vis = vn;
301 			continue;
302 
303 		case '\n':  /* Escape \p (break at the end of the word). */
304 			breakline = 1;
305 			continue;
306 
307 		case ASCII_HYPH:  /* Breakable hyphen. */
308 			graph = 1;
309 			/*
310 			 * We are about to decide whether to break the
311 			 * line or not, so we no longer need this hyphen
312 			 * to be marked as breakable.  Put back a real
313 			 * hyphen such that we get the correct width.
314 			 */
315 			p->tcol->buf[ic] = '-';
316 			vis += (*p->getwidth)(p, '-');
317 			if (vis > vtarget) {
318 				ic++;
319 				break;
320 			}
321 			*nbr = ic + 1;
322 			*vbr = vis;
323 			continue;
324 
325 		case ASCII_TABREF:
326 			taboff = -vis - enw;
327 			continue;
328 
329 		default:
330 			switch (p->tcol->buf[ic]) {
331 			case '\t':
332 				if (taboff < 0 && (size_t)-taboff > vis)
333 					vis = 0;
334 				else
335 					vis += taboff;
336 				vis = term_tab_next(vis);
337 				vis -= taboff;
338 				break;
339 			case ASCII_NBRZW:  /* Non-breakable zero-width. */
340 				break;
341 			case ASCII_NBRSP:  /* Non-breakable space. */
342 				p->tcol->buf[ic] = ' ';
343 				/* FALLTHROUGH */
344 			default:  /* Printable character. */
345 				vis += (*p->getwidth)(p, p->tcol->buf[ic]);
346 				break;
347 			}
348 			graph = 1;
349 			if (vis > vtarget && *nbr > 0)
350 				return;
351 			continue;
352 		}
353 		break;
354 	}
355 
356 	/*
357 	 * If the last word extends to the end of the field without any
358 	 * trailing whitespace, the loop could not check yet whether it
359 	 * can remain on this line.  So do the check now.
360 	 */
361 
362 	if (graph && (vis <= vtarget || *nbr == 0)) {
363 		*nbr = ic;
364 		*vbr = vis;
365 	}
366 }
367 
368 /*
369  * Print the contents of one field
370  * with an indentation        of  vbl  basic units
371  * and an input string length of  nbr  bytes.
372  */
373 static void
374 term_field(struct termp *p, size_t vbl, size_t nbr)
375 {
376 	/* Widths in basic units. */
377 	size_t	 vis;	/* Visual position of the current character. */
378 	size_t	 vt;	/* Visual position including tab offset. */
379 	size_t	 dv;	/* Visual width of the current character. */
380 	int	 taboff; /* Temporary offset for literal tabs. */
381 
382 	size_t	 ic;	/* Byte position in the input buffer. */
383 
384 	vis = 0;
385 	taboff = p->tcol->taboff;
386 	for (ic = p->tcol->col; ic < nbr; ic++) {
387 
388 		/*
389 		 * To avoid the printing of trailing whitespace,
390 		 * do not print whitespace right away, only count it.
391 		 */
392 
393 		switch (p->tcol->buf[ic]) {
394 		case '\n':
395 		case ASCII_BREAK:
396 		case ASCII_NBRZW:
397 			continue;
398 		case ASCII_TABREF:
399 			taboff = -vis - (*p->getwidth)(p, ' ');
400 			continue;
401 		case '\t':
402 		case ' ':
403 		case ASCII_NBRSP:
404 			if (p->tcol->buf[ic] == '\t') {
405 				if (taboff < 0 && (size_t)-taboff > vis)
406 					vt = 0;
407 				else
408 					vt = vis + taboff;
409 				dv = term_tab_next(vt) - vt;
410 			} else
411 				dv = (*p->getwidth)(p, ' ');
412 			vbl += dv;
413 			vis += dv;
414 			continue;
415 		default:
416 			break;
417 		}
418 
419 		/*
420 		 * We found a non-blank character to print,
421 		 * so write preceding white space now.
422 		 */
423 
424 		if (vbl > 0) {
425 			(*p->advance)(p, vbl);
426 			vbl = 0;
427 		}
428 
429 		/* Print the character and adjust the visual position. */
430 
431 		(*p->letter)(p, p->tcol->buf[ic]);
432 		if (p->tcol->buf[ic] == '\b') {
433 			dv = (*p->getwidth)(p, p->tcol->buf[ic - 1]);
434 			p->viscol -= dv;
435 			vis -= dv;
436 		} else {
437 			dv = (*p->getwidth)(p, p->tcol->buf[ic]);
438 			p->viscol += dv;
439 			vis += dv;
440 		}
441 	}
442 	p->tcol->col = nbr;
443 }
444 
445 /*
446  * Print the margin character, if one is configured,
447  * and end the output line.
448  */
449 static void
450 endline(struct termp *p)
451 {
452 	if ((p->flags & (TERMP_NEWMC | TERMP_ENDMC)) == TERMP_ENDMC) {
453 		p->mc = NULL;
454 		p->flags &= ~TERMP_ENDMC;
455 	}
456 	if (p->mc != NULL) {
457 		if (p->viscol > 0 && p->viscol <= p->maxrmargin)
458 			(*p->advance)(p,
459 			    p->maxrmargin - p->viscol + term_len(p, 1));
460 		p->flags |= TERMP_NOBUF | TERMP_NOSPACE;
461 		term_word(p, p->mc);
462 		p->flags &= ~(TERMP_NOBUF | TERMP_NEWMC);
463 	}
464 	(*p->endline)(p);
465 }
466 
467 /*
468  * A newline only breaks an existing line; it won't assert vertical
469  * space.  All data in the output buffer is flushed prior to the newline
470  * assertion.
471  */
472 void
473 term_newln(struct termp *p)
474 {
475 	p->flags |= TERMP_NOSPACE;
476 	if (p->tcol->lastcol || p->viscol)
477 		term_flushln(p);
478 	p->tcol->taboff = 0;
479 }
480 
481 /*
482  * Asserts a vertical space (a full, empty line-break between lines).
483  * Note that if used twice, this will cause two blank spaces and so on.
484  * All data in the output buffer is flushed prior to the newline
485  * assertion.
486  */
487 void
488 term_vspace(struct termp *p)
489 {
490 
491 	term_newln(p);
492 	if (0 < p->skipvsp)
493 		p->skipvsp--;
494 	else
495 		(*p->endline)(p);
496 }
497 
498 /* Swap current and previous font; for \fP and .ft P */
499 void
500 term_fontlast(struct termp *p)
501 {
502 	enum termfont	 f;
503 
504 	f = p->fontl;
505 	p->fontl = p->fontq[p->fonti];
506 	p->fontq[p->fonti] = f;
507 }
508 
509 /* Set font, save current, discard previous; for \f, .ft, and man(7). */
510 void
511 term_fontrepl(struct termp *p, enum termfont f)
512 {
513 	p->fontl = p->fontq[p->fonti];
514 	if (p->fontibi && f == TERMFONT_UNDER)
515 		f = TERMFONT_BI;
516 	p->fontq[p->fonti] = f;
517 }
518 
519 /* Set font, save previous; for mdoc(7), eqn(7), and tbl(7). */
520 void
521 term_fontpush(struct termp *p, enum termfont f)
522 {
523 	enum termfont	 fl;
524 
525 	fl = p->fontq[p->fonti];
526 	if (++p->fonti == p->fontsz) {
527 		p->fontsz += 8;
528 		p->fontq = mandoc_reallocarray(p->fontq,
529 		    p->fontsz, sizeof(*p->fontq));
530 	}
531 	p->fontq[p->fonti] = fl;
532 	term_fontrepl(p, f);
533 }
534 
535 /* Flush to make the saved pointer current again. */
536 void
537 term_fontpopq(struct termp *p, int i)
538 {
539 	assert(i >= 0);
540 	if (p->fonti > i)
541 		p->fonti = i;
542 }
543 
544 /* Pop one font off the stack. */
545 void
546 term_fontpop(struct termp *p)
547 {
548 	assert(p->fonti > 0);
549 	p->fonti--;
550 }
551 
552 /*
553  * Handle pwords, partial words, which may be either a single word or a
554  * phrase that cannot be broken down (such as a literal string).  This
555  * handles word styling.
556  */
557 void
558 term_word(struct termp *p, const char *word)
559 {
560 	struct roffsu	 su;
561 	const char	 nbrsp[2] = { ASCII_NBRSP, 0 };
562 	const char	*seq;		/* Escape sequence argument. */
563 	const char	*cp;		/* String to be printed. */
564 	size_t		 csz;		/* String length in basic units. */
565 	size_t		 lsz;		/* Line width in basic units. */
566 	size_t		 ssz;		/* Substring length in bytes. */
567 	int		 sz;		/* Argument length in bytes. */
568 	int		 uc;		/* Unicode codepoint number. */
569 	int		 bu;		/* Width in basic units. */
570 	enum mandoc_esc	 esc;
571 
572 	if ((p->flags & TERMP_NOBUF) == 0) {
573 		if ((p->flags & TERMP_NOSPACE) == 0) {
574 			if ((p->flags & TERMP_KEEP) == 0) {
575 				bufferc(p, ' ');
576 				if (p->flags & TERMP_SENTENCE)
577 					bufferc(p, ' ');
578 			} else
579 				bufferc(p, ASCII_NBRSP);
580 		}
581 		if (p->flags & TERMP_PREKEEP)
582 			p->flags |= TERMP_KEEP;
583 		if (p->flags & TERMP_NONOSPACE)
584 			p->flags |= TERMP_NOSPACE;
585 		else
586 			p->flags &= ~TERMP_NOSPACE;
587 		p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE);
588 		p->skipvsp = 0;
589 	}
590 
591 	while ('\0' != *word) {
592 		if ('\\' != *word) {
593 			if (TERMP_NBRWORD & p->flags) {
594 				if (' ' == *word) {
595 					encode(p, nbrsp, 1);
596 					word++;
597 					continue;
598 				}
599 				ssz = strcspn(word, "\\ ");
600 			} else
601 				ssz = strcspn(word, "\\");
602 			encode(p, word, ssz);
603 			word += (int)ssz;
604 			continue;
605 		}
606 
607 		word++;
608 		esc = mandoc_escape(&word, &seq, &sz);
609 		switch (esc) {
610 		case ESCAPE_UNICODE:
611 			uc = mchars_num2uc(seq + 1, sz - 1);
612 			break;
613 		case ESCAPE_NUMBERED:
614 			uc = mchars_num2char(seq, sz);
615 			if (uc >= 0)
616 				break;
617 			bufferc(p, ASCII_NBRZW);
618 			continue;
619 		case ESCAPE_SPECIAL:
620 			if (p->enc == TERMENC_ASCII) {
621 				cp = mchars_spec2str(seq, sz, &ssz);
622 				if (cp != NULL)
623 					encode(p, cp, ssz);
624 				else
625 					bufferc(p, ASCII_NBRZW);
626 			} else {
627 				uc = mchars_spec2cp(seq, sz);
628 				if (uc > 0)
629 					encode1(p, uc);
630 				else
631 					bufferc(p, ASCII_NBRZW);
632 			}
633 			continue;
634 		case ESCAPE_UNDEF:
635 			uc = *seq;
636 			break;
637 		case ESCAPE_FONTBOLD:
638 		case ESCAPE_FONTCB:
639 			term_fontrepl(p, TERMFONT_BOLD);
640 			continue;
641 		case ESCAPE_FONTITALIC:
642 		case ESCAPE_FONTCI:
643 			term_fontrepl(p, TERMFONT_UNDER);
644 			continue;
645 		case ESCAPE_FONTBI:
646 			term_fontrepl(p, TERMFONT_BI);
647 			continue;
648 		case ESCAPE_FONT:
649 		case ESCAPE_FONTCR:
650 		case ESCAPE_FONTROMAN:
651 			term_fontrepl(p, TERMFONT_NONE);
652 			continue;
653 		case ESCAPE_FONTPREV:
654 			term_fontlast(p);
655 			continue;
656 		case ESCAPE_BREAK:
657 			bufferc(p, '\n');
658 			continue;
659 		case ESCAPE_NOSPACE:
660 			if (p->flags & TERMP_BACKAFTER)
661 				p->flags &= ~TERMP_BACKAFTER;
662 			else if (*word == '\0')
663 				p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE);
664 			continue;
665 		case ESCAPE_DEVICE:
666 			if (p->type == TERMTYPE_PDF)
667 				encode(p, "pdf", 3);
668 			else if (p->type == TERMTYPE_PS)
669 				encode(p, "ps", 2);
670 			else if (p->enc == TERMENC_ASCII)
671 				encode(p, "ascii", 5);
672 			else
673 				encode(p, "utf8", 4);
674 			continue;
675 		case ESCAPE_HORIZ:
676 			if (p->flags & TERMP_BACKAFTER) {
677 				p->flags &= ~TERMP_BACKAFTER;
678 				continue;
679 			}
680 			if (*seq == '|') {
681 				seq++;
682 				bu = -term_len(p, p->col);
683 			} else
684 				bu = 0;
685 			if (a2roffsu(seq, &su, SCALE_EM) == NULL)
686 				continue;
687 			bu += term_hspan(p, &su);
688 			if (bu >= 0) {
689 				while (bu > 0) {
690 					bu -= term_len(p, 1);
691 					if (p->flags & TERMP_BACKBEFORE)
692 						p->flags &= ~TERMP_BACKBEFORE;
693 					else
694 						bufferc(p, ASCII_NBRSP);
695 				}
696 				continue;
697 			}
698 			if (p->flags & TERMP_BACKBEFORE) {
699 				p->flags &= ~TERMP_BACKBEFORE;
700 				assert(p->col > 1);
701 				p->col--;
702 			}
703 			if (term_len(p, p->col) >= (size_t)(-bu)) {
704 				p->col -= -bu / term_len(p, 1);
705 			} else {
706 				bu += term_len(p, p->col);
707 				p->col = 0;
708 				if (p->tcol->offset > (size_t)(-bu)) {
709 					p->ti += bu;
710 					p->tcol->offset += bu;
711 				} else {
712 					p->ti -= p->tcol->offset;
713 					p->tcol->offset = 0;
714 				}
715 			}
716 			continue;
717 		case ESCAPE_HLINE:
718 			if ((cp = a2roffsu(seq, &su, SCALE_EM)) == NULL)
719 				continue;
720 			bu = term_hspan(p, &su);
721 			if (bu <= 0) {
722 				if (p->tcol->rmargin <= p->tcol->offset)
723 					continue;
724 				lsz = p->tcol->rmargin - p->tcol->offset;
725 			} else
726 				lsz = bu;
727 			if (*cp == seq[-1])
728 				uc = -1;
729 			else if (*cp == '\\') {
730 				seq = cp + 1;
731 				esc = mandoc_escape(&seq, &cp, &sz);
732 				switch (esc) {
733 				case ESCAPE_UNICODE:
734 					uc = mchars_num2uc(cp + 1, sz - 1);
735 					break;
736 				case ESCAPE_NUMBERED:
737 					uc = mchars_num2char(cp, sz);
738 					break;
739 				case ESCAPE_SPECIAL:
740 					uc = mchars_spec2cp(cp, sz);
741 					break;
742 				case ESCAPE_UNDEF:
743 					uc = *seq;
744 					break;
745 				default:
746 					uc = -1;
747 					break;
748 				}
749 			} else
750 				uc = *cp;
751 			if (uc < 0x20 || (uc > 0x7E && uc < 0xA0))
752 				uc = '_';
753 			if (p->enc == TERMENC_ASCII) {
754 				cp = ascii_uc2str(uc);
755 				csz = term_strlen(p, cp);
756 				ssz = strlen(cp);
757 			} else
758 				csz = (*p->getwidth)(p, uc);
759 			while (lsz > 0) {
760 				if (p->enc == TERMENC_ASCII)
761 					encode(p, cp, ssz);
762 				else
763 					encode1(p, uc);
764 				if (lsz > csz)
765 					lsz -= csz;
766 				else
767 					lsz = 0;
768 			}
769 			continue;
770 		case ESCAPE_SKIPCHAR:
771 			p->flags |= TERMP_BACKAFTER;
772 			continue;
773 		case ESCAPE_OVERSTRIKE:
774 			cp = seq + sz;
775 			while (seq < cp) {
776 				if (*seq == '\\') {
777 					mandoc_escape(&seq, NULL, NULL);
778 					continue;
779 				}
780 				encode1(p, *seq++);
781 				if (seq < cp) {
782 					if (p->flags & TERMP_BACKBEFORE)
783 						p->flags |= TERMP_BACKAFTER;
784 					else
785 						p->flags |= TERMP_BACKBEFORE;
786 				}
787 			}
788 			/* Trim trailing backspace/blank pair. */
789 			if (p->tcol->lastcol > 2 &&
790 			    (p->tcol->buf[p->tcol->lastcol - 1] == ' ' ||
791 			     p->tcol->buf[p->tcol->lastcol - 1] == '\t'))
792 				p->tcol->lastcol -= 2;
793 			if (p->col > p->tcol->lastcol)
794 				p->col = p->tcol->lastcol;
795 			continue;
796 		case ESCAPE_IGNORE:
797 			bufferc(p, ASCII_NBRZW);
798 			continue;
799 		default:
800 			continue;
801 		}
802 
803 		/*
804 		 * Common handling for Unicode and numbered
805 		 * character escape sequences.
806 		 */
807 
808 		if (p->enc == TERMENC_ASCII) {
809 			cp = ascii_uc2str(uc);
810 			encode(p, cp, strlen(cp));
811 		} else {
812 			if ((uc < 0x20 && uc != 0x09) ||
813 			    (uc > 0x7E && uc < 0xA0))
814 				uc = 0xFFFD;
815 			encode1(p, uc);
816 		}
817 	}
818 	p->flags &= ~TERMP_NBRWORD;
819 }
820 
821 static void
822 adjbuf(struct termp_col *c, size_t sz)
823 {
824 	if (c->maxcols == 0)
825 		c->maxcols = 1024;
826 	while (c->maxcols <= sz)
827 		c->maxcols <<= 2;
828 	c->buf = mandoc_reallocarray(c->buf, c->maxcols, sizeof(*c->buf));
829 }
830 
831 static void
832 bufferc(struct termp *p, char c)
833 {
834 	if (p->flags & TERMP_NOBUF) {
835 		(*p->letter)(p, c);
836 		return;
837 	}
838 	if (p->col + 1 >= p->tcol->maxcols)
839 		adjbuf(p->tcol, p->col + 1);
840 	if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
841 		p->tcol->buf[p->col] = c;
842 	if (p->tcol->lastcol < ++p->col)
843 		p->tcol->lastcol = p->col;
844 }
845 
846 void
847 term_tab_ref(struct termp *p)
848 {
849 	if (p->tcol->lastcol && p->tcol->lastcol <= p->col &&
850 	    (p->flags & TERMP_NOBUF) == 0)
851 		bufferc(p, ASCII_TABREF);
852 }
853 
854 /*
855  * See encode().
856  * Do this for a single (probably unicode) value.
857  * Does not check for non-decorated glyphs.
858  */
859 static void
860 encode1(struct termp *p, int c)
861 {
862 	enum termfont	  f;
863 
864 	if (p->flags & TERMP_NOBUF) {
865 		(*p->letter)(p, c);
866 		return;
867 	}
868 
869 	if (p->col + 7 >= p->tcol->maxcols)
870 		adjbuf(p->tcol, p->col + 7);
871 
872 	f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ?
873 	    p->fontq[p->fonti] : TERMFONT_NONE;
874 
875 	if (p->flags & TERMP_BACKBEFORE) {
876 		if (p->tcol->buf[p->col - 1] == ' ' ||
877 		    p->tcol->buf[p->col - 1] == '\t')
878 			p->col--;
879 		else
880 			p->tcol->buf[p->col++] = '\b';
881 		p->flags &= ~TERMP_BACKBEFORE;
882 	}
883 	if (f == TERMFONT_UNDER || f == TERMFONT_BI) {
884 		p->tcol->buf[p->col++] = '_';
885 		p->tcol->buf[p->col++] = '\b';
886 	}
887 	if (f == TERMFONT_BOLD || f == TERMFONT_BI) {
888 		if (c == ASCII_HYPH)
889 			p->tcol->buf[p->col++] = '-';
890 		else
891 			p->tcol->buf[p->col++] = c;
892 		p->tcol->buf[p->col++] = '\b';
893 	}
894 	if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
895 		p->tcol->buf[p->col] = c;
896 	if (p->tcol->lastcol < ++p->col)
897 		p->tcol->lastcol = p->col;
898 	if (p->flags & TERMP_BACKAFTER) {
899 		p->flags |= TERMP_BACKBEFORE;
900 		p->flags &= ~TERMP_BACKAFTER;
901 	}
902 }
903 
904 static void
905 encode(struct termp *p, const char *word, size_t sz)
906 {
907 	size_t		  i;
908 
909 	if (p->flags & TERMP_NOBUF) {
910 		for (i = 0; i < sz; i++)
911 			(*p->letter)(p, word[i]);
912 		return;
913 	}
914 
915 	if (p->col + 2 + (sz * 5) >= p->tcol->maxcols)
916 		adjbuf(p->tcol, p->col + 2 + (sz * 5));
917 
918 	for (i = 0; i < sz; i++) {
919 		if (ASCII_HYPH == word[i] ||
920 		    isgraph((unsigned char)word[i]))
921 			encode1(p, word[i]);
922 		else {
923 			if (p->tcol->lastcol <= p->col ||
924 			    (word[i] != ' ' && word[i] != ASCII_NBRSP))
925 				p->tcol->buf[p->col] = word[i];
926 			p->col++;
927 
928 			/*
929 			 * Postpone the effect of \z while handling
930 			 * an overstrike sequence from ascii_uc2str().
931 			 */
932 
933 			if (word[i] == '\b' &&
934 			    (p->flags & TERMP_BACKBEFORE)) {
935 				p->flags &= ~TERMP_BACKBEFORE;
936 				p->flags |= TERMP_BACKAFTER;
937 			}
938 		}
939 	}
940 	if (p->tcol->lastcol < p->col)
941 		p->tcol->lastcol = p->col;
942 }
943 
944 void
945 term_setwidth(struct termp *p, const char *wstr)
946 {
947 	struct roffsu	 su;
948 	int		 iop, width;
949 
950 	iop = 0;
951 	width = 0;
952 	if (NULL != wstr) {
953 		switch (*wstr) {
954 		case '+':
955 			iop = 1;
956 			wstr++;
957 			break;
958 		case '-':
959 			iop = -1;
960 			wstr++;
961 			break;
962 		default:
963 			break;
964 		}
965 		if (a2roffsu(wstr, &su, SCALE_MAX) != NULL)
966 			width = term_hspan(p, &su);
967 		else
968 			iop = 0;
969 	}
970 	(*p->setwidth)(p, iop, width);
971 }
972 
973 size_t
974 term_len(const struct termp *p, size_t sz)
975 {
976 	return (*p->getwidth)(p, ' ') * sz;
977 }
978 
979 static size_t
980 cond_width(const struct termp *p, int c, int *skip)
981 {
982 	if (*skip) {
983 		(*skip) = 0;
984 		return 0;
985 	} else
986 		return (*p->getwidth)(p, c);
987 }
988 
989 size_t
990 term_strlen(const struct termp *p, const char *cp)
991 {
992 	const char	*seq;		/* Escape sequence argument. */
993 	const char	*rhs;		/* String to be printed. */
994 
995 	/* Widths in basic units. */
996 	size_t		 sz;		/* Return value. */
997 	size_t		 this_sz;	/* Individual char for overstrike. */
998 	size_t		 max_sz;	/* Result of overstrike. */
999 
1000 	/* Numbers of bytes. */
1001 	size_t		 rsz;		/* Substring length in bytes. */
1002 	size_t		 i;		/* Byte index in substring. */
1003 	int		 ssz;		/* Argument length in bytes. */
1004 	int		 skip;		/* Number of bytes to skip. */
1005 
1006 	int		 uc;		/* Unicode codepoint number. */
1007 	enum mandoc_esc	 esc;
1008 
1009 	static const char rej[] = { '\\', ASCII_NBRSP, ASCII_NBRZW,
1010 		ASCII_BREAK, ASCII_HYPH, ASCII_TABREF, '\0' };
1011 
1012 	/*
1013 	 * Account for escaped sequences within string length
1014 	 * calculations.  This follows the logic in term_word() as we
1015 	 * must calculate the width of produced strings.
1016 	 */
1017 
1018 	sz = 0;
1019 	skip = 0;
1020 	while ('\0' != *cp) {
1021 		rsz = strcspn(cp, rej);
1022 		for (i = 0; i < rsz; i++)
1023 			sz += cond_width(p, *cp++, &skip);
1024 
1025 		switch (*cp) {
1026 		case '\\':
1027 			cp++;
1028 			rhs = NULL;
1029 			esc = mandoc_escape(&cp, &seq, &ssz);
1030 			switch (esc) {
1031 			case ESCAPE_UNICODE:
1032 				uc = mchars_num2uc(seq + 1, ssz - 1);
1033 				break;
1034 			case ESCAPE_NUMBERED:
1035 				uc = mchars_num2char(seq, ssz);
1036 				if (uc < 0)
1037 					continue;
1038 				break;
1039 			case ESCAPE_SPECIAL:
1040 				if (p->enc == TERMENC_ASCII) {
1041 					rhs = mchars_spec2str(seq, ssz, &rsz);
1042 					if (rhs != NULL)
1043 						break;
1044 				} else {
1045 					uc = mchars_spec2cp(seq, ssz);
1046 					if (uc > 0)
1047 						sz += cond_width(p, uc, &skip);
1048 				}
1049 				continue;
1050 			case ESCAPE_UNDEF:
1051 				uc = *seq;
1052 				break;
1053 			case ESCAPE_DEVICE:
1054 				if (p->type == TERMTYPE_PDF) {
1055 					rhs = "pdf";
1056 					rsz = 3;
1057 				} else if (p->type == TERMTYPE_PS) {
1058 					rhs = "ps";
1059 					rsz = 2;
1060 				} else if (p->enc == TERMENC_ASCII) {
1061 					rhs = "ascii";
1062 					rsz = 5;
1063 				} else {
1064 					rhs = "utf8";
1065 					rsz = 4;
1066 				}
1067 				break;
1068 			case ESCAPE_SKIPCHAR:
1069 				skip = 1;
1070 				continue;
1071 			case ESCAPE_OVERSTRIKE:
1072 				max_sz = 0;
1073 				rhs = seq + ssz;
1074 				while (seq < rhs) {
1075 					if (*seq == '\\') {
1076 						mandoc_escape(&seq, NULL, NULL);
1077 						continue;
1078 					}
1079 					this_sz = (*p->getwidth)(p, *seq++);
1080 					if (max_sz < this_sz)
1081 						max_sz = this_sz;
1082 				}
1083 				sz += max_sz;
1084 				continue;
1085 			default:
1086 				continue;
1087 			}
1088 
1089 			/*
1090 			 * Common handling for Unicode and numbered
1091 			 * character escape sequences.
1092 			 */
1093 
1094 			if (rhs == NULL) {
1095 				if (p->enc == TERMENC_ASCII) {
1096 					rhs = ascii_uc2str(uc);
1097 					rsz = strlen(rhs);
1098 				} else {
1099 					if ((uc < 0x20 && uc != 0x09) ||
1100 					    (uc > 0x7E && uc < 0xA0))
1101 						uc = 0xFFFD;
1102 					sz += cond_width(p, uc, &skip);
1103 					continue;
1104 				}
1105 			}
1106 
1107 			if (skip) {
1108 				skip = 0;
1109 				break;
1110 			}
1111 
1112 			/*
1113 			 * Common handling for all escape sequences
1114 			 * printing more than one character.
1115 			 */
1116 
1117 			for (i = 0; i < rsz; i++)
1118 				sz += (*p->getwidth)(p, *rhs++);
1119 			break;
1120 		case ASCII_NBRSP:
1121 			sz += cond_width(p, ' ', &skip);
1122 			cp++;
1123 			break;
1124 		case ASCII_HYPH:
1125 			sz += cond_width(p, '-', &skip);
1126 			cp++;
1127 			break;
1128 		default:
1129 			break;
1130 		}
1131 	}
1132 
1133 	return sz;
1134 }
1135 
1136 int
1137 term_vspan(const struct termp *p, const struct roffsu *su)
1138 {
1139 	double		 r;
1140 	int		 ri;
1141 
1142 	switch (su->unit) {
1143 	case SCALE_BU:
1144 		r = su->scale / 40.0;
1145 		break;
1146 	case SCALE_CM:
1147 		r = su->scale * 6.0 / 2.54;
1148 		break;
1149 	case SCALE_FS:
1150 		r = su->scale * 65536.0 / 40.0;
1151 		break;
1152 	case SCALE_IN:
1153 		r = su->scale * 6.0;
1154 		break;
1155 	case SCALE_MM:
1156 		r = su->scale * 0.006;
1157 		break;
1158 	case SCALE_PC:
1159 		r = su->scale;
1160 		break;
1161 	case SCALE_PT:
1162 		r = su->scale / 12.0;
1163 		break;
1164 	case SCALE_EN:
1165 	case SCALE_EM:
1166 		r = su->scale * 0.6;
1167 		break;
1168 	case SCALE_VS:
1169 		r = su->scale;
1170 		break;
1171 	default:
1172 		abort();
1173 	}
1174 	ri = r > 0.0 ? r + 0.4995 : r - 0.4995;
1175 	return ri < 66 ? ri : 1;
1176 }
1177 
1178 /*
1179  * Convert a scaling width to basic units.
1180  */
1181 int
1182 term_hspan(const struct termp *p, const struct roffsu *su)
1183 {
1184 	return (*p->hspan)(p, su);
1185 }
1186