xref: /freebsd/contrib/mandoc/term.c (revision 6d38604fc532a3fc060788e3ce40464b46047eaf)
1 /* $Id: term.c,v 1.283 2021/08/10 12:55:04 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010-2020 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdint.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "out.h"
32 #include "term.h"
33 #include "main.h"
34 
35 static	size_t		 cond_width(const struct termp *, int, int *);
36 static	void		 adjbuf(struct termp_col *, size_t);
37 static	void		 bufferc(struct termp *, char);
38 static	void		 encode(struct termp *, const char *, size_t);
39 static	void		 encode1(struct termp *, int);
40 static	void		 endline(struct termp *);
41 static	void		 term_field(struct termp *, size_t, size_t);
42 static	void		 term_fill(struct termp *, size_t *, size_t *,
43 				size_t);
44 
45 
46 void
term_setcol(struct termp * p,size_t maxtcol)47 term_setcol(struct termp *p, size_t maxtcol)
48 {
49 	if (maxtcol > p->maxtcol) {
50 		p->tcols = mandoc_recallocarray(p->tcols,
51 		    p->maxtcol, maxtcol, sizeof(*p->tcols));
52 		p->maxtcol = maxtcol;
53 	}
54 	p->lasttcol = maxtcol - 1;
55 	p->tcol = p->tcols;
56 }
57 
58 void
term_free(struct termp * p)59 term_free(struct termp *p)
60 {
61 	for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++)
62 		free(p->tcol->buf);
63 	free(p->tcols);
64 	free(p->fontq);
65 	free(p);
66 }
67 
68 void
term_begin(struct termp * p,term_margin head,term_margin foot,const struct roff_meta * arg)69 term_begin(struct termp *p, term_margin head,
70 		term_margin foot, const struct roff_meta *arg)
71 {
72 
73 	p->headf = head;
74 	p->footf = foot;
75 	p->argf = arg;
76 	(*p->begin)(p);
77 }
78 
79 void
term_end(struct termp * p)80 term_end(struct termp *p)
81 {
82 
83 	(*p->end)(p);
84 }
85 
86 /*
87  * Flush a chunk of text.  By default, break the output line each time
88  * the right margin is reached, and continue output on the next line
89  * at the same offset as the chunk itself.  By default, also break the
90  * output line at the end of the chunk.  There are many flags modifying
91  * this behaviour, see the comments in the body of the function.
92  */
93 void
term_flushln(struct termp * p)94 term_flushln(struct termp *p)
95 {
96 	size_t	 vbl;      /* Number of blanks to prepend to the output. */
97 	size_t	 vbr;      /* Actual visual position of the end of field. */
98 	size_t	 vfield;   /* Desired visual field width. */
99 	size_t	 vtarget;  /* Desired visual position of the right margin. */
100 	size_t	 ic;       /* Character position in the input buffer. */
101 	size_t	 nbr;      /* Number of characters to print in this field. */
102 
103 	/*
104 	 * Normally, start writing at the left margin, but with the
105 	 * NOPAD flag, start writing at the current position instead.
106 	 */
107 
108 	vbl = (p->flags & TERMP_NOPAD) || p->tcol->offset < p->viscol ?
109 	    0 : p->tcol->offset - p->viscol;
110 	if (p->minbl && vbl < p->minbl)
111 		vbl = p->minbl;
112 
113 	if ((p->flags & TERMP_MULTICOL) == 0)
114 		p->tcol->col = 0;
115 
116 	/* Loop over output lines. */
117 
118 	for (;;) {
119 		vfield = p->tcol->rmargin > p->viscol + vbl ?
120 		    p->tcol->rmargin - p->viscol - vbl : 0;
121 
122 		/*
123 		 * Normally, break the line at the the right margin
124 		 * of the field, but with the NOBREAK flag, only
125 		 * break it at the max right margin of the screen,
126 		 * and with the BRNEVER flag, never break it at all.
127 		 */
128 
129 		vtarget = (p->flags & TERMP_NOBREAK) == 0 ? vfield :
130 		    p->maxrmargin > p->viscol + vbl ?
131 		    p->maxrmargin - p->viscol - vbl : 0;
132 
133 		/*
134 		 * Figure out how much text will fit in the field.
135 		 * If there is whitespace only, print nothing.
136 		 */
137 
138 		term_fill(p, &nbr, &vbr,
139 		    p->flags & TERMP_BRNEVER ? SIZE_MAX : vtarget);
140 		if (nbr == 0)
141 			break;
142 
143 		/*
144 		 * With the CENTER or RIGHT flag, increase the indentation
145 		 * to center the text between the left and right margins
146 		 * or to adjust it to the right margin, respectively.
147 		 */
148 
149 		if (vbr < vtarget) {
150 			if (p->flags & TERMP_CENTER)
151 				vbl += (vtarget - vbr) / 2;
152 			else if (p->flags & TERMP_RIGHT)
153 				vbl += vtarget - vbr;
154 		}
155 
156 		/* Finally, print the field content. */
157 
158 		term_field(p, vbl, nbr);
159 
160 		/*
161 		 * If there is no text left in the field, exit the loop.
162 		 * If the BRTRSP flag is set, consider trailing
163 		 * whitespace significant when deciding whether
164 		 * the field fits or not.
165 		 */
166 
167 		for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
168 			switch (p->tcol->buf[ic]) {
169 			case '\t':
170 				if (p->flags & TERMP_BRTRSP)
171 					vbr = term_tab_next(vbr);
172 				continue;
173 			case ' ':
174 				if (p->flags & TERMP_BRTRSP)
175 					vbr += (*p->width)(p, ' ');
176 				continue;
177 			case '\n':
178 			case ASCII_BREAK:
179 				continue;
180 			default:
181 				break;
182 			}
183 			break;
184 		}
185 		if (ic == p->tcol->lastcol)
186 			break;
187 
188 		/*
189 		 * At the location of an automtic line break, input
190 		 * space characters are consumed by the line break.
191 		 */
192 
193 		while (p->tcol->col < p->tcol->lastcol &&
194 		    p->tcol->buf[p->tcol->col] == ' ')
195 			p->tcol->col++;
196 
197 		/*
198 		 * In multi-column mode, leave the rest of the text
199 		 * in the buffer to be handled by a subsequent
200 		 * invocation, such that the other columns of the
201 		 * table can be handled first.
202 		 * In single-column mode, simply break the line.
203 		 */
204 
205 		if (p->flags & TERMP_MULTICOL)
206 			return;
207 
208 		endline(p);
209 		p->viscol = 0;
210 
211 		/*
212 		 * Normally, start the next line at the same indentation
213 		 * as this one, but with the BRIND flag, start it at the
214 		 * right margin instead.  This is used together with
215 		 * NOBREAK for the tags in various kinds of tagged lists.
216 		 */
217 
218 		vbl = p->flags & TERMP_BRIND ?
219 		    p->tcol->rmargin : p->tcol->offset;
220 	}
221 
222 	/* Reset output state in preparation for the next field. */
223 
224 	p->col = p->tcol->col = p->tcol->lastcol = 0;
225 	p->minbl = p->trailspace;
226 	p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE | TERMP_NOPAD);
227 
228 	if (p->flags & TERMP_MULTICOL)
229 		return;
230 
231 	/*
232 	 * The HANG flag means that the next field
233 	 * always follows on the same line.
234 	 * The NOBREAK flag means that the next field
235 	 * follows on the same line unless the field was overrun.
236 	 * Normally, break the line at the end of each field.
237 	 */
238 
239 	if ((p->flags & TERMP_HANG) == 0 &&
240 	    ((p->flags & TERMP_NOBREAK) == 0 ||
241 	     vbr + term_len(p, p->trailspace) > vfield))
242 		endline(p);
243 }
244 
245 /*
246  * Store the number of input characters to print in this field in *nbr
247  * and their total visual width to print in *vbr.
248  * If there is only whitespace in the field, both remain zero.
249  * The desired visual width of the field is provided by vtarget.
250  * If the first word is longer, the field will be overrun.
251  */
252 static void
term_fill(struct termp * p,size_t * nbr,size_t * vbr,size_t vtarget)253 term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget)
254 {
255 	size_t	 ic;        /* Character position in the input buffer. */
256 	size_t	 vis;       /* Visual position of the current character. */
257 	size_t	 vn;        /* Visual position of the next character. */
258 	int	 breakline; /* Break at the end of this word. */
259 	int	 graph;     /* Last character was non-blank. */
260 
261 	*nbr = *vbr = vis = 0;
262 	breakline = graph = 0;
263 	for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
264 		switch (p->tcol->buf[ic]) {
265 		case '\b':  /* Escape \o (overstrike) or backspace markup. */
266 			assert(ic > 0);
267 			vis -= (*p->width)(p, p->tcol->buf[ic - 1]);
268 			continue;
269 
270 		case '\t':  /* Normal ASCII whitespace. */
271 		case ' ':
272 		case ASCII_BREAK:  /* Escape \: (breakpoint). */
273 			switch (p->tcol->buf[ic]) {
274 			case '\t':
275 				vn = term_tab_next(vis);
276 				break;
277 			case ' ':
278 				vn = vis + (*p->width)(p, ' ');
279 				break;
280 			case ASCII_BREAK:
281 				vn = vis;
282 				break;
283 			default:
284 				abort();
285 			}
286 			/* Can break at the end of a word. */
287 			if (breakline || vn > vtarget)
288 				break;
289 			if (graph) {
290 				*nbr = ic;
291 				*vbr = vis;
292 				graph = 0;
293 			}
294 			vis = vn;
295 			continue;
296 
297 		case '\n':  /* Escape \p (break at the end of the word). */
298 			breakline = 1;
299 			continue;
300 
301 		case ASCII_HYPH:  /* Breakable hyphen. */
302 			graph = 1;
303 			/*
304 			 * We are about to decide whether to break the
305 			 * line or not, so we no longer need this hyphen
306 			 * to be marked as breakable.  Put back a real
307 			 * hyphen such that we get the correct width.
308 			 */
309 			p->tcol->buf[ic] = '-';
310 			vis += (*p->width)(p, '-');
311 			if (vis > vtarget) {
312 				ic++;
313 				break;
314 			}
315 			*nbr = ic + 1;
316 			*vbr = vis;
317 			continue;
318 
319 		case ASCII_NBRSP:  /* Non-breakable space. */
320 			p->tcol->buf[ic] = ' ';
321 			/* FALLTHROUGH */
322 		default:  /* Printable character. */
323 			graph = 1;
324 			vis += (*p->width)(p, p->tcol->buf[ic]);
325 			if (vis > vtarget && *nbr > 0)
326 				return;
327 			continue;
328 		}
329 		break;
330 	}
331 
332 	/*
333 	 * If the last word extends to the end of the field without any
334 	 * trailing whitespace, the loop could not check yet whether it
335 	 * can remain on this line.  So do the check now.
336 	 */
337 
338 	if (graph && (vis <= vtarget || *nbr == 0)) {
339 		*nbr = ic;
340 		*vbr = vis;
341 	}
342 }
343 
344 /*
345  * Print the contents of one field
346  * with an indentation of	 vbl	  visual columns,
347  * and an input string length of nbr	  characters.
348  */
349 static void
term_field(struct termp * p,size_t vbl,size_t nbr)350 term_field(struct termp *p, size_t vbl, size_t nbr)
351 {
352 	size_t	 ic;	/* Character position in the input buffer. */
353 	size_t	 vis;	/* Visual position of the current character. */
354 	size_t	 dv;	/* Visual width of the current character. */
355 	size_t	 vn;	/* Visual position of the next character. */
356 
357 	vis = 0;
358 	for (ic = p->tcol->col; ic < nbr; ic++) {
359 
360 		/*
361 		 * To avoid the printing of trailing whitespace,
362 		 * do not print whitespace right away, only count it.
363 		 */
364 
365 		switch (p->tcol->buf[ic]) {
366 		case '\n':
367 		case ASCII_BREAK:
368 			continue;
369 		case '\t':
370 			vn = term_tab_next(vis);
371 			vbl += vn - vis;
372 			vis = vn;
373 			continue;
374 		case ' ':
375 		case ASCII_NBRSP:
376 			dv = (*p->width)(p, ' ');
377 			vbl += dv;
378 			vis += dv;
379 			continue;
380 		default:
381 			break;
382 		}
383 
384 		/*
385 		 * We found a non-blank character to print,
386 		 * so write preceding white space now.
387 		 */
388 
389 		if (vbl > 0) {
390 			(*p->advance)(p, vbl);
391 			p->viscol += vbl;
392 			vbl = 0;
393 		}
394 
395 		/* Print the character and adjust the visual position. */
396 
397 		(*p->letter)(p, p->tcol->buf[ic]);
398 		if (p->tcol->buf[ic] == '\b') {
399 			dv = (*p->width)(p, p->tcol->buf[ic - 1]);
400 			p->viscol -= dv;
401 			vis -= dv;
402 		} else {
403 			dv = (*p->width)(p, p->tcol->buf[ic]);
404 			p->viscol += dv;
405 			vis += dv;
406 		}
407 	}
408 	p->tcol->col = nbr;
409 }
410 
411 static void
endline(struct termp * p)412 endline(struct termp *p)
413 {
414 	if ((p->flags & (TERMP_NEWMC | TERMP_ENDMC)) == TERMP_ENDMC) {
415 		p->mc = NULL;
416 		p->flags &= ~TERMP_ENDMC;
417 	}
418 	if (p->mc != NULL) {
419 		if (p->viscol && p->maxrmargin >= p->viscol)
420 			(*p->advance)(p, p->maxrmargin - p->viscol + 1);
421 		p->flags |= TERMP_NOBUF | TERMP_NOSPACE;
422 		term_word(p, p->mc);
423 		p->flags &= ~(TERMP_NOBUF | TERMP_NEWMC);
424 	}
425 	p->viscol = 0;
426 	p->minbl = 0;
427 	(*p->endline)(p);
428 }
429 
430 /*
431  * A newline only breaks an existing line; it won't assert vertical
432  * space.  All data in the output buffer is flushed prior to the newline
433  * assertion.
434  */
435 void
term_newln(struct termp * p)436 term_newln(struct termp *p)
437 {
438 
439 	p->flags |= TERMP_NOSPACE;
440 	if (p->tcol->lastcol || p->viscol)
441 		term_flushln(p);
442 }
443 
444 /*
445  * Asserts a vertical space (a full, empty line-break between lines).
446  * Note that if used twice, this will cause two blank spaces and so on.
447  * All data in the output buffer is flushed prior to the newline
448  * assertion.
449  */
450 void
term_vspace(struct termp * p)451 term_vspace(struct termp *p)
452 {
453 
454 	term_newln(p);
455 	p->viscol = 0;
456 	p->minbl = 0;
457 	if (0 < p->skipvsp)
458 		p->skipvsp--;
459 	else
460 		(*p->endline)(p);
461 }
462 
463 /* Swap current and previous font; for \fP and .ft P */
464 void
term_fontlast(struct termp * p)465 term_fontlast(struct termp *p)
466 {
467 	enum termfont	 f;
468 
469 	f = p->fontl;
470 	p->fontl = p->fontq[p->fonti];
471 	p->fontq[p->fonti] = f;
472 }
473 
474 /* Set font, save current, discard previous; for \f, .ft, .B etc. */
475 void
term_fontrepl(struct termp * p,enum termfont f)476 term_fontrepl(struct termp *p, enum termfont f)
477 {
478 
479 	p->fontl = p->fontq[p->fonti];
480 	p->fontq[p->fonti] = f;
481 }
482 
483 /* Set font, save previous. */
484 void
term_fontpush(struct termp * p,enum termfont f)485 term_fontpush(struct termp *p, enum termfont f)
486 {
487 
488 	p->fontl = p->fontq[p->fonti];
489 	if (++p->fonti == p->fontsz) {
490 		p->fontsz += 8;
491 		p->fontq = mandoc_reallocarray(p->fontq,
492 		    p->fontsz, sizeof(*p->fontq));
493 	}
494 	p->fontq[p->fonti] = f;
495 }
496 
497 /* Flush to make the saved pointer current again. */
498 void
term_fontpopq(struct termp * p,int i)499 term_fontpopq(struct termp *p, int i)
500 {
501 
502 	assert(i >= 0);
503 	if (p->fonti > i)
504 		p->fonti = i;
505 }
506 
507 /* Pop one font off the stack. */
508 void
term_fontpop(struct termp * p)509 term_fontpop(struct termp *p)
510 {
511 
512 	assert(p->fonti);
513 	p->fonti--;
514 }
515 
516 /*
517  * Handle pwords, partial words, which may be either a single word or a
518  * phrase that cannot be broken down (such as a literal string).  This
519  * handles word styling.
520  */
521 void
term_word(struct termp * p,const char * word)522 term_word(struct termp *p, const char *word)
523 {
524 	struct roffsu	 su;
525 	const char	 nbrsp[2] = { ASCII_NBRSP, 0 };
526 	const char	*seq, *cp;
527 	int		 sz, uc;
528 	size_t		 csz, lsz, ssz;
529 	enum mandoc_esc	 esc;
530 
531 	if ((p->flags & TERMP_NOBUF) == 0) {
532 		if ((p->flags & TERMP_NOSPACE) == 0) {
533 			if ((p->flags & TERMP_KEEP) == 0) {
534 				bufferc(p, ' ');
535 				if (p->flags & TERMP_SENTENCE)
536 					bufferc(p, ' ');
537 			} else
538 				bufferc(p, ASCII_NBRSP);
539 		}
540 		if (p->flags & TERMP_PREKEEP)
541 			p->flags |= TERMP_KEEP;
542 		if (p->flags & TERMP_NONOSPACE)
543 			p->flags |= TERMP_NOSPACE;
544 		else
545 			p->flags &= ~TERMP_NOSPACE;
546 		p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE);
547 		p->skipvsp = 0;
548 	}
549 
550 	while ('\0' != *word) {
551 		if ('\\' != *word) {
552 			if (TERMP_NBRWORD & p->flags) {
553 				if (' ' == *word) {
554 					encode(p, nbrsp, 1);
555 					word++;
556 					continue;
557 				}
558 				ssz = strcspn(word, "\\ ");
559 			} else
560 				ssz = strcspn(word, "\\");
561 			encode(p, word, ssz);
562 			word += (int)ssz;
563 			continue;
564 		}
565 
566 		word++;
567 		esc = mandoc_escape(&word, &seq, &sz);
568 		switch (esc) {
569 		case ESCAPE_UNICODE:
570 			uc = mchars_num2uc(seq + 1, sz - 1);
571 			break;
572 		case ESCAPE_NUMBERED:
573 			uc = mchars_num2char(seq, sz);
574 			if (uc < 0)
575 				continue;
576 			break;
577 		case ESCAPE_SPECIAL:
578 			if (p->enc == TERMENC_ASCII) {
579 				cp = mchars_spec2str(seq, sz, &ssz);
580 				if (cp != NULL)
581 					encode(p, cp, ssz);
582 			} else {
583 				uc = mchars_spec2cp(seq, sz);
584 				if (uc > 0)
585 					encode1(p, uc);
586 			}
587 			continue;
588 		case ESCAPE_UNDEF:
589 			uc = *seq;
590 			break;
591 		case ESCAPE_FONTBOLD:
592 		case ESCAPE_FONTCB:
593 			term_fontrepl(p, TERMFONT_BOLD);
594 			continue;
595 		case ESCAPE_FONTITALIC:
596 		case ESCAPE_FONTCI:
597 			term_fontrepl(p, TERMFONT_UNDER);
598 			continue;
599 		case ESCAPE_FONTBI:
600 			term_fontrepl(p, TERMFONT_BI);
601 			continue;
602 		case ESCAPE_FONT:
603 		case ESCAPE_FONTCR:
604 		case ESCAPE_FONTROMAN:
605 			term_fontrepl(p, TERMFONT_NONE);
606 			continue;
607 		case ESCAPE_FONTPREV:
608 			term_fontlast(p);
609 			continue;
610 		case ESCAPE_BREAK:
611 			bufferc(p, '\n');
612 			continue;
613 		case ESCAPE_NOSPACE:
614 			if (p->flags & TERMP_BACKAFTER)
615 				p->flags &= ~TERMP_BACKAFTER;
616 			else if (*word == '\0')
617 				p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE);
618 			continue;
619 		case ESCAPE_DEVICE:
620 			if (p->type == TERMTYPE_PDF)
621 				encode(p, "pdf", 3);
622 			else if (p->type == TERMTYPE_PS)
623 				encode(p, "ps", 2);
624 			else if (p->enc == TERMENC_ASCII)
625 				encode(p, "ascii", 5);
626 			else
627 				encode(p, "utf8", 4);
628 			continue;
629 		case ESCAPE_HORIZ:
630 			if (*seq == '|') {
631 				seq++;
632 				uc = -p->col;
633 			} else
634 				uc = 0;
635 			if (a2roffsu(seq, &su, SCALE_EM) == NULL)
636 				continue;
637 			uc += term_hen(p, &su);
638 			if (uc > 0)
639 				while (uc-- > 0)
640 					bufferc(p, ASCII_NBRSP);
641 			else if (p->col > (size_t)(-uc))
642 				p->col += uc;
643 			else {
644 				uc += p->col;
645 				p->col = 0;
646 				if (p->tcol->offset > (size_t)(-uc)) {
647 					p->ti += uc;
648 					p->tcol->offset += uc;
649 				} else {
650 					p->ti -= p->tcol->offset;
651 					p->tcol->offset = 0;
652 				}
653 			}
654 			continue;
655 		case ESCAPE_HLINE:
656 			if ((cp = a2roffsu(seq, &su, SCALE_EM)) == NULL)
657 				continue;
658 			uc = term_hen(p, &su);
659 			if (uc <= 0) {
660 				if (p->tcol->rmargin <= p->tcol->offset)
661 					continue;
662 				lsz = p->tcol->rmargin - p->tcol->offset;
663 			} else
664 				lsz = uc;
665 			if (*cp == seq[-1])
666 				uc = -1;
667 			else if (*cp == '\\') {
668 				seq = cp + 1;
669 				esc = mandoc_escape(&seq, &cp, &sz);
670 				switch (esc) {
671 				case ESCAPE_UNICODE:
672 					uc = mchars_num2uc(cp + 1, sz - 1);
673 					break;
674 				case ESCAPE_NUMBERED:
675 					uc = mchars_num2char(cp, sz);
676 					break;
677 				case ESCAPE_SPECIAL:
678 					uc = mchars_spec2cp(cp, sz);
679 					break;
680 				case ESCAPE_UNDEF:
681 					uc = *seq;
682 					break;
683 				default:
684 					uc = -1;
685 					break;
686 				}
687 			} else
688 				uc = *cp;
689 			if (uc < 0x20 || (uc > 0x7E && uc < 0xA0))
690 				uc = '_';
691 			if (p->enc == TERMENC_ASCII) {
692 				cp = ascii_uc2str(uc);
693 				csz = term_strlen(p, cp);
694 				ssz = strlen(cp);
695 			} else
696 				csz = (*p->width)(p, uc);
697 			while (lsz >= csz) {
698 				if (p->enc == TERMENC_ASCII)
699 					encode(p, cp, ssz);
700 				else
701 					encode1(p, uc);
702 				lsz -= csz;
703 			}
704 			continue;
705 		case ESCAPE_SKIPCHAR:
706 			p->flags |= TERMP_BACKAFTER;
707 			continue;
708 		case ESCAPE_OVERSTRIKE:
709 			cp = seq + sz;
710 			while (seq < cp) {
711 				if (*seq == '\\') {
712 					mandoc_escape(&seq, NULL, NULL);
713 					continue;
714 				}
715 				encode1(p, *seq++);
716 				if (seq < cp) {
717 					if (p->flags & TERMP_BACKBEFORE)
718 						p->flags |= TERMP_BACKAFTER;
719 					else
720 						p->flags |= TERMP_BACKBEFORE;
721 				}
722 			}
723 			/* Trim trailing backspace/blank pair. */
724 			if (p->tcol->lastcol > 2 &&
725 			    (p->tcol->buf[p->tcol->lastcol - 1] == ' ' ||
726 			     p->tcol->buf[p->tcol->lastcol - 1] == '\t'))
727 				p->tcol->lastcol -= 2;
728 			if (p->col > p->tcol->lastcol)
729 				p->col = p->tcol->lastcol;
730 			continue;
731 		default:
732 			continue;
733 		}
734 
735 		/*
736 		 * Common handling for Unicode and numbered
737 		 * character escape sequences.
738 		 */
739 
740 		if (p->enc == TERMENC_ASCII) {
741 			cp = ascii_uc2str(uc);
742 			encode(p, cp, strlen(cp));
743 		} else {
744 			if ((uc < 0x20 && uc != 0x09) ||
745 			    (uc > 0x7E && uc < 0xA0))
746 				uc = 0xFFFD;
747 			encode1(p, uc);
748 		}
749 	}
750 	p->flags &= ~TERMP_NBRWORD;
751 }
752 
753 static void
adjbuf(struct termp_col * c,size_t sz)754 adjbuf(struct termp_col *c, size_t sz)
755 {
756 	if (c->maxcols == 0)
757 		c->maxcols = 1024;
758 	while (c->maxcols <= sz)
759 		c->maxcols <<= 2;
760 	c->buf = mandoc_reallocarray(c->buf, c->maxcols, sizeof(*c->buf));
761 }
762 
763 static void
bufferc(struct termp * p,char c)764 bufferc(struct termp *p, char c)
765 {
766 	if (p->flags & TERMP_NOBUF) {
767 		(*p->letter)(p, c);
768 		return;
769 	}
770 	if (p->col + 1 >= p->tcol->maxcols)
771 		adjbuf(p->tcol, p->col + 1);
772 	if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
773 		p->tcol->buf[p->col] = c;
774 	if (p->tcol->lastcol < ++p->col)
775 		p->tcol->lastcol = p->col;
776 }
777 
778 /*
779  * See encode().
780  * Do this for a single (probably unicode) value.
781  * Does not check for non-decorated glyphs.
782  */
783 static void
encode1(struct termp * p,int c)784 encode1(struct termp *p, int c)
785 {
786 	enum termfont	  f;
787 
788 	if (p->flags & TERMP_NOBUF) {
789 		(*p->letter)(p, c);
790 		return;
791 	}
792 
793 	if (p->col + 7 >= p->tcol->maxcols)
794 		adjbuf(p->tcol, p->col + 7);
795 
796 	f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ?
797 	    p->fontq[p->fonti] : TERMFONT_NONE;
798 
799 	if (p->flags & TERMP_BACKBEFORE) {
800 		if (p->tcol->buf[p->col - 1] == ' ' ||
801 		    p->tcol->buf[p->col - 1] == '\t')
802 			p->col--;
803 		else
804 			p->tcol->buf[p->col++] = '\b';
805 		p->flags &= ~TERMP_BACKBEFORE;
806 	}
807 	if (f == TERMFONT_UNDER || f == TERMFONT_BI) {
808 		p->tcol->buf[p->col++] = '_';
809 		p->tcol->buf[p->col++] = '\b';
810 	}
811 	if (f == TERMFONT_BOLD || f == TERMFONT_BI) {
812 		if (c == ASCII_HYPH)
813 			p->tcol->buf[p->col++] = '-';
814 		else
815 			p->tcol->buf[p->col++] = c;
816 		p->tcol->buf[p->col++] = '\b';
817 	}
818 	if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
819 		p->tcol->buf[p->col] = c;
820 	if (p->tcol->lastcol < ++p->col)
821 		p->tcol->lastcol = p->col;
822 	if (p->flags & TERMP_BACKAFTER) {
823 		p->flags |= TERMP_BACKBEFORE;
824 		p->flags &= ~TERMP_BACKAFTER;
825 	}
826 }
827 
828 static void
encode(struct termp * p,const char * word,size_t sz)829 encode(struct termp *p, const char *word, size_t sz)
830 {
831 	size_t		  i;
832 
833 	if (p->flags & TERMP_NOBUF) {
834 		for (i = 0; i < sz; i++)
835 			(*p->letter)(p, word[i]);
836 		return;
837 	}
838 
839 	if (p->col + 2 + (sz * 5) >= p->tcol->maxcols)
840 		adjbuf(p->tcol, p->col + 2 + (sz * 5));
841 
842 	for (i = 0; i < sz; i++) {
843 		if (ASCII_HYPH == word[i] ||
844 		    isgraph((unsigned char)word[i]))
845 			encode1(p, word[i]);
846 		else {
847 			if (p->tcol->lastcol <= p->col ||
848 			    (word[i] != ' ' && word[i] != ASCII_NBRSP))
849 				p->tcol->buf[p->col] = word[i];
850 			p->col++;
851 
852 			/*
853 			 * Postpone the effect of \z while handling
854 			 * an overstrike sequence from ascii_uc2str().
855 			 */
856 
857 			if (word[i] == '\b' &&
858 			    (p->flags & TERMP_BACKBEFORE)) {
859 				p->flags &= ~TERMP_BACKBEFORE;
860 				p->flags |= TERMP_BACKAFTER;
861 			}
862 		}
863 	}
864 	if (p->tcol->lastcol < p->col)
865 		p->tcol->lastcol = p->col;
866 }
867 
868 void
term_setwidth(struct termp * p,const char * wstr)869 term_setwidth(struct termp *p, const char *wstr)
870 {
871 	struct roffsu	 su;
872 	int		 iop, width;
873 
874 	iop = 0;
875 	width = 0;
876 	if (NULL != wstr) {
877 		switch (*wstr) {
878 		case '+':
879 			iop = 1;
880 			wstr++;
881 			break;
882 		case '-':
883 			iop = -1;
884 			wstr++;
885 			break;
886 		default:
887 			break;
888 		}
889 		if (a2roffsu(wstr, &su, SCALE_MAX) != NULL)
890 			width = term_hspan(p, &su);
891 		else
892 			iop = 0;
893 	}
894 	(*p->setwidth)(p, iop, width);
895 }
896 
897 size_t
term_len(const struct termp * p,size_t sz)898 term_len(const struct termp *p, size_t sz)
899 {
900 
901 	return (*p->width)(p, ' ') * sz;
902 }
903 
904 static size_t
cond_width(const struct termp * p,int c,int * skip)905 cond_width(const struct termp *p, int c, int *skip)
906 {
907 
908 	if (*skip) {
909 		(*skip) = 0;
910 		return 0;
911 	} else
912 		return (*p->width)(p, c);
913 }
914 
915 size_t
term_strlen(const struct termp * p,const char * cp)916 term_strlen(const struct termp *p, const char *cp)
917 {
918 	size_t		 sz, rsz, i;
919 	int		 ssz, skip, uc;
920 	const char	*seq, *rhs;
921 	enum mandoc_esc	 esc;
922 	static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH,
923 			ASCII_BREAK, '\0' };
924 
925 	/*
926 	 * Account for escaped sequences within string length
927 	 * calculations.  This follows the logic in term_word() as we
928 	 * must calculate the width of produced strings.
929 	 */
930 
931 	sz = 0;
932 	skip = 0;
933 	while ('\0' != *cp) {
934 		rsz = strcspn(cp, rej);
935 		for (i = 0; i < rsz; i++)
936 			sz += cond_width(p, *cp++, &skip);
937 
938 		switch (*cp) {
939 		case '\\':
940 			cp++;
941 			rhs = NULL;
942 			esc = mandoc_escape(&cp, &seq, &ssz);
943 			switch (esc) {
944 			case ESCAPE_UNICODE:
945 				uc = mchars_num2uc(seq + 1, ssz - 1);
946 				break;
947 			case ESCAPE_NUMBERED:
948 				uc = mchars_num2char(seq, ssz);
949 				if (uc < 0)
950 					continue;
951 				break;
952 			case ESCAPE_SPECIAL:
953 				if (p->enc == TERMENC_ASCII) {
954 					rhs = mchars_spec2str(seq, ssz, &rsz);
955 					if (rhs != NULL)
956 						break;
957 				} else {
958 					uc = mchars_spec2cp(seq, ssz);
959 					if (uc > 0)
960 						sz += cond_width(p, uc, &skip);
961 				}
962 				continue;
963 			case ESCAPE_UNDEF:
964 				uc = *seq;
965 				break;
966 			case ESCAPE_DEVICE:
967 				if (p->type == TERMTYPE_PDF) {
968 					rhs = "pdf";
969 					rsz = 3;
970 				} else if (p->type == TERMTYPE_PS) {
971 					rhs = "ps";
972 					rsz = 2;
973 				} else if (p->enc == TERMENC_ASCII) {
974 					rhs = "ascii";
975 					rsz = 5;
976 				} else {
977 					rhs = "utf8";
978 					rsz = 4;
979 				}
980 				break;
981 			case ESCAPE_SKIPCHAR:
982 				skip = 1;
983 				continue;
984 			case ESCAPE_OVERSTRIKE:
985 				rsz = 0;
986 				rhs = seq + ssz;
987 				while (seq < rhs) {
988 					if (*seq == '\\') {
989 						mandoc_escape(&seq, NULL, NULL);
990 						continue;
991 					}
992 					i = (*p->width)(p, *seq++);
993 					if (rsz < i)
994 						rsz = i;
995 				}
996 				sz += rsz;
997 				continue;
998 			default:
999 				continue;
1000 			}
1001 
1002 			/*
1003 			 * Common handling for Unicode and numbered
1004 			 * character escape sequences.
1005 			 */
1006 
1007 			if (rhs == NULL) {
1008 				if (p->enc == TERMENC_ASCII) {
1009 					rhs = ascii_uc2str(uc);
1010 					rsz = strlen(rhs);
1011 				} else {
1012 					if ((uc < 0x20 && uc != 0x09) ||
1013 					    (uc > 0x7E && uc < 0xA0))
1014 						uc = 0xFFFD;
1015 					sz += cond_width(p, uc, &skip);
1016 					continue;
1017 				}
1018 			}
1019 
1020 			if (skip) {
1021 				skip = 0;
1022 				break;
1023 			}
1024 
1025 			/*
1026 			 * Common handling for all escape sequences
1027 			 * printing more than one character.
1028 			 */
1029 
1030 			for (i = 0; i < rsz; i++)
1031 				sz += (*p->width)(p, *rhs++);
1032 			break;
1033 		case ASCII_NBRSP:
1034 			sz += cond_width(p, ' ', &skip);
1035 			cp++;
1036 			break;
1037 		case ASCII_HYPH:
1038 			sz += cond_width(p, '-', &skip);
1039 			cp++;
1040 			break;
1041 		default:
1042 			break;
1043 		}
1044 	}
1045 
1046 	return sz;
1047 }
1048 
1049 int
term_vspan(const struct termp * p,const struct roffsu * su)1050 term_vspan(const struct termp *p, const struct roffsu *su)
1051 {
1052 	double		 r;
1053 	int		 ri;
1054 
1055 	switch (su->unit) {
1056 	case SCALE_BU:
1057 		r = su->scale / 40.0;
1058 		break;
1059 	case SCALE_CM:
1060 		r = su->scale * 6.0 / 2.54;
1061 		break;
1062 	case SCALE_FS:
1063 		r = su->scale * 65536.0 / 40.0;
1064 		break;
1065 	case SCALE_IN:
1066 		r = su->scale * 6.0;
1067 		break;
1068 	case SCALE_MM:
1069 		r = su->scale * 0.006;
1070 		break;
1071 	case SCALE_PC:
1072 		r = su->scale;
1073 		break;
1074 	case SCALE_PT:
1075 		r = su->scale / 12.0;
1076 		break;
1077 	case SCALE_EN:
1078 	case SCALE_EM:
1079 		r = su->scale * 0.6;
1080 		break;
1081 	case SCALE_VS:
1082 		r = su->scale;
1083 		break;
1084 	default:
1085 		abort();
1086 	}
1087 	ri = r > 0.0 ? r + 0.4995 : r - 0.4995;
1088 	return ri < 66 ? ri : 1;
1089 }
1090 
1091 /*
1092  * Convert a scaling width to basic units, rounding towards 0.
1093  */
1094 int
term_hspan(const struct termp * p,const struct roffsu * su)1095 term_hspan(const struct termp *p, const struct roffsu *su)
1096 {
1097 
1098 	return (*p->hspan)(p, su);
1099 }
1100 
1101 /*
1102  * Convert a scaling width to basic units, rounding to closest.
1103  */
1104 int
term_hen(const struct termp * p,const struct roffsu * su)1105 term_hen(const struct termp *p, const struct roffsu *su)
1106 {
1107 	int bu;
1108 
1109 	if ((bu = (*p->hspan)(p, su)) >= 0)
1110 		return (bu + 11) / 24;
1111 	else
1112 		return -((-bu + 11) / 24);
1113 }
1114