xref: /freebsd/contrib/mandoc/roff_escape.c (revision c1c95add8c80843ba15d784f95c361d795b1f593)
1*c1c95addSBrooks Davis /* $Id: roff_escape.c,v 1.15 2024/05/16 21:23:00 schwarze Exp $ */
2*c1c95addSBrooks Davis /*
3*c1c95addSBrooks Davis  * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022
4*c1c95addSBrooks Davis  *               Ingo Schwarze <schwarze@openbsd.org>
5*c1c95addSBrooks Davis  * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
6*c1c95addSBrooks Davis  *
7*c1c95addSBrooks Davis  * Permission to use, copy, modify, and distribute this software for any
8*c1c95addSBrooks Davis  * purpose with or without fee is hereby granted, provided that the above
9*c1c95addSBrooks Davis  * copyright notice and this permission notice appear in all copies.
10*c1c95addSBrooks Davis  *
11*c1c95addSBrooks Davis  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12*c1c95addSBrooks Davis  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13*c1c95addSBrooks Davis  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14*c1c95addSBrooks Davis  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15*c1c95addSBrooks Davis  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16*c1c95addSBrooks Davis  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17*c1c95addSBrooks Davis  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18*c1c95addSBrooks Davis  *
19*c1c95addSBrooks Davis  * Parser for roff(7) escape sequences.
20*c1c95addSBrooks Davis  * To be used by all mandoc(1) parsers and formatters.
21*c1c95addSBrooks Davis  */
22*c1c95addSBrooks Davis #include <assert.h>
23*c1c95addSBrooks Davis #include <ctype.h>
24*c1c95addSBrooks Davis #include <limits.h>
25*c1c95addSBrooks Davis #include <stdio.h>
26*c1c95addSBrooks Davis #include <string.h>
27*c1c95addSBrooks Davis 
28*c1c95addSBrooks Davis #include "mandoc.h"
29*c1c95addSBrooks Davis #include "roff.h"
30*c1c95addSBrooks Davis #include "roff_int.h"
31*c1c95addSBrooks Davis 
32*c1c95addSBrooks Davis /*
33*c1c95addSBrooks Davis  * Traditional escape sequence interpreter for general use
34*c1c95addSBrooks Davis  * including in high-level formatters.  This function does not issue
35*c1c95addSBrooks Davis  * diagnostics and is not usable for expansion in the roff(7) parser.
36*c1c95addSBrooks Davis  * It is documented in the mandoc_escape(3) manual page.
37*c1c95addSBrooks Davis  */
38*c1c95addSBrooks Davis enum mandoc_esc
mandoc_escape(const char ** rendarg,const char ** rarg,int * rargl)39*c1c95addSBrooks Davis mandoc_escape(const char **rendarg, const char **rarg, int *rargl)
40*c1c95addSBrooks Davis {
41*c1c95addSBrooks Davis         int		 iarg, iendarg, iend;
42*c1c95addSBrooks Davis         enum mandoc_esc  rval;
43*c1c95addSBrooks Davis 
44*c1c95addSBrooks Davis         rval = roff_escape(--*rendarg, 0, 0,
45*c1c95addSBrooks Davis 	    NULL, NULL, &iarg, &iendarg, &iend);
46*c1c95addSBrooks Davis         assert(rval != ESCAPE_EXPAND);
47*c1c95addSBrooks Davis         if (rarg != NULL)
48*c1c95addSBrooks Davis 	       *rarg = *rendarg + iarg;
49*c1c95addSBrooks Davis         if (rargl != NULL)
50*c1c95addSBrooks Davis 	       *rargl = iendarg - iarg;
51*c1c95addSBrooks Davis         *rendarg += iend;
52*c1c95addSBrooks Davis         return rval;
53*c1c95addSBrooks Davis }
54*c1c95addSBrooks Davis 
55*c1c95addSBrooks Davis /*
56*c1c95addSBrooks Davis  * Full-featured escape sequence parser.
57*c1c95addSBrooks Davis  * If it encounters a nested escape sequence that requires expansion
58*c1c95addSBrooks Davis  * by the parser and re-parsing, the positions of that inner escape
59*c1c95addSBrooks Davis  * sequence are returned in *resc ... *rend.
60*c1c95addSBrooks Davis  * Otherwise, *resc is set to aesc and the positions of the escape
61*c1c95addSBrooks Davis  * sequence starting at aesc are returned.
62*c1c95addSBrooks Davis  * Diagnostic messages are generated if and only if ln != 0,
63*c1c95addSBrooks Davis  * that is, if and only if called by roff_expand().
64*c1c95addSBrooks Davis  */
65*c1c95addSBrooks Davis enum mandoc_esc
roff_escape(const char * buf,const int ln,const int aesc,int * resc,int * rnam,int * rarg,int * rendarg,int * rend)66*c1c95addSBrooks Davis roff_escape(const char *buf, const int ln, const int aesc,
67*c1c95addSBrooks Davis     int *resc, int *rnam, int *rarg, int *rendarg, int *rend)
68*c1c95addSBrooks Davis {
69*c1c95addSBrooks Davis 	int		 iesc;		/* index of leading escape char */
70*c1c95addSBrooks Davis 	int		 inam;		/* index of escape name */
71*c1c95addSBrooks Davis 	int		 iarg;		/* index beginning the argument */
72*c1c95addSBrooks Davis 	int		 iendarg;	/* index right after the argument */
73*c1c95addSBrooks Davis 	int		 iend;		/* index right after the sequence */
74*c1c95addSBrooks Davis 	int		 sesc, snam, sarg, sendarg, send; /* for sub-escape */
75*c1c95addSBrooks Davis 	int		 escterm;	/* whether term is escaped */
76*c1c95addSBrooks Davis 	int		 maxl;		/* expected length of the argument */
77*c1c95addSBrooks Davis 	int		 argl;		/* actual length of the argument */
78*c1c95addSBrooks Davis 	int		 c, i;		/* for \[char...] parsing */
79*c1c95addSBrooks Davis 	int 		 valid_A;	/* for \A parsing */
80*c1c95addSBrooks Davis 	enum mandoc_esc	 rval;		/* return value */
81*c1c95addSBrooks Davis 	enum mandoc_esc	 stype;		/* for sub-escape */
82*c1c95addSBrooks Davis 	enum mandocerr	 err;		/* diagnostic code */
83*c1c95addSBrooks Davis 	char		 term;		/* byte terminating the argument */
84*c1c95addSBrooks Davis 
85*c1c95addSBrooks Davis 	/*
86*c1c95addSBrooks Davis 	 * Treat "\E" just like "\";
87*c1c95addSBrooks Davis 	 * it only makes a difference in copy mode.
88*c1c95addSBrooks Davis 	 */
89*c1c95addSBrooks Davis 
90*c1c95addSBrooks Davis 	iesc = inam = aesc;
91*c1c95addSBrooks Davis 	do {
92*c1c95addSBrooks Davis 		inam++;
93*c1c95addSBrooks Davis 	} while (buf[inam] == 'E');
94*c1c95addSBrooks Davis 
95*c1c95addSBrooks Davis 	/*
96*c1c95addSBrooks Davis 	 * Sort the following cases first by syntax category,
97*c1c95addSBrooks Davis 	 * then by escape sequence type, and finally by ASCII code.
98*c1c95addSBrooks Davis 	 */
99*c1c95addSBrooks Davis 
100*c1c95addSBrooks Davis 	iarg = iendarg = iend = inam + 1;
101*c1c95addSBrooks Davis 	maxl = INT_MAX;
102*c1c95addSBrooks Davis 	term = '\0';
103*c1c95addSBrooks Davis 	err = MANDOCERR_OK;
104*c1c95addSBrooks Davis 	switch (buf[inam]) {
105*c1c95addSBrooks Davis 
106*c1c95addSBrooks Davis 	/* Escape sequences taking no arguments at all. */
107*c1c95addSBrooks Davis 
108*c1c95addSBrooks Davis 	case '!':
109*c1c95addSBrooks Davis 	case '?':
110*c1c95addSBrooks Davis 	case 'r':
111*c1c95addSBrooks Davis 		rval = ESCAPE_UNSUPP;
112*c1c95addSBrooks Davis 		goto out;
113*c1c95addSBrooks Davis 
114*c1c95addSBrooks Davis 	case '%':
115*c1c95addSBrooks Davis 	case '&':
116*c1c95addSBrooks Davis 	case ')':
117*c1c95addSBrooks Davis 	case ',':
118*c1c95addSBrooks Davis 	case '/':
119*c1c95addSBrooks Davis 	case '^':
120*c1c95addSBrooks Davis 	case 'a':
121*c1c95addSBrooks Davis 	case 'd':
122*c1c95addSBrooks Davis 	case 't':
123*c1c95addSBrooks Davis 	case 'u':
124*c1c95addSBrooks Davis 	case '{':
125*c1c95addSBrooks Davis 	case '|':
126*c1c95addSBrooks Davis 	case '}':
127*c1c95addSBrooks Davis 		rval = ESCAPE_IGNORE;
128*c1c95addSBrooks Davis 		goto out;
129*c1c95addSBrooks Davis 
130*c1c95addSBrooks Davis 	case '\0':
131*c1c95addSBrooks Davis 		iendarg = --iend;
132*c1c95addSBrooks Davis 		/* FALLTHROUGH */
133*c1c95addSBrooks Davis 	case '.':
134*c1c95addSBrooks Davis 	case '\\':
135*c1c95addSBrooks Davis 	default:
136*c1c95addSBrooks Davis 		iarg--;
137*c1c95addSBrooks Davis 		rval = ESCAPE_UNDEF;
138*c1c95addSBrooks Davis 		goto out;
139*c1c95addSBrooks Davis 
140*c1c95addSBrooks Davis 	case ' ':
141*c1c95addSBrooks Davis 	case '\'':
142*c1c95addSBrooks Davis 	case '-':
143*c1c95addSBrooks Davis 	case '0':
144*c1c95addSBrooks Davis 	case ':':
145*c1c95addSBrooks Davis 	case '_':
146*c1c95addSBrooks Davis 	case '`':
147*c1c95addSBrooks Davis 	case 'e':
148*c1c95addSBrooks Davis 	case '~':
149*c1c95addSBrooks Davis 		iarg--;
150*c1c95addSBrooks Davis 		argl = 1;
151*c1c95addSBrooks Davis 		rval = ESCAPE_SPECIAL;
152*c1c95addSBrooks Davis 		goto out;
153*c1c95addSBrooks Davis 	case 'p':
154*c1c95addSBrooks Davis 		rval = ESCAPE_BREAK;
155*c1c95addSBrooks Davis 		goto out;
156*c1c95addSBrooks Davis 	case 'c':
157*c1c95addSBrooks Davis 		rval = ESCAPE_NOSPACE;
158*c1c95addSBrooks Davis 		goto out;
159*c1c95addSBrooks Davis 	case 'z':
160*c1c95addSBrooks Davis 		rval = ESCAPE_SKIPCHAR;
161*c1c95addSBrooks Davis 		goto out;
162*c1c95addSBrooks Davis 
163*c1c95addSBrooks Davis 	/* Standard argument format. */
164*c1c95addSBrooks Davis 
165*c1c95addSBrooks Davis 	case '$':
166*c1c95addSBrooks Davis 	case '*':
167*c1c95addSBrooks Davis 	case 'V':
168*c1c95addSBrooks Davis 	case 'g':
169*c1c95addSBrooks Davis 	case 'n':
170*c1c95addSBrooks Davis 		rval = ESCAPE_EXPAND;
171*c1c95addSBrooks Davis 		break;
172*c1c95addSBrooks Davis 	case 'F':
173*c1c95addSBrooks Davis 	case 'M':
174*c1c95addSBrooks Davis 	case 'O':
175*c1c95addSBrooks Davis 	case 'Y':
176*c1c95addSBrooks Davis 	case 'k':
177*c1c95addSBrooks Davis 	case 'm':
178*c1c95addSBrooks Davis 		rval = ESCAPE_IGNORE;
179*c1c95addSBrooks Davis 		break;
180*c1c95addSBrooks Davis 	case '(':
181*c1c95addSBrooks Davis 	case '[':
182*c1c95addSBrooks Davis 		rval = ESCAPE_SPECIAL;
183*c1c95addSBrooks Davis 		iendarg = iend = --iarg;
184*c1c95addSBrooks Davis 		break;
185*c1c95addSBrooks Davis 	case 'f':
186*c1c95addSBrooks Davis 		rval = ESCAPE_FONT;
187*c1c95addSBrooks Davis 		break;
188*c1c95addSBrooks Davis 
189*c1c95addSBrooks Davis 	/* Quoted arguments */
190*c1c95addSBrooks Davis 
191*c1c95addSBrooks Davis 	case 'A':
192*c1c95addSBrooks Davis 	case 'B':
193*c1c95addSBrooks Davis 	case 'w':
194*c1c95addSBrooks Davis 		rval = ESCAPE_EXPAND;
195*c1c95addSBrooks Davis 		term = '\b';
196*c1c95addSBrooks Davis 		break;
197*c1c95addSBrooks Davis 	case 'D':
198*c1c95addSBrooks Davis 	case 'H':
199*c1c95addSBrooks Davis 	case 'L':
200*c1c95addSBrooks Davis 	case 'R':
201*c1c95addSBrooks Davis 	case 'S':
202*c1c95addSBrooks Davis 	case 'X':
203*c1c95addSBrooks Davis 	case 'Z':
204*c1c95addSBrooks Davis 	case 'b':
205*c1c95addSBrooks Davis 	case 'v':
206*c1c95addSBrooks Davis 	case 'x':
207*c1c95addSBrooks Davis 		rval = ESCAPE_IGNORE;
208*c1c95addSBrooks Davis 		term = '\b';
209*c1c95addSBrooks Davis 		break;
210*c1c95addSBrooks Davis 	case 'C':
211*c1c95addSBrooks Davis 		rval = ESCAPE_SPECIAL;
212*c1c95addSBrooks Davis 		term = '\b';
213*c1c95addSBrooks Davis 		break;
214*c1c95addSBrooks Davis 	case 'N':
215*c1c95addSBrooks Davis 		rval = ESCAPE_NUMBERED;
216*c1c95addSBrooks Davis 		term = '\b';
217*c1c95addSBrooks Davis 		break;
218*c1c95addSBrooks Davis 	case 'h':
219*c1c95addSBrooks Davis 		rval = ESCAPE_HORIZ;
220*c1c95addSBrooks Davis 		term = '\b';
221*c1c95addSBrooks Davis 		break;
222*c1c95addSBrooks Davis 	case 'l':
223*c1c95addSBrooks Davis 		rval = ESCAPE_HLINE;
224*c1c95addSBrooks Davis 		term = '\b';
225*c1c95addSBrooks Davis 		break;
226*c1c95addSBrooks Davis 	case 'o':
227*c1c95addSBrooks Davis 		rval = ESCAPE_OVERSTRIKE;
228*c1c95addSBrooks Davis 		term = '\b';
229*c1c95addSBrooks Davis 		break;
230*c1c95addSBrooks Davis 
231*c1c95addSBrooks Davis 	/* Sizes support both forms, with additional peculiarities. */
232*c1c95addSBrooks Davis 
233*c1c95addSBrooks Davis 	case 's':
234*c1c95addSBrooks Davis 		rval = ESCAPE_IGNORE;
235*c1c95addSBrooks Davis 		if (buf[iarg] == '+' || buf[iarg] == '-'||
236*c1c95addSBrooks Davis 		    buf[iarg] == ASCII_HYPH)
237*c1c95addSBrooks Davis 			iarg++;
238*c1c95addSBrooks Davis 		switch (buf[iarg]) {
239*c1c95addSBrooks Davis 		case '(':
240*c1c95addSBrooks Davis 			maxl = 2;
241*c1c95addSBrooks Davis 			iarg++;
242*c1c95addSBrooks Davis 			break;
243*c1c95addSBrooks Davis 		case '[':
244*c1c95addSBrooks Davis 			term = ']';
245*c1c95addSBrooks Davis 			iarg++;
246*c1c95addSBrooks Davis 			break;
247*c1c95addSBrooks Davis 		case '\'':
248*c1c95addSBrooks Davis 			term = '\'';
249*c1c95addSBrooks Davis 			iarg++;
250*c1c95addSBrooks Davis 			break;
251*c1c95addSBrooks Davis 		case '1':
252*c1c95addSBrooks Davis 		case '2':
253*c1c95addSBrooks Davis 		case '3':
254*c1c95addSBrooks Davis 			if (buf[iarg - 1] == 's' &&
255*c1c95addSBrooks Davis 			    isdigit((unsigned char)buf[iarg + 1])) {
256*c1c95addSBrooks Davis 				maxl = 2;
257*c1c95addSBrooks Davis 				break;
258*c1c95addSBrooks Davis 			}
259*c1c95addSBrooks Davis 			/* FALLTHROUGH */
260*c1c95addSBrooks Davis 		default:
261*c1c95addSBrooks Davis 			maxl = 1;
262*c1c95addSBrooks Davis 			break;
263*c1c95addSBrooks Davis 		}
264*c1c95addSBrooks Davis 		iendarg = iend = iarg;
265*c1c95addSBrooks Davis 	}
266*c1c95addSBrooks Davis 
267*c1c95addSBrooks Davis 	/* Decide how to end the argument. */
268*c1c95addSBrooks Davis 
269*c1c95addSBrooks Davis 	escterm = 0;
270*c1c95addSBrooks Davis 	stype = ESCAPE_EXPAND;
271*c1c95addSBrooks Davis 	if ((term == '\b' || (term == '\0' && maxl == INT_MAX)) &&
272*c1c95addSBrooks Davis 	    buf[iarg] == buf[iesc]) {
273*c1c95addSBrooks Davis 		stype = roff_escape(buf, ln, iendarg,
274*c1c95addSBrooks Davis 		    &sesc, &snam, &sarg, &sendarg, &send);
275*c1c95addSBrooks Davis 		if (stype == ESCAPE_EXPAND)
276*c1c95addSBrooks Davis 			goto out_sub;
277*c1c95addSBrooks Davis 	}
278*c1c95addSBrooks Davis 
279*c1c95addSBrooks Davis 	if (term == '\b') {
280*c1c95addSBrooks Davis 		if (stype == ESCAPE_UNDEF)
281*c1c95addSBrooks Davis 			iarg++;
282*c1c95addSBrooks Davis 		if (stype != ESCAPE_EXPAND && stype != ESCAPE_UNDEF) {
283*c1c95addSBrooks Davis 			if (strchr("BHLRSNhlvx", buf[inam]) != NULL &&
284*c1c95addSBrooks Davis 			    strchr(" ,.0DLOXYZ^abdhlortuvx|~",
285*c1c95addSBrooks Davis 			    buf[snam]) != NULL) {
286*c1c95addSBrooks Davis 				err = MANDOCERR_ESC_DELIM;
287*c1c95addSBrooks Davis 				iend = send;
288*c1c95addSBrooks Davis 				iarg = iendarg = sesc;
289*c1c95addSBrooks Davis 				goto out;
290*c1c95addSBrooks Davis 			}
291*c1c95addSBrooks Davis 			escterm = 1;
292*c1c95addSBrooks Davis 			iarg = send;
293*c1c95addSBrooks Davis 			term = buf[snam];
294*c1c95addSBrooks Davis 		} else if (strchr("BDHLRSvxNhl", buf[inam]) != NULL &&
295*c1c95addSBrooks Davis 		    strchr(" %&()*+-./0123456789:<=>", buf[iarg]) != NULL) {
296*c1c95addSBrooks Davis 			err = MANDOCERR_ESC_DELIM;
297*c1c95addSBrooks Davis 			if (rval != ESCAPE_EXPAND)
298*c1c95addSBrooks Davis 				rval = ESCAPE_ERROR;
299*c1c95addSBrooks Davis 			if (buf[inam] != 'D') {
300*c1c95addSBrooks Davis 				iendarg = iend = iarg + 1;
301*c1c95addSBrooks Davis 				goto out;
302*c1c95addSBrooks Davis 			}
303*c1c95addSBrooks Davis 		}
304*c1c95addSBrooks Davis 		if (term == '\b')
305*c1c95addSBrooks Davis 			term = buf[iarg++];
306*c1c95addSBrooks Davis 	} else if (term == '\0' && maxl == INT_MAX) {
307*c1c95addSBrooks Davis 		if (buf[inam] == 'n' && (buf[iarg] == '+' || buf[iarg] == '-'))
308*c1c95addSBrooks Davis 			iarg++;
309*c1c95addSBrooks Davis 		switch (buf[iarg]) {
310*c1c95addSBrooks Davis 		case '(':
311*c1c95addSBrooks Davis 			maxl = 2;
312*c1c95addSBrooks Davis 			iarg++;
313*c1c95addSBrooks Davis 			break;
314*c1c95addSBrooks Davis 		case '[':
315*c1c95addSBrooks Davis 			if (buf[++iarg] == ' ') {
316*c1c95addSBrooks Davis 				iendarg = iend = iarg + 1;
317*c1c95addSBrooks Davis 				err = MANDOCERR_ESC_ARG;
318*c1c95addSBrooks Davis 				rval = ESCAPE_ERROR;
319*c1c95addSBrooks Davis 				goto out;
320*c1c95addSBrooks Davis 			}
321*c1c95addSBrooks Davis 			term = ']';
322*c1c95addSBrooks Davis 			break;
323*c1c95addSBrooks Davis 		default:
324*c1c95addSBrooks Davis 			maxl = 1;
325*c1c95addSBrooks Davis 			break;
326*c1c95addSBrooks Davis 		}
327*c1c95addSBrooks Davis 	}
328*c1c95addSBrooks Davis 
329*c1c95addSBrooks Davis 	/* Advance to the end of the argument. */
330*c1c95addSBrooks Davis 
331*c1c95addSBrooks Davis 	valid_A = 1;
332*c1c95addSBrooks Davis 	iendarg = iarg;
333*c1c95addSBrooks Davis 	while (maxl > 0) {
334*c1c95addSBrooks Davis 		if (buf[iendarg] == '\0') {
335*c1c95addSBrooks Davis 			err = MANDOCERR_ESC_INCOMPLETE;
336*c1c95addSBrooks Davis 			if (rval != ESCAPE_EXPAND &&
337*c1c95addSBrooks Davis 			    rval != ESCAPE_OVERSTRIKE)
338*c1c95addSBrooks Davis 				rval = ESCAPE_ERROR;
339*c1c95addSBrooks Davis 			/* Usually, ignore an incomplete argument. */
340*c1c95addSBrooks Davis 			if (strchr("Aow", buf[inam]) == NULL)
341*c1c95addSBrooks Davis 				iendarg = iarg;
342*c1c95addSBrooks Davis 			break;
343*c1c95addSBrooks Davis 		}
344*c1c95addSBrooks Davis 		if (escterm == 0 && buf[iendarg] == term) {
345*c1c95addSBrooks Davis 			iend = iendarg + 1;
346*c1c95addSBrooks Davis 			break;
347*c1c95addSBrooks Davis 		}
348*c1c95addSBrooks Davis 		if (buf[iendarg] == buf[iesc]) {
349*c1c95addSBrooks Davis 			stype = roff_escape(buf, ln, iendarg,
350*c1c95addSBrooks Davis 			    &sesc, &snam, &sarg, &sendarg, &send);
351*c1c95addSBrooks Davis 			if (stype == ESCAPE_EXPAND)
352*c1c95addSBrooks Davis 				goto out_sub;
353*c1c95addSBrooks Davis 			iend = send;
354*c1c95addSBrooks Davis 			if (escterm == 1 &&
355*c1c95addSBrooks Davis 			    (buf[snam] == term || buf[inam] == 'N'))
356*c1c95addSBrooks Davis 				break;
357*c1c95addSBrooks Davis 			if (stype != ESCAPE_UNDEF)
358*c1c95addSBrooks Davis 				valid_A = 0;
359*c1c95addSBrooks Davis 			iendarg = send;
360*c1c95addSBrooks Davis 		} else if (buf[inam] == 'N' &&
361*c1c95addSBrooks Davis 		    isdigit((unsigned char)buf[iendarg]) == 0) {
362*c1c95addSBrooks Davis 			iend = iendarg + 1;
363*c1c95addSBrooks Davis 			break;
364*c1c95addSBrooks Davis 		} else {
365*c1c95addSBrooks Davis 			if (buf[iendarg] == ' ' || buf[iendarg] == '\t')
366*c1c95addSBrooks Davis 				valid_A = 0;
367*c1c95addSBrooks Davis 			if (maxl != INT_MAX)
368*c1c95addSBrooks Davis 				maxl--;
369*c1c95addSBrooks Davis 			iend = ++iendarg;
370*c1c95addSBrooks Davis 		}
371*c1c95addSBrooks Davis 	}
372*c1c95addSBrooks Davis 
373*c1c95addSBrooks Davis 	/* Post-process depending on the content of the argument. */
374*c1c95addSBrooks Davis 
375*c1c95addSBrooks Davis 	argl = iendarg - iarg;
376*c1c95addSBrooks Davis 	switch (buf[inam]) {
377*c1c95addSBrooks Davis 	case '*':
378*c1c95addSBrooks Davis 		if (resc == NULL && argl == 2 &&
379*c1c95addSBrooks Davis 		    buf[iarg] == '.' && buf[iarg + 1] == 'T')
380*c1c95addSBrooks Davis 			rval = ESCAPE_DEVICE;
381*c1c95addSBrooks Davis 		break;
382*c1c95addSBrooks Davis 	case 'A':
383*c1c95addSBrooks Davis 		if (valid_A == 0)
384*c1c95addSBrooks Davis 			iendarg = iarg;
385*c1c95addSBrooks Davis 		break;
386*c1c95addSBrooks Davis 	case 'O':
387*c1c95addSBrooks Davis 		switch (buf[iarg]) {
388*c1c95addSBrooks Davis 		case '0':
389*c1c95addSBrooks Davis 			rval = ESCAPE_UNSUPP;
390*c1c95addSBrooks Davis 			break;
391*c1c95addSBrooks Davis 		case '1':
392*c1c95addSBrooks Davis 		case '2':
393*c1c95addSBrooks Davis 		case '3':
394*c1c95addSBrooks Davis 		case '4':
395*c1c95addSBrooks Davis 			if (argl == 1)
396*c1c95addSBrooks Davis 				rval = ESCAPE_IGNORE;
397*c1c95addSBrooks Davis 			else {
398*c1c95addSBrooks Davis 				err = MANDOCERR_ESC_ARG;
399*c1c95addSBrooks Davis 				rval = ESCAPE_ERROR;
400*c1c95addSBrooks Davis 			}
401*c1c95addSBrooks Davis 			break;
402*c1c95addSBrooks Davis 		case '5':
403*c1c95addSBrooks Davis 			if (buf[iarg - 1] == '[')
404*c1c95addSBrooks Davis 				rval = ESCAPE_UNSUPP;
405*c1c95addSBrooks Davis 			else {
406*c1c95addSBrooks Davis 				err = MANDOCERR_ESC_ARG;
407*c1c95addSBrooks Davis 				rval = ESCAPE_ERROR;
408*c1c95addSBrooks Davis 			}
409*c1c95addSBrooks Davis 			break;
410*c1c95addSBrooks Davis 		default:
411*c1c95addSBrooks Davis 			err = MANDOCERR_ESC_ARG;
412*c1c95addSBrooks Davis 			rval = ESCAPE_ERROR;
413*c1c95addSBrooks Davis 			break;
414*c1c95addSBrooks Davis 		}
415*c1c95addSBrooks Davis 		break;
416*c1c95addSBrooks Davis 	default:
417*c1c95addSBrooks Davis 		break;
418*c1c95addSBrooks Davis 	}
419*c1c95addSBrooks Davis 
420*c1c95addSBrooks Davis 	switch (rval) {
421*c1c95addSBrooks Davis 	case ESCAPE_FONT:
422*c1c95addSBrooks Davis 		rval = mandoc_font(buf + iarg, argl);
423*c1c95addSBrooks Davis 		if (rval == ESCAPE_ERROR)
424*c1c95addSBrooks Davis 			err = MANDOCERR_ESC_ARG;
425*c1c95addSBrooks Davis 		break;
426*c1c95addSBrooks Davis 
427*c1c95addSBrooks Davis 	case ESCAPE_SPECIAL:
428*c1c95addSBrooks Davis 		if (argl == 0) {
429*c1c95addSBrooks Davis 			err = MANDOCERR_ESC_BADCHAR;
430*c1c95addSBrooks Davis 			rval = ESCAPE_ERROR;
431*c1c95addSBrooks Davis 			break;
432*c1c95addSBrooks Davis 		}
433*c1c95addSBrooks Davis 
434*c1c95addSBrooks Davis 		/*
435*c1c95addSBrooks Davis 		 * The file chars.c only provides one common list of
436*c1c95addSBrooks Davis 		 * character names, but \[-] == \- is the only one of
437*c1c95addSBrooks Davis 		 * the characters with one-byte names that allows
438*c1c95addSBrooks Davis 		 * enclosing the name in brackets.
439*c1c95addSBrooks Davis 		 */
440*c1c95addSBrooks Davis 
441*c1c95addSBrooks Davis 		if (term != '\0' && argl == 1 && buf[iarg] != '-') {
442*c1c95addSBrooks Davis 			err = MANDOCERR_ESC_BADCHAR;
443*c1c95addSBrooks Davis 			rval = ESCAPE_ERROR;
444*c1c95addSBrooks Davis 			break;
445*c1c95addSBrooks Davis 		}
446*c1c95addSBrooks Davis 
447*c1c95addSBrooks Davis 		/* Treat \[char...] as an alias for \N'...'. */
448*c1c95addSBrooks Davis 
449*c1c95addSBrooks Davis 		if (buf[iarg] == 'c') {
450*c1c95addSBrooks Davis 			if (argl < 6 || argl > 7 ||
451*c1c95addSBrooks Davis 			    strncmp(buf + iarg, "char", 4) != 0 ||
452*c1c95addSBrooks Davis 			    (int)strspn(buf + iarg + 4, "0123456789")
453*c1c95addSBrooks Davis 			     + 4 < argl)
454*c1c95addSBrooks Davis 				break;
455*c1c95addSBrooks Davis 			c = 0;
456*c1c95addSBrooks Davis 			for (i = iarg; i < iendarg; i++)
457*c1c95addSBrooks Davis 				c = 10 * c + (buf[i] - '0');
458*c1c95addSBrooks Davis 			if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) {
459*c1c95addSBrooks Davis 				err = MANDOCERR_ESC_BADCHAR;
460*c1c95addSBrooks Davis 				break;
461*c1c95addSBrooks Davis 			}
462*c1c95addSBrooks Davis 			iarg += 4;
463*c1c95addSBrooks Davis 			rval = ESCAPE_NUMBERED;
464*c1c95addSBrooks Davis 			break;
465*c1c95addSBrooks Davis 		}
466*c1c95addSBrooks Davis 
467*c1c95addSBrooks Davis 		/*
468*c1c95addSBrooks Davis 		 * Unicode escapes are defined in groff as \[u0000]
469*c1c95addSBrooks Davis 		 * to \[u10FFFF], where the contained value must be
470*c1c95addSBrooks Davis 		 * a valid Unicode codepoint.
471*c1c95addSBrooks Davis 		 */
472*c1c95addSBrooks Davis 
473*c1c95addSBrooks Davis 		if (buf[iarg] != 'u' || argl < 5 || argl > 7)
474*c1c95addSBrooks Davis 			break;
475*c1c95addSBrooks Davis 		if (argl == 7 &&  /* beyond the Unicode range */
476*c1c95addSBrooks Davis 		    (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) {
477*c1c95addSBrooks Davis 			err = MANDOCERR_ESC_BADCHAR;
478*c1c95addSBrooks Davis 			break;
479*c1c95addSBrooks Davis 		}
480*c1c95addSBrooks Davis 		if (argl == 6 && buf[iarg + 1] == '0') {
481*c1c95addSBrooks Davis 			err = MANDOCERR_ESC_BADCHAR;
482*c1c95addSBrooks Davis 			break;
483*c1c95addSBrooks Davis 		}
484*c1c95addSBrooks Davis 		if (argl == 5 &&  /* UTF-16 surrogate */
485*c1c95addSBrooks Davis 		    toupper((unsigned char)buf[iarg + 1]) == 'D' &&
486*c1c95addSBrooks Davis 		    strchr("89ABCDEFabcdef", buf[iarg + 2]) != NULL) {
487*c1c95addSBrooks Davis 			err = MANDOCERR_ESC_BADCHAR;
488*c1c95addSBrooks Davis 			break;
489*c1c95addSBrooks Davis 		}
490*c1c95addSBrooks Davis 		if ((int)strspn(buf + iarg + 1, "0123456789ABCDEFabcdef")
491*c1c95addSBrooks Davis 		    + 1 == argl)
492*c1c95addSBrooks Davis 			rval = ESCAPE_UNICODE;
493*c1c95addSBrooks Davis 		break;
494*c1c95addSBrooks Davis 	default:
495*c1c95addSBrooks Davis 		break;
496*c1c95addSBrooks Davis 	}
497*c1c95addSBrooks Davis 	goto out;
498*c1c95addSBrooks Davis 
499*c1c95addSBrooks Davis out_sub:
500*c1c95addSBrooks Davis 	iesc = sesc;
501*c1c95addSBrooks Davis 	inam = snam;
502*c1c95addSBrooks Davis 	iarg = sarg;
503*c1c95addSBrooks Davis 	iendarg = sendarg;
504*c1c95addSBrooks Davis 	iend = send;
505*c1c95addSBrooks Davis 	rval = ESCAPE_EXPAND;
506*c1c95addSBrooks Davis 
507*c1c95addSBrooks Davis out:
508*c1c95addSBrooks Davis 	if (resc != NULL)
509*c1c95addSBrooks Davis 		*resc = iesc;
510*c1c95addSBrooks Davis 	if (rnam != NULL)
511*c1c95addSBrooks Davis 		*rnam = inam;
512*c1c95addSBrooks Davis 	if (rarg != NULL)
513*c1c95addSBrooks Davis 		*rarg = iarg;
514*c1c95addSBrooks Davis 	if (rendarg != NULL)
515*c1c95addSBrooks Davis 		*rendarg = iendarg;
516*c1c95addSBrooks Davis 	if (rend != NULL)
517*c1c95addSBrooks Davis 		*rend = iend;
518*c1c95addSBrooks Davis 	if (ln == 0)
519*c1c95addSBrooks Davis 		return rval;
520*c1c95addSBrooks Davis 
521*c1c95addSBrooks Davis 	/*
522*c1c95addSBrooks Davis 	 * Diagnostic messages are only issued when called
523*c1c95addSBrooks Davis 	 * from the parser, not when called from the formatters.
524*c1c95addSBrooks Davis 	 */
525*c1c95addSBrooks Davis 
526*c1c95addSBrooks Davis 	switch (rval) {
527*c1c95addSBrooks Davis 	case ESCAPE_UNSUPP:
528*c1c95addSBrooks Davis 		err = MANDOCERR_ESC_UNSUPP;
529*c1c95addSBrooks Davis 		break;
530*c1c95addSBrooks Davis 	case ESCAPE_UNDEF:
531*c1c95addSBrooks Davis 		if (buf[inam] != '\\' && buf[inam] != '.')
532*c1c95addSBrooks Davis 			err = MANDOCERR_ESC_UNDEF;
533*c1c95addSBrooks Davis 		break;
534*c1c95addSBrooks Davis 	case ESCAPE_SPECIAL:
535*c1c95addSBrooks Davis 		if (mchars_spec2cp(buf + iarg, argl) >= 0)
536*c1c95addSBrooks Davis 			err = MANDOCERR_OK;
537*c1c95addSBrooks Davis 		else if (err == MANDOCERR_OK)
538*c1c95addSBrooks Davis 			err = MANDOCERR_ESC_UNKCHAR;
539*c1c95addSBrooks Davis 		break;
540*c1c95addSBrooks Davis 	default:
541*c1c95addSBrooks Davis 		break;
542*c1c95addSBrooks Davis 	}
543*c1c95addSBrooks Davis 	if (err != MANDOCERR_OK)
544*c1c95addSBrooks Davis 		mandoc_msg(err, ln, iesc, "%.*s", iend - iesc, buf + iesc);
545*c1c95addSBrooks Davis 	return rval;
546*c1c95addSBrooks Davis }
547