/* $Id: roff_escape.c,v 1.15 2024/05/16 21:23:00 schwarze Exp $ */
/*
 * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022
 *               Ingo Schwarze <schwarze@openbsd.org>
 * Copyright (c) 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Parser for roff(7) escape sequences.
 * To be used by all mandoc(1) parsers and formatters.
 */
22*c1c95addSBrooks Davis #include <assert.h>
23*c1c95addSBrooks Davis #include <ctype.h>
24*c1c95addSBrooks Davis #include <limits.h>
25*c1c95addSBrooks Davis #include <stdio.h>
26*c1c95addSBrooks Davis #include <string.h>
27*c1c95addSBrooks Davis
28*c1c95addSBrooks Davis #include "mandoc.h"
29*c1c95addSBrooks Davis #include "roff.h"
30*c1c95addSBrooks Davis #include "roff_int.h"
31*c1c95addSBrooks Davis
32*c1c95addSBrooks Davis /*
33*c1c95addSBrooks Davis * Traditional escape sequence interpreter for general use
34*c1c95addSBrooks Davis * including in high-level formatters. This function does not issue
35*c1c95addSBrooks Davis * diagnostics and is not usable for expansion in the roff(7) parser.
36*c1c95addSBrooks Davis * It is documented in the mandoc_escape(3) manual page.
37*c1c95addSBrooks Davis */
38*c1c95addSBrooks Davis enum mandoc_esc
mandoc_escape(const char ** rendarg,const char ** rarg,int * rargl)39*c1c95addSBrooks Davis mandoc_escape(const char **rendarg, const char **rarg, int *rargl)
40*c1c95addSBrooks Davis {
41*c1c95addSBrooks Davis int iarg, iendarg, iend;
42*c1c95addSBrooks Davis enum mandoc_esc rval;
43*c1c95addSBrooks Davis
44*c1c95addSBrooks Davis rval = roff_escape(--*rendarg, 0, 0,
45*c1c95addSBrooks Davis NULL, NULL, &iarg, &iendarg, &iend);
46*c1c95addSBrooks Davis assert(rval != ESCAPE_EXPAND);
47*c1c95addSBrooks Davis if (rarg != NULL)
48*c1c95addSBrooks Davis *rarg = *rendarg + iarg;
49*c1c95addSBrooks Davis if (rargl != NULL)
50*c1c95addSBrooks Davis *rargl = iendarg - iarg;
51*c1c95addSBrooks Davis *rendarg += iend;
52*c1c95addSBrooks Davis return rval;
53*c1c95addSBrooks Davis }
54*c1c95addSBrooks Davis
55*c1c95addSBrooks Davis /*
56*c1c95addSBrooks Davis * Full-featured escape sequence parser.
57*c1c95addSBrooks Davis * If it encounters a nested escape sequence that requires expansion
58*c1c95addSBrooks Davis * by the parser and re-parsing, the positions of that inner escape
59*c1c95addSBrooks Davis * sequence are returned in *resc ... *rend.
60*c1c95addSBrooks Davis * Otherwise, *resc is set to aesc and the positions of the escape
61*c1c95addSBrooks Davis * sequence starting at aesc are returned.
62*c1c95addSBrooks Davis * Diagnostic messages are generated if and only if ln != 0,
63*c1c95addSBrooks Davis * that is, if and only if called by roff_expand().
64*c1c95addSBrooks Davis */
65*c1c95addSBrooks Davis enum mandoc_esc
roff_escape(const char * buf,const int ln,const int aesc,int * resc,int * rnam,int * rarg,int * rendarg,int * rend)66*c1c95addSBrooks Davis roff_escape(const char *buf, const int ln, const int aesc,
67*c1c95addSBrooks Davis int *resc, int *rnam, int *rarg, int *rendarg, int *rend)
68*c1c95addSBrooks Davis {
69*c1c95addSBrooks Davis int iesc; /* index of leading escape char */
70*c1c95addSBrooks Davis int inam; /* index of escape name */
71*c1c95addSBrooks Davis int iarg; /* index beginning the argument */
72*c1c95addSBrooks Davis int iendarg; /* index right after the argument */
73*c1c95addSBrooks Davis int iend; /* index right after the sequence */
74*c1c95addSBrooks Davis int sesc, snam, sarg, sendarg, send; /* for sub-escape */
75*c1c95addSBrooks Davis int escterm; /* whether term is escaped */
76*c1c95addSBrooks Davis int maxl; /* expected length of the argument */
77*c1c95addSBrooks Davis int argl; /* actual length of the argument */
78*c1c95addSBrooks Davis int c, i; /* for \[char...] parsing */
79*c1c95addSBrooks Davis int valid_A; /* for \A parsing */
80*c1c95addSBrooks Davis enum mandoc_esc rval; /* return value */
81*c1c95addSBrooks Davis enum mandoc_esc stype; /* for sub-escape */
82*c1c95addSBrooks Davis enum mandocerr err; /* diagnostic code */
83*c1c95addSBrooks Davis char term; /* byte terminating the argument */
84*c1c95addSBrooks Davis
85*c1c95addSBrooks Davis /*
86*c1c95addSBrooks Davis * Treat "\E" just like "\";
87*c1c95addSBrooks Davis * it only makes a difference in copy mode.
88*c1c95addSBrooks Davis */
89*c1c95addSBrooks Davis
90*c1c95addSBrooks Davis iesc = inam = aesc;
91*c1c95addSBrooks Davis do {
92*c1c95addSBrooks Davis inam++;
93*c1c95addSBrooks Davis } while (buf[inam] == 'E');
94*c1c95addSBrooks Davis
95*c1c95addSBrooks Davis /*
96*c1c95addSBrooks Davis * Sort the following cases first by syntax category,
97*c1c95addSBrooks Davis * then by escape sequence type, and finally by ASCII code.
98*c1c95addSBrooks Davis */
99*c1c95addSBrooks Davis
100*c1c95addSBrooks Davis iarg = iendarg = iend = inam + 1;
101*c1c95addSBrooks Davis maxl = INT_MAX;
102*c1c95addSBrooks Davis term = '\0';
103*c1c95addSBrooks Davis err = MANDOCERR_OK;
104*c1c95addSBrooks Davis switch (buf[inam]) {
105*c1c95addSBrooks Davis
106*c1c95addSBrooks Davis /* Escape sequences taking no arguments at all. */
107*c1c95addSBrooks Davis
108*c1c95addSBrooks Davis case '!':
109*c1c95addSBrooks Davis case '?':
110*c1c95addSBrooks Davis case 'r':
111*c1c95addSBrooks Davis rval = ESCAPE_UNSUPP;
112*c1c95addSBrooks Davis goto out;
113*c1c95addSBrooks Davis
114*c1c95addSBrooks Davis case '%':
115*c1c95addSBrooks Davis case '&':
116*c1c95addSBrooks Davis case ')':
117*c1c95addSBrooks Davis case ',':
118*c1c95addSBrooks Davis case '/':
119*c1c95addSBrooks Davis case '^':
120*c1c95addSBrooks Davis case 'a':
121*c1c95addSBrooks Davis case 'd':
122*c1c95addSBrooks Davis case 't':
123*c1c95addSBrooks Davis case 'u':
124*c1c95addSBrooks Davis case '{':
125*c1c95addSBrooks Davis case '|':
126*c1c95addSBrooks Davis case '}':
127*c1c95addSBrooks Davis rval = ESCAPE_IGNORE;
128*c1c95addSBrooks Davis goto out;
129*c1c95addSBrooks Davis
130*c1c95addSBrooks Davis case '\0':
131*c1c95addSBrooks Davis iendarg = --iend;
132*c1c95addSBrooks Davis /* FALLTHROUGH */
133*c1c95addSBrooks Davis case '.':
134*c1c95addSBrooks Davis case '\\':
135*c1c95addSBrooks Davis default:
136*c1c95addSBrooks Davis iarg--;
137*c1c95addSBrooks Davis rval = ESCAPE_UNDEF;
138*c1c95addSBrooks Davis goto out;
139*c1c95addSBrooks Davis
140*c1c95addSBrooks Davis case ' ':
141*c1c95addSBrooks Davis case '\'':
142*c1c95addSBrooks Davis case '-':
143*c1c95addSBrooks Davis case '0':
144*c1c95addSBrooks Davis case ':':
145*c1c95addSBrooks Davis case '_':
146*c1c95addSBrooks Davis case '`':
147*c1c95addSBrooks Davis case 'e':
148*c1c95addSBrooks Davis case '~':
149*c1c95addSBrooks Davis iarg--;
150*c1c95addSBrooks Davis argl = 1;
151*c1c95addSBrooks Davis rval = ESCAPE_SPECIAL;
152*c1c95addSBrooks Davis goto out;
153*c1c95addSBrooks Davis case 'p':
154*c1c95addSBrooks Davis rval = ESCAPE_BREAK;
155*c1c95addSBrooks Davis goto out;
156*c1c95addSBrooks Davis case 'c':
157*c1c95addSBrooks Davis rval = ESCAPE_NOSPACE;
158*c1c95addSBrooks Davis goto out;
159*c1c95addSBrooks Davis case 'z':
160*c1c95addSBrooks Davis rval = ESCAPE_SKIPCHAR;
161*c1c95addSBrooks Davis goto out;
162*c1c95addSBrooks Davis
163*c1c95addSBrooks Davis /* Standard argument format. */
164*c1c95addSBrooks Davis
165*c1c95addSBrooks Davis case '$':
166*c1c95addSBrooks Davis case '*':
167*c1c95addSBrooks Davis case 'V':
168*c1c95addSBrooks Davis case 'g':
169*c1c95addSBrooks Davis case 'n':
170*c1c95addSBrooks Davis rval = ESCAPE_EXPAND;
171*c1c95addSBrooks Davis break;
172*c1c95addSBrooks Davis case 'F':
173*c1c95addSBrooks Davis case 'M':
174*c1c95addSBrooks Davis case 'O':
175*c1c95addSBrooks Davis case 'Y':
176*c1c95addSBrooks Davis case 'k':
177*c1c95addSBrooks Davis case 'm':
178*c1c95addSBrooks Davis rval = ESCAPE_IGNORE;
179*c1c95addSBrooks Davis break;
180*c1c95addSBrooks Davis case '(':
181*c1c95addSBrooks Davis case '[':
182*c1c95addSBrooks Davis rval = ESCAPE_SPECIAL;
183*c1c95addSBrooks Davis iendarg = iend = --iarg;
184*c1c95addSBrooks Davis break;
185*c1c95addSBrooks Davis case 'f':
186*c1c95addSBrooks Davis rval = ESCAPE_FONT;
187*c1c95addSBrooks Davis break;
188*c1c95addSBrooks Davis
189*c1c95addSBrooks Davis /* Quoted arguments */
190*c1c95addSBrooks Davis
191*c1c95addSBrooks Davis case 'A':
192*c1c95addSBrooks Davis case 'B':
193*c1c95addSBrooks Davis case 'w':
194*c1c95addSBrooks Davis rval = ESCAPE_EXPAND;
195*c1c95addSBrooks Davis term = '\b';
196*c1c95addSBrooks Davis break;
197*c1c95addSBrooks Davis case 'D':
198*c1c95addSBrooks Davis case 'H':
199*c1c95addSBrooks Davis case 'L':
200*c1c95addSBrooks Davis case 'R':
201*c1c95addSBrooks Davis case 'S':
202*c1c95addSBrooks Davis case 'X':
203*c1c95addSBrooks Davis case 'Z':
204*c1c95addSBrooks Davis case 'b':
205*c1c95addSBrooks Davis case 'v':
206*c1c95addSBrooks Davis case 'x':
207*c1c95addSBrooks Davis rval = ESCAPE_IGNORE;
208*c1c95addSBrooks Davis term = '\b';
209*c1c95addSBrooks Davis break;
210*c1c95addSBrooks Davis case 'C':
211*c1c95addSBrooks Davis rval = ESCAPE_SPECIAL;
212*c1c95addSBrooks Davis term = '\b';
213*c1c95addSBrooks Davis break;
214*c1c95addSBrooks Davis case 'N':
215*c1c95addSBrooks Davis rval = ESCAPE_NUMBERED;
216*c1c95addSBrooks Davis term = '\b';
217*c1c95addSBrooks Davis break;
218*c1c95addSBrooks Davis case 'h':
219*c1c95addSBrooks Davis rval = ESCAPE_HORIZ;
220*c1c95addSBrooks Davis term = '\b';
221*c1c95addSBrooks Davis break;
222*c1c95addSBrooks Davis case 'l':
223*c1c95addSBrooks Davis rval = ESCAPE_HLINE;
224*c1c95addSBrooks Davis term = '\b';
225*c1c95addSBrooks Davis break;
226*c1c95addSBrooks Davis case 'o':
227*c1c95addSBrooks Davis rval = ESCAPE_OVERSTRIKE;
228*c1c95addSBrooks Davis term = '\b';
229*c1c95addSBrooks Davis break;
230*c1c95addSBrooks Davis
231*c1c95addSBrooks Davis /* Sizes support both forms, with additional peculiarities. */
232*c1c95addSBrooks Davis
233*c1c95addSBrooks Davis case 's':
234*c1c95addSBrooks Davis rval = ESCAPE_IGNORE;
235*c1c95addSBrooks Davis if (buf[iarg] == '+' || buf[iarg] == '-'||
236*c1c95addSBrooks Davis buf[iarg] == ASCII_HYPH)
237*c1c95addSBrooks Davis iarg++;
238*c1c95addSBrooks Davis switch (buf[iarg]) {
239*c1c95addSBrooks Davis case '(':
240*c1c95addSBrooks Davis maxl = 2;
241*c1c95addSBrooks Davis iarg++;
242*c1c95addSBrooks Davis break;
243*c1c95addSBrooks Davis case '[':
244*c1c95addSBrooks Davis term = ']';
245*c1c95addSBrooks Davis iarg++;
246*c1c95addSBrooks Davis break;
247*c1c95addSBrooks Davis case '\'':
248*c1c95addSBrooks Davis term = '\'';
249*c1c95addSBrooks Davis iarg++;
250*c1c95addSBrooks Davis break;
251*c1c95addSBrooks Davis case '1':
252*c1c95addSBrooks Davis case '2':
253*c1c95addSBrooks Davis case '3':
254*c1c95addSBrooks Davis if (buf[iarg - 1] == 's' &&
255*c1c95addSBrooks Davis isdigit((unsigned char)buf[iarg + 1])) {
256*c1c95addSBrooks Davis maxl = 2;
257*c1c95addSBrooks Davis break;
258*c1c95addSBrooks Davis }
259*c1c95addSBrooks Davis /* FALLTHROUGH */
260*c1c95addSBrooks Davis default:
261*c1c95addSBrooks Davis maxl = 1;
262*c1c95addSBrooks Davis break;
263*c1c95addSBrooks Davis }
264*c1c95addSBrooks Davis iendarg = iend = iarg;
265*c1c95addSBrooks Davis }
266*c1c95addSBrooks Davis
267*c1c95addSBrooks Davis /* Decide how to end the argument. */
268*c1c95addSBrooks Davis
269*c1c95addSBrooks Davis escterm = 0;
270*c1c95addSBrooks Davis stype = ESCAPE_EXPAND;
271*c1c95addSBrooks Davis if ((term == '\b' || (term == '\0' && maxl == INT_MAX)) &&
272*c1c95addSBrooks Davis buf[iarg] == buf[iesc]) {
273*c1c95addSBrooks Davis stype = roff_escape(buf, ln, iendarg,
274*c1c95addSBrooks Davis &sesc, &snam, &sarg, &sendarg, &send);
275*c1c95addSBrooks Davis if (stype == ESCAPE_EXPAND)
276*c1c95addSBrooks Davis goto out_sub;
277*c1c95addSBrooks Davis }
278*c1c95addSBrooks Davis
279*c1c95addSBrooks Davis if (term == '\b') {
280*c1c95addSBrooks Davis if (stype == ESCAPE_UNDEF)
281*c1c95addSBrooks Davis iarg++;
282*c1c95addSBrooks Davis if (stype != ESCAPE_EXPAND && stype != ESCAPE_UNDEF) {
283*c1c95addSBrooks Davis if (strchr("BHLRSNhlvx", buf[inam]) != NULL &&
284*c1c95addSBrooks Davis strchr(" ,.0DLOXYZ^abdhlortuvx|~",
285*c1c95addSBrooks Davis buf[snam]) != NULL) {
286*c1c95addSBrooks Davis err = MANDOCERR_ESC_DELIM;
287*c1c95addSBrooks Davis iend = send;
288*c1c95addSBrooks Davis iarg = iendarg = sesc;
289*c1c95addSBrooks Davis goto out;
290*c1c95addSBrooks Davis }
291*c1c95addSBrooks Davis escterm = 1;
292*c1c95addSBrooks Davis iarg = send;
293*c1c95addSBrooks Davis term = buf[snam];
294*c1c95addSBrooks Davis } else if (strchr("BDHLRSvxNhl", buf[inam]) != NULL &&
295*c1c95addSBrooks Davis strchr(" %&()*+-./0123456789:<=>", buf[iarg]) != NULL) {
296*c1c95addSBrooks Davis err = MANDOCERR_ESC_DELIM;
297*c1c95addSBrooks Davis if (rval != ESCAPE_EXPAND)
298*c1c95addSBrooks Davis rval = ESCAPE_ERROR;
299*c1c95addSBrooks Davis if (buf[inam] != 'D') {
300*c1c95addSBrooks Davis iendarg = iend = iarg + 1;
301*c1c95addSBrooks Davis goto out;
302*c1c95addSBrooks Davis }
303*c1c95addSBrooks Davis }
304*c1c95addSBrooks Davis if (term == '\b')
305*c1c95addSBrooks Davis term = buf[iarg++];
306*c1c95addSBrooks Davis } else if (term == '\0' && maxl == INT_MAX) {
307*c1c95addSBrooks Davis if (buf[inam] == 'n' && (buf[iarg] == '+' || buf[iarg] == '-'))
308*c1c95addSBrooks Davis iarg++;
309*c1c95addSBrooks Davis switch (buf[iarg]) {
310*c1c95addSBrooks Davis case '(':
311*c1c95addSBrooks Davis maxl = 2;
312*c1c95addSBrooks Davis iarg++;
313*c1c95addSBrooks Davis break;
314*c1c95addSBrooks Davis case '[':
315*c1c95addSBrooks Davis if (buf[++iarg] == ' ') {
316*c1c95addSBrooks Davis iendarg = iend = iarg + 1;
317*c1c95addSBrooks Davis err = MANDOCERR_ESC_ARG;
318*c1c95addSBrooks Davis rval = ESCAPE_ERROR;
319*c1c95addSBrooks Davis goto out;
320*c1c95addSBrooks Davis }
321*c1c95addSBrooks Davis term = ']';
322*c1c95addSBrooks Davis break;
323*c1c95addSBrooks Davis default:
324*c1c95addSBrooks Davis maxl = 1;
325*c1c95addSBrooks Davis break;
326*c1c95addSBrooks Davis }
327*c1c95addSBrooks Davis }
328*c1c95addSBrooks Davis
329*c1c95addSBrooks Davis /* Advance to the end of the argument. */
330*c1c95addSBrooks Davis
331*c1c95addSBrooks Davis valid_A = 1;
332*c1c95addSBrooks Davis iendarg = iarg;
333*c1c95addSBrooks Davis while (maxl > 0) {
334*c1c95addSBrooks Davis if (buf[iendarg] == '\0') {
335*c1c95addSBrooks Davis err = MANDOCERR_ESC_INCOMPLETE;
336*c1c95addSBrooks Davis if (rval != ESCAPE_EXPAND &&
337*c1c95addSBrooks Davis rval != ESCAPE_OVERSTRIKE)
338*c1c95addSBrooks Davis rval = ESCAPE_ERROR;
339*c1c95addSBrooks Davis /* Usually, ignore an incomplete argument. */
340*c1c95addSBrooks Davis if (strchr("Aow", buf[inam]) == NULL)
341*c1c95addSBrooks Davis iendarg = iarg;
342*c1c95addSBrooks Davis break;
343*c1c95addSBrooks Davis }
344*c1c95addSBrooks Davis if (escterm == 0 && buf[iendarg] == term) {
345*c1c95addSBrooks Davis iend = iendarg + 1;
346*c1c95addSBrooks Davis break;
347*c1c95addSBrooks Davis }
348*c1c95addSBrooks Davis if (buf[iendarg] == buf[iesc]) {
349*c1c95addSBrooks Davis stype = roff_escape(buf, ln, iendarg,
350*c1c95addSBrooks Davis &sesc, &snam, &sarg, &sendarg, &send);
351*c1c95addSBrooks Davis if (stype == ESCAPE_EXPAND)
352*c1c95addSBrooks Davis goto out_sub;
353*c1c95addSBrooks Davis iend = send;
354*c1c95addSBrooks Davis if (escterm == 1 &&
355*c1c95addSBrooks Davis (buf[snam] == term || buf[inam] == 'N'))
356*c1c95addSBrooks Davis break;
357*c1c95addSBrooks Davis if (stype != ESCAPE_UNDEF)
358*c1c95addSBrooks Davis valid_A = 0;
359*c1c95addSBrooks Davis iendarg = send;
360*c1c95addSBrooks Davis } else if (buf[inam] == 'N' &&
361*c1c95addSBrooks Davis isdigit((unsigned char)buf[iendarg]) == 0) {
362*c1c95addSBrooks Davis iend = iendarg + 1;
363*c1c95addSBrooks Davis break;
364*c1c95addSBrooks Davis } else {
365*c1c95addSBrooks Davis if (buf[iendarg] == ' ' || buf[iendarg] == '\t')
366*c1c95addSBrooks Davis valid_A = 0;
367*c1c95addSBrooks Davis if (maxl != INT_MAX)
368*c1c95addSBrooks Davis maxl--;
369*c1c95addSBrooks Davis iend = ++iendarg;
370*c1c95addSBrooks Davis }
371*c1c95addSBrooks Davis }
372*c1c95addSBrooks Davis
373*c1c95addSBrooks Davis /* Post-process depending on the content of the argument. */
374*c1c95addSBrooks Davis
375*c1c95addSBrooks Davis argl = iendarg - iarg;
376*c1c95addSBrooks Davis switch (buf[inam]) {
377*c1c95addSBrooks Davis case '*':
378*c1c95addSBrooks Davis if (resc == NULL && argl == 2 &&
379*c1c95addSBrooks Davis buf[iarg] == '.' && buf[iarg + 1] == 'T')
380*c1c95addSBrooks Davis rval = ESCAPE_DEVICE;
381*c1c95addSBrooks Davis break;
382*c1c95addSBrooks Davis case 'A':
383*c1c95addSBrooks Davis if (valid_A == 0)
384*c1c95addSBrooks Davis iendarg = iarg;
385*c1c95addSBrooks Davis break;
386*c1c95addSBrooks Davis case 'O':
387*c1c95addSBrooks Davis switch (buf[iarg]) {
388*c1c95addSBrooks Davis case '0':
389*c1c95addSBrooks Davis rval = ESCAPE_UNSUPP;
390*c1c95addSBrooks Davis break;
391*c1c95addSBrooks Davis case '1':
392*c1c95addSBrooks Davis case '2':
393*c1c95addSBrooks Davis case '3':
394*c1c95addSBrooks Davis case '4':
395*c1c95addSBrooks Davis if (argl == 1)
396*c1c95addSBrooks Davis rval = ESCAPE_IGNORE;
397*c1c95addSBrooks Davis else {
398*c1c95addSBrooks Davis err = MANDOCERR_ESC_ARG;
399*c1c95addSBrooks Davis rval = ESCAPE_ERROR;
400*c1c95addSBrooks Davis }
401*c1c95addSBrooks Davis break;
402*c1c95addSBrooks Davis case '5':
403*c1c95addSBrooks Davis if (buf[iarg - 1] == '[')
404*c1c95addSBrooks Davis rval = ESCAPE_UNSUPP;
405*c1c95addSBrooks Davis else {
406*c1c95addSBrooks Davis err = MANDOCERR_ESC_ARG;
407*c1c95addSBrooks Davis rval = ESCAPE_ERROR;
408*c1c95addSBrooks Davis }
409*c1c95addSBrooks Davis break;
410*c1c95addSBrooks Davis default:
411*c1c95addSBrooks Davis err = MANDOCERR_ESC_ARG;
412*c1c95addSBrooks Davis rval = ESCAPE_ERROR;
413*c1c95addSBrooks Davis break;
414*c1c95addSBrooks Davis }
415*c1c95addSBrooks Davis break;
416*c1c95addSBrooks Davis default:
417*c1c95addSBrooks Davis break;
418*c1c95addSBrooks Davis }
419*c1c95addSBrooks Davis
420*c1c95addSBrooks Davis switch (rval) {
421*c1c95addSBrooks Davis case ESCAPE_FONT:
422*c1c95addSBrooks Davis rval = mandoc_font(buf + iarg, argl);
423*c1c95addSBrooks Davis if (rval == ESCAPE_ERROR)
424*c1c95addSBrooks Davis err = MANDOCERR_ESC_ARG;
425*c1c95addSBrooks Davis break;
426*c1c95addSBrooks Davis
427*c1c95addSBrooks Davis case ESCAPE_SPECIAL:
428*c1c95addSBrooks Davis if (argl == 0) {
429*c1c95addSBrooks Davis err = MANDOCERR_ESC_BADCHAR;
430*c1c95addSBrooks Davis rval = ESCAPE_ERROR;
431*c1c95addSBrooks Davis break;
432*c1c95addSBrooks Davis }
433*c1c95addSBrooks Davis
434*c1c95addSBrooks Davis /*
435*c1c95addSBrooks Davis * The file chars.c only provides one common list of
436*c1c95addSBrooks Davis * character names, but \[-] == \- is the only one of
437*c1c95addSBrooks Davis * the characters with one-byte names that allows
438*c1c95addSBrooks Davis * enclosing the name in brackets.
439*c1c95addSBrooks Davis */
440*c1c95addSBrooks Davis
441*c1c95addSBrooks Davis if (term != '\0' && argl == 1 && buf[iarg] != '-') {
442*c1c95addSBrooks Davis err = MANDOCERR_ESC_BADCHAR;
443*c1c95addSBrooks Davis rval = ESCAPE_ERROR;
444*c1c95addSBrooks Davis break;
445*c1c95addSBrooks Davis }
446*c1c95addSBrooks Davis
447*c1c95addSBrooks Davis /* Treat \[char...] as an alias for \N'...'. */
448*c1c95addSBrooks Davis
449*c1c95addSBrooks Davis if (buf[iarg] == 'c') {
450*c1c95addSBrooks Davis if (argl < 6 || argl > 7 ||
451*c1c95addSBrooks Davis strncmp(buf + iarg, "char", 4) != 0 ||
452*c1c95addSBrooks Davis (int)strspn(buf + iarg + 4, "0123456789")
453*c1c95addSBrooks Davis + 4 < argl)
454*c1c95addSBrooks Davis break;
455*c1c95addSBrooks Davis c = 0;
456*c1c95addSBrooks Davis for (i = iarg; i < iendarg; i++)
457*c1c95addSBrooks Davis c = 10 * c + (buf[i] - '0');
458*c1c95addSBrooks Davis if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) {
459*c1c95addSBrooks Davis err = MANDOCERR_ESC_BADCHAR;
460*c1c95addSBrooks Davis break;
461*c1c95addSBrooks Davis }
462*c1c95addSBrooks Davis iarg += 4;
463*c1c95addSBrooks Davis rval = ESCAPE_NUMBERED;
464*c1c95addSBrooks Davis break;
465*c1c95addSBrooks Davis }
466*c1c95addSBrooks Davis
467*c1c95addSBrooks Davis /*
468*c1c95addSBrooks Davis * Unicode escapes are defined in groff as \[u0000]
469*c1c95addSBrooks Davis * to \[u10FFFF], where the contained value must be
470*c1c95addSBrooks Davis * a valid Unicode codepoint.
471*c1c95addSBrooks Davis */
472*c1c95addSBrooks Davis
473*c1c95addSBrooks Davis if (buf[iarg] != 'u' || argl < 5 || argl > 7)
474*c1c95addSBrooks Davis break;
475*c1c95addSBrooks Davis if (argl == 7 && /* beyond the Unicode range */
476*c1c95addSBrooks Davis (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) {
477*c1c95addSBrooks Davis err = MANDOCERR_ESC_BADCHAR;
478*c1c95addSBrooks Davis break;
479*c1c95addSBrooks Davis }
480*c1c95addSBrooks Davis if (argl == 6 && buf[iarg + 1] == '0') {
481*c1c95addSBrooks Davis err = MANDOCERR_ESC_BADCHAR;
482*c1c95addSBrooks Davis break;
483*c1c95addSBrooks Davis }
484*c1c95addSBrooks Davis if (argl == 5 && /* UTF-16 surrogate */
485*c1c95addSBrooks Davis toupper((unsigned char)buf[iarg + 1]) == 'D' &&
486*c1c95addSBrooks Davis strchr("89ABCDEFabcdef", buf[iarg + 2]) != NULL) {
487*c1c95addSBrooks Davis err = MANDOCERR_ESC_BADCHAR;
488*c1c95addSBrooks Davis break;
489*c1c95addSBrooks Davis }
490*c1c95addSBrooks Davis if ((int)strspn(buf + iarg + 1, "0123456789ABCDEFabcdef")
491*c1c95addSBrooks Davis + 1 == argl)
492*c1c95addSBrooks Davis rval = ESCAPE_UNICODE;
493*c1c95addSBrooks Davis break;
494*c1c95addSBrooks Davis default:
495*c1c95addSBrooks Davis break;
496*c1c95addSBrooks Davis }
497*c1c95addSBrooks Davis goto out;
498*c1c95addSBrooks Davis
499*c1c95addSBrooks Davis out_sub:
500*c1c95addSBrooks Davis iesc = sesc;
501*c1c95addSBrooks Davis inam = snam;
502*c1c95addSBrooks Davis iarg = sarg;
503*c1c95addSBrooks Davis iendarg = sendarg;
504*c1c95addSBrooks Davis iend = send;
505*c1c95addSBrooks Davis rval = ESCAPE_EXPAND;
506*c1c95addSBrooks Davis
507*c1c95addSBrooks Davis out:
508*c1c95addSBrooks Davis if (resc != NULL)
509*c1c95addSBrooks Davis *resc = iesc;
510*c1c95addSBrooks Davis if (rnam != NULL)
511*c1c95addSBrooks Davis *rnam = inam;
512*c1c95addSBrooks Davis if (rarg != NULL)
513*c1c95addSBrooks Davis *rarg = iarg;
514*c1c95addSBrooks Davis if (rendarg != NULL)
515*c1c95addSBrooks Davis *rendarg = iendarg;
516*c1c95addSBrooks Davis if (rend != NULL)
517*c1c95addSBrooks Davis *rend = iend;
518*c1c95addSBrooks Davis if (ln == 0)
519*c1c95addSBrooks Davis return rval;
520*c1c95addSBrooks Davis
521*c1c95addSBrooks Davis /*
522*c1c95addSBrooks Davis * Diagnostic messages are only issued when called
523*c1c95addSBrooks Davis * from the parser, not when called from the formatters.
524*c1c95addSBrooks Davis */
525*c1c95addSBrooks Davis
526*c1c95addSBrooks Davis switch (rval) {
527*c1c95addSBrooks Davis case ESCAPE_UNSUPP:
528*c1c95addSBrooks Davis err = MANDOCERR_ESC_UNSUPP;
529*c1c95addSBrooks Davis break;
530*c1c95addSBrooks Davis case ESCAPE_UNDEF:
531*c1c95addSBrooks Davis if (buf[inam] != '\\' && buf[inam] != '.')
532*c1c95addSBrooks Davis err = MANDOCERR_ESC_UNDEF;
533*c1c95addSBrooks Davis break;
534*c1c95addSBrooks Davis case ESCAPE_SPECIAL:
535*c1c95addSBrooks Davis if (mchars_spec2cp(buf + iarg, argl) >= 0)
536*c1c95addSBrooks Davis err = MANDOCERR_OK;
537*c1c95addSBrooks Davis else if (err == MANDOCERR_OK)
538*c1c95addSBrooks Davis err = MANDOCERR_ESC_UNKCHAR;
539*c1c95addSBrooks Davis break;
540*c1c95addSBrooks Davis default:
541*c1c95addSBrooks Davis break;
542*c1c95addSBrooks Davis }
543*c1c95addSBrooks Davis if (err != MANDOCERR_OK)
544*c1c95addSBrooks Davis mandoc_msg(err, ln, iesc, "%.*s", iend - iesc, buf + iesc);
545*c1c95addSBrooks Davis return rval;
546*c1c95addSBrooks Davis }
547