/*	$Id: mandoc.c,v 1.74 2013/12/30 18:30:32 schwarze Exp $ */
/*
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
1832a712daSGarrett D'Amore #ifdef HAVE_CONFIG_H
1932a712daSGarrett D'Amore #include "config.h"
2032a712daSGarrett D'Amore #endif
2132a712daSGarrett D'Amore
2232a712daSGarrett D'Amore #include <sys/types.h>
2332a712daSGarrett D'Amore
2432a712daSGarrett D'Amore #include <assert.h>
2532a712daSGarrett D'Amore #include <ctype.h>
2632a712daSGarrett D'Amore #include <errno.h>
2732a712daSGarrett D'Amore #include <limits.h>
2832a712daSGarrett D'Amore #include <stdlib.h>
2932a712daSGarrett D'Amore #include <stdio.h>
3032a712daSGarrett D'Amore #include <string.h>
3132a712daSGarrett D'Amore #include <time.h>
3232a712daSGarrett D'Amore
3332a712daSGarrett D'Amore #include "mandoc.h"
3432a712daSGarrett D'Amore #include "libmandoc.h"
3532a712daSGarrett D'Amore
3632a712daSGarrett D'Amore #define DATESIZE 32
3732a712daSGarrett D'Amore
3832a712daSGarrett D'Amore static int a2time(time_t *, const char *, const char *);
3932a712daSGarrett D'Amore static char *time2a(time_t);
4032a712daSGarrett D'Amore
4132a712daSGarrett D'Amore
4232a712daSGarrett D'Amore enum mandoc_esc
mandoc_escape(const char ** end,const char ** start,int * sz)4332a712daSGarrett D'Amore mandoc_escape(const char **end, const char **start, int *sz)
4432a712daSGarrett D'Amore {
45*ffb8ebfaSGarrett D'Amore const char *local_start;
46*ffb8ebfaSGarrett D'Amore int local_sz;
47*ffb8ebfaSGarrett D'Amore char term;
4832a712daSGarrett D'Amore enum mandoc_esc gly;
4932a712daSGarrett D'Amore
50*ffb8ebfaSGarrett D'Amore /*
51*ffb8ebfaSGarrett D'Amore * When the caller doesn't provide return storage,
52*ffb8ebfaSGarrett D'Amore * use local storage.
53*ffb8ebfaSGarrett D'Amore */
5432a712daSGarrett D'Amore
55*ffb8ebfaSGarrett D'Amore if (NULL == start)
56*ffb8ebfaSGarrett D'Amore start = &local_start;
57*ffb8ebfaSGarrett D'Amore if (NULL == sz)
58*ffb8ebfaSGarrett D'Amore sz = &local_sz;
59*ffb8ebfaSGarrett D'Amore
60*ffb8ebfaSGarrett D'Amore /*
61*ffb8ebfaSGarrett D'Amore * Beyond the backslash, at least one input character
62*ffb8ebfaSGarrett D'Amore * is part of the escape sequence. With one exception
63*ffb8ebfaSGarrett D'Amore * (see below), that character won't be returned.
64*ffb8ebfaSGarrett D'Amore */
65*ffb8ebfaSGarrett D'Amore
66*ffb8ebfaSGarrett D'Amore gly = ESCAPE_ERROR;
67*ffb8ebfaSGarrett D'Amore *start = ++*end;
68*ffb8ebfaSGarrett D'Amore *sz = 0;
69*ffb8ebfaSGarrett D'Amore term = '\0';
70*ffb8ebfaSGarrett D'Amore
71*ffb8ebfaSGarrett D'Amore switch ((*start)[-1]) {
7232a712daSGarrett D'Amore /*
7332a712daSGarrett D'Amore * First the glyphs. There are several different forms of
7432a712daSGarrett D'Amore * these, but each eventually returns a substring of the glyph
7532a712daSGarrett D'Amore * name.
7632a712daSGarrett D'Amore */
7732a712daSGarrett D'Amore case ('('):
7832a712daSGarrett D'Amore gly = ESCAPE_SPECIAL;
79*ffb8ebfaSGarrett D'Amore *sz = 2;
8032a712daSGarrett D'Amore break;
8132a712daSGarrett D'Amore case ('['):
8232a712daSGarrett D'Amore gly = ESCAPE_SPECIAL;
8332a712daSGarrett D'Amore /*
8432a712daSGarrett D'Amore * Unicode escapes are defined in groff as \[uXXXX] to
8532a712daSGarrett D'Amore * \[u10FFFF], where the contained value must be a valid
8632a712daSGarrett D'Amore * Unicode codepoint. Here, however, only check whether
8732a712daSGarrett D'Amore * it's not a zero-width escape.
8832a712daSGarrett D'Amore */
89*ffb8ebfaSGarrett D'Amore if ('u' == (*start)[0] && ']' != (*start)[1])
9032a712daSGarrett D'Amore gly = ESCAPE_UNICODE;
9132a712daSGarrett D'Amore term = ']';
9232a712daSGarrett D'Amore break;
9332a712daSGarrett D'Amore case ('C'):
94*ffb8ebfaSGarrett D'Amore if ('\'' != **start)
9532a712daSGarrett D'Amore return(ESCAPE_ERROR);
96*ffb8ebfaSGarrett D'Amore *start = ++*end;
97*ffb8ebfaSGarrett D'Amore if ('u' == (*start)[0] && '\'' != (*start)[1])
98*ffb8ebfaSGarrett D'Amore gly = ESCAPE_UNICODE;
99*ffb8ebfaSGarrett D'Amore else
10032a712daSGarrett D'Amore gly = ESCAPE_SPECIAL;
10132a712daSGarrett D'Amore term = '\'';
10232a712daSGarrett D'Amore break;
10332a712daSGarrett D'Amore
10432a712daSGarrett D'Amore /*
105*ffb8ebfaSGarrett D'Amore * Escapes taking no arguments at all.
106*ffb8ebfaSGarrett D'Amore */
107*ffb8ebfaSGarrett D'Amore case ('d'):
108*ffb8ebfaSGarrett D'Amore /* FALLTHROUGH */
109*ffb8ebfaSGarrett D'Amore case ('u'):
110*ffb8ebfaSGarrett D'Amore return(ESCAPE_IGNORE);
111*ffb8ebfaSGarrett D'Amore
112*ffb8ebfaSGarrett D'Amore /*
113*ffb8ebfaSGarrett D'Amore * The \z escape is supposed to output the following
114*ffb8ebfaSGarrett D'Amore * character without advancing the cursor position.
115*ffb8ebfaSGarrett D'Amore * Since we are mostly dealing with terminal mode,
116*ffb8ebfaSGarrett D'Amore * let us just skip the next character.
117*ffb8ebfaSGarrett D'Amore */
118*ffb8ebfaSGarrett D'Amore case ('z'):
119*ffb8ebfaSGarrett D'Amore return(ESCAPE_SKIPCHAR);
120*ffb8ebfaSGarrett D'Amore
121*ffb8ebfaSGarrett D'Amore /*
12232a712daSGarrett D'Amore * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where
12332a712daSGarrett D'Amore * 'X' is the trigger. These have opaque sub-strings.
12432a712daSGarrett D'Amore */
12532a712daSGarrett D'Amore case ('F'):
12632a712daSGarrett D'Amore /* FALLTHROUGH */
12732a712daSGarrett D'Amore case ('g'):
12832a712daSGarrett D'Amore /* FALLTHROUGH */
12932a712daSGarrett D'Amore case ('k'):
13032a712daSGarrett D'Amore /* FALLTHROUGH */
13132a712daSGarrett D'Amore case ('M'):
13232a712daSGarrett D'Amore /* FALLTHROUGH */
13332a712daSGarrett D'Amore case ('m'):
13432a712daSGarrett D'Amore /* FALLTHROUGH */
13532a712daSGarrett D'Amore case ('n'):
13632a712daSGarrett D'Amore /* FALLTHROUGH */
13732a712daSGarrett D'Amore case ('V'):
13832a712daSGarrett D'Amore /* FALLTHROUGH */
13932a712daSGarrett D'Amore case ('Y'):
14032a712daSGarrett D'Amore gly = ESCAPE_IGNORE;
14132a712daSGarrett D'Amore /* FALLTHROUGH */
14232a712daSGarrett D'Amore case ('f'):
14332a712daSGarrett D'Amore if (ESCAPE_ERROR == gly)
14432a712daSGarrett D'Amore gly = ESCAPE_FONT;
145*ffb8ebfaSGarrett D'Amore switch (**start) {
14632a712daSGarrett D'Amore case ('('):
147*ffb8ebfaSGarrett D'Amore *start = ++*end;
148*ffb8ebfaSGarrett D'Amore *sz = 2;
14932a712daSGarrett D'Amore break;
15032a712daSGarrett D'Amore case ('['):
151*ffb8ebfaSGarrett D'Amore *start = ++*end;
15232a712daSGarrett D'Amore term = ']';
15332a712daSGarrett D'Amore break;
15432a712daSGarrett D'Amore default:
155*ffb8ebfaSGarrett D'Amore *sz = 1;
15632a712daSGarrett D'Amore break;
15732a712daSGarrett D'Amore }
15832a712daSGarrett D'Amore break;
15932a712daSGarrett D'Amore
16032a712daSGarrett D'Amore /*
16132a712daSGarrett D'Amore * These escapes are of the form \X'Y', where 'X' is the trigger
16232a712daSGarrett D'Amore * and 'Y' is any string. These have opaque sub-strings.
16332a712daSGarrett D'Amore */
16432a712daSGarrett D'Amore case ('A'):
16532a712daSGarrett D'Amore /* FALLTHROUGH */
16632a712daSGarrett D'Amore case ('b'):
16732a712daSGarrett D'Amore /* FALLTHROUGH */
168*ffb8ebfaSGarrett D'Amore case ('B'):
169*ffb8ebfaSGarrett D'Amore /* FALLTHROUGH */
17032a712daSGarrett D'Amore case ('D'):
17132a712daSGarrett D'Amore /* FALLTHROUGH */
17232a712daSGarrett D'Amore case ('o'):
17332a712daSGarrett D'Amore /* FALLTHROUGH */
17432a712daSGarrett D'Amore case ('R'):
17532a712daSGarrett D'Amore /* FALLTHROUGH */
176*ffb8ebfaSGarrett D'Amore case ('w'):
177*ffb8ebfaSGarrett D'Amore /* FALLTHROUGH */
17832a712daSGarrett D'Amore case ('X'):
17932a712daSGarrett D'Amore /* FALLTHROUGH */
18032a712daSGarrett D'Amore case ('Z'):
181*ffb8ebfaSGarrett D'Amore if ('\'' != **start)
18232a712daSGarrett D'Amore return(ESCAPE_ERROR);
18332a712daSGarrett D'Amore gly = ESCAPE_IGNORE;
184*ffb8ebfaSGarrett D'Amore *start = ++*end;
18532a712daSGarrett D'Amore term = '\'';
18632a712daSGarrett D'Amore break;
18732a712daSGarrett D'Amore
18832a712daSGarrett D'Amore /*
18932a712daSGarrett D'Amore * These escapes are of the form \X'N', where 'X' is the trigger
19032a712daSGarrett D'Amore * and 'N' resolves to a numerical expression.
19132a712daSGarrett D'Amore */
19232a712daSGarrett D'Amore case ('h'):
19332a712daSGarrett D'Amore /* FALLTHROUGH */
19432a712daSGarrett D'Amore case ('H'):
19532a712daSGarrett D'Amore /* FALLTHROUGH */
19632a712daSGarrett D'Amore case ('L'):
19732a712daSGarrett D'Amore /* FALLTHROUGH */
19832a712daSGarrett D'Amore case ('l'):
19932a712daSGarrett D'Amore /* FALLTHROUGH */
20032a712daSGarrett D'Amore case ('S'):
20132a712daSGarrett D'Amore /* FALLTHROUGH */
20232a712daSGarrett D'Amore case ('v'):
20332a712daSGarrett D'Amore /* FALLTHROUGH */
20432a712daSGarrett D'Amore case ('x'):
205*ffb8ebfaSGarrett D'Amore if ('\'' != **start)
20632a712daSGarrett D'Amore return(ESCAPE_ERROR);
207*ffb8ebfaSGarrett D'Amore gly = ESCAPE_IGNORE;
208*ffb8ebfaSGarrett D'Amore *start = ++*end;
209*ffb8ebfaSGarrett D'Amore term = '\'';
21032a712daSGarrett D'Amore break;
21132a712daSGarrett D'Amore
21232a712daSGarrett D'Amore /*
21332a712daSGarrett D'Amore * Special handling for the numbered character escape.
21432a712daSGarrett D'Amore * XXX Do any other escapes need similar handling?
21532a712daSGarrett D'Amore */
21632a712daSGarrett D'Amore case ('N'):
217*ffb8ebfaSGarrett D'Amore if ('\0' == **start)
21832a712daSGarrett D'Amore return(ESCAPE_ERROR);
219*ffb8ebfaSGarrett D'Amore (*end)++;
220*ffb8ebfaSGarrett D'Amore if (isdigit((unsigned char)**start)) {
221*ffb8ebfaSGarrett D'Amore *sz = 1;
22232a712daSGarrett D'Amore return(ESCAPE_IGNORE);
223*ffb8ebfaSGarrett D'Amore }
224*ffb8ebfaSGarrett D'Amore (*start)++;
22532a712daSGarrett D'Amore while (isdigit((unsigned char)**end))
22632a712daSGarrett D'Amore (*end)++;
227*ffb8ebfaSGarrett D'Amore *sz = *end - *start;
22832a712daSGarrett D'Amore if ('\0' != **end)
22932a712daSGarrett D'Amore (*end)++;
23032a712daSGarrett D'Amore return(ESCAPE_NUMBERED);
23132a712daSGarrett D'Amore
23232a712daSGarrett D'Amore /*
23332a712daSGarrett D'Amore * Sizes get a special category of their own.
23432a712daSGarrett D'Amore */
23532a712daSGarrett D'Amore case ('s'):
23632a712daSGarrett D'Amore gly = ESCAPE_IGNORE;
23732a712daSGarrett D'Amore
23832a712daSGarrett D'Amore /* See +/- counts as a sign. */
239*ffb8ebfaSGarrett D'Amore if ('+' == **end || '-' == **end || ASCII_HYPH == **end)
240*ffb8ebfaSGarrett D'Amore (*end)++;
24132a712daSGarrett D'Amore
242*ffb8ebfaSGarrett D'Amore switch (**end) {
24332a712daSGarrett D'Amore case ('('):
244*ffb8ebfaSGarrett D'Amore *start = ++*end;
245*ffb8ebfaSGarrett D'Amore *sz = 2;
24632a712daSGarrett D'Amore break;
24732a712daSGarrett D'Amore case ('['):
248*ffb8ebfaSGarrett D'Amore *start = ++*end;
249*ffb8ebfaSGarrett D'Amore term = ']';
25032a712daSGarrett D'Amore break;
25132a712daSGarrett D'Amore case ('\''):
252*ffb8ebfaSGarrett D'Amore *start = ++*end;
253*ffb8ebfaSGarrett D'Amore term = '\'';
25432a712daSGarrett D'Amore break;
25532a712daSGarrett D'Amore default:
256*ffb8ebfaSGarrett D'Amore *sz = 1;
25732a712daSGarrett D'Amore break;
25832a712daSGarrett D'Amore }
25932a712daSGarrett D'Amore
26032a712daSGarrett D'Amore break;
26132a712daSGarrett D'Amore
26232a712daSGarrett D'Amore /*
26332a712daSGarrett D'Amore * Anything else is assumed to be a glyph.
264*ffb8ebfaSGarrett D'Amore * In this case, pass back the character after the backslash.
26532a712daSGarrett D'Amore */
26632a712daSGarrett D'Amore default:
26732a712daSGarrett D'Amore gly = ESCAPE_SPECIAL;
268*ffb8ebfaSGarrett D'Amore *start = --*end;
269*ffb8ebfaSGarrett D'Amore *sz = 1;
27032a712daSGarrett D'Amore break;
27132a712daSGarrett D'Amore }
27232a712daSGarrett D'Amore
27332a712daSGarrett D'Amore assert(ESCAPE_ERROR != gly);
27432a712daSGarrett D'Amore
27532a712daSGarrett D'Amore /*
276*ffb8ebfaSGarrett D'Amore * Read up to the terminating character,
277*ffb8ebfaSGarrett D'Amore * paying attention to nested escapes.
27832a712daSGarrett D'Amore */
27932a712daSGarrett D'Amore
28032a712daSGarrett D'Amore if ('\0' != term) {
281*ffb8ebfaSGarrett D'Amore while (**end != term) {
282*ffb8ebfaSGarrett D'Amore switch (**end) {
283*ffb8ebfaSGarrett D'Amore case ('\0'):
28432a712daSGarrett D'Amore return(ESCAPE_ERROR);
285*ffb8ebfaSGarrett D'Amore case ('\\'):
28632a712daSGarrett D'Amore (*end)++;
287*ffb8ebfaSGarrett D'Amore if (ESCAPE_ERROR ==
288*ffb8ebfaSGarrett D'Amore mandoc_escape(end, NULL, NULL))
28932a712daSGarrett D'Amore return(ESCAPE_ERROR);
290*ffb8ebfaSGarrett D'Amore break;
291*ffb8ebfaSGarrett D'Amore default:
292*ffb8ebfaSGarrett D'Amore (*end)++;
293*ffb8ebfaSGarrett D'Amore break;
294*ffb8ebfaSGarrett D'Amore }
295*ffb8ebfaSGarrett D'Amore }
296*ffb8ebfaSGarrett D'Amore *sz = (*end)++ - *start;
297*ffb8ebfaSGarrett D'Amore } else {
298*ffb8ebfaSGarrett D'Amore assert(*sz > 0);
299*ffb8ebfaSGarrett D'Amore if ((size_t)*sz > strlen(*start))
300*ffb8ebfaSGarrett D'Amore return(ESCAPE_ERROR);
301*ffb8ebfaSGarrett D'Amore *end += *sz;
302*ffb8ebfaSGarrett D'Amore }
30332a712daSGarrett D'Amore
30432a712daSGarrett D'Amore /* Run post-processors. */
30532a712daSGarrett D'Amore
30632a712daSGarrett D'Amore switch (gly) {
30732a712daSGarrett D'Amore case (ESCAPE_FONT):
308*ffb8ebfaSGarrett D'Amore if (2 == *sz) {
309*ffb8ebfaSGarrett D'Amore if ('C' == **start) {
31032a712daSGarrett D'Amore /*
311*ffb8ebfaSGarrett D'Amore * Treat constant-width font modes
312*ffb8ebfaSGarrett D'Amore * just like regular font modes.
31332a712daSGarrett D'Amore */
314*ffb8ebfaSGarrett D'Amore (*start)++;
315*ffb8ebfaSGarrett D'Amore (*sz)--;
316*ffb8ebfaSGarrett D'Amore } else {
317*ffb8ebfaSGarrett D'Amore if ('B' == (*start)[0] && 'I' == (*start)[1])
318*ffb8ebfaSGarrett D'Amore gly = ESCAPE_FONTBI;
319*ffb8ebfaSGarrett D'Amore break;
320*ffb8ebfaSGarrett D'Amore }
321*ffb8ebfaSGarrett D'Amore } else if (1 != *sz)
32232a712daSGarrett D'Amore break;
32332a712daSGarrett D'Amore
324*ffb8ebfaSGarrett D'Amore switch (**start) {
32532a712daSGarrett D'Amore case ('3'):
32632a712daSGarrett D'Amore /* FALLTHROUGH */
32732a712daSGarrett D'Amore case ('B'):
32832a712daSGarrett D'Amore gly = ESCAPE_FONTBOLD;
32932a712daSGarrett D'Amore break;
33032a712daSGarrett D'Amore case ('2'):
33132a712daSGarrett D'Amore /* FALLTHROUGH */
33232a712daSGarrett D'Amore case ('I'):
33332a712daSGarrett D'Amore gly = ESCAPE_FONTITALIC;
33432a712daSGarrett D'Amore break;
33532a712daSGarrett D'Amore case ('P'):
33632a712daSGarrett D'Amore gly = ESCAPE_FONTPREV;
33732a712daSGarrett D'Amore break;
33832a712daSGarrett D'Amore case ('1'):
33932a712daSGarrett D'Amore /* FALLTHROUGH */
34032a712daSGarrett D'Amore case ('R'):
34132a712daSGarrett D'Amore gly = ESCAPE_FONTROMAN;
34232a712daSGarrett D'Amore break;
34332a712daSGarrett D'Amore }
34432a712daSGarrett D'Amore break;
34532a712daSGarrett D'Amore case (ESCAPE_SPECIAL):
346*ffb8ebfaSGarrett D'Amore if (1 == *sz && 'c' == **start)
34732a712daSGarrett D'Amore gly = ESCAPE_NOSPACE;
34832a712daSGarrett D'Amore break;
34932a712daSGarrett D'Amore default:
35032a712daSGarrett D'Amore break;
35132a712daSGarrett D'Amore }
35232a712daSGarrett D'Amore
35332a712daSGarrett D'Amore return(gly);
35432a712daSGarrett D'Amore }
35532a712daSGarrett D'Amore
35632a712daSGarrett D'Amore void *
mandoc_calloc(size_t num,size_t size)35732a712daSGarrett D'Amore mandoc_calloc(size_t num, size_t size)
35832a712daSGarrett D'Amore {
35932a712daSGarrett D'Amore void *ptr;
36032a712daSGarrett D'Amore
36132a712daSGarrett D'Amore ptr = calloc(num, size);
36232a712daSGarrett D'Amore if (NULL == ptr) {
36332a712daSGarrett D'Amore perror(NULL);
36432a712daSGarrett D'Amore exit((int)MANDOCLEVEL_SYSERR);
36532a712daSGarrett D'Amore }
36632a712daSGarrett D'Amore
36732a712daSGarrett D'Amore return(ptr);
36832a712daSGarrett D'Amore }
36932a712daSGarrett D'Amore
37032a712daSGarrett D'Amore
37132a712daSGarrett D'Amore void *
mandoc_malloc(size_t size)37232a712daSGarrett D'Amore mandoc_malloc(size_t size)
37332a712daSGarrett D'Amore {
37432a712daSGarrett D'Amore void *ptr;
37532a712daSGarrett D'Amore
37632a712daSGarrett D'Amore ptr = malloc(size);
37732a712daSGarrett D'Amore if (NULL == ptr) {
37832a712daSGarrett D'Amore perror(NULL);
37932a712daSGarrett D'Amore exit((int)MANDOCLEVEL_SYSERR);
38032a712daSGarrett D'Amore }
38132a712daSGarrett D'Amore
38232a712daSGarrett D'Amore return(ptr);
38332a712daSGarrett D'Amore }
38432a712daSGarrett D'Amore
38532a712daSGarrett D'Amore
38632a712daSGarrett D'Amore void *
mandoc_realloc(void * ptr,size_t size)38732a712daSGarrett D'Amore mandoc_realloc(void *ptr, size_t size)
38832a712daSGarrett D'Amore {
38932a712daSGarrett D'Amore
39032a712daSGarrett D'Amore ptr = realloc(ptr, size);
39132a712daSGarrett D'Amore if (NULL == ptr) {
39232a712daSGarrett D'Amore perror(NULL);
39332a712daSGarrett D'Amore exit((int)MANDOCLEVEL_SYSERR);
39432a712daSGarrett D'Amore }
39532a712daSGarrett D'Amore
39632a712daSGarrett D'Amore return(ptr);
39732a712daSGarrett D'Amore }
39832a712daSGarrett D'Amore
/*
 * Return a newly allocated, NUL-terminated copy of the first sz
 * bytes at ptr.  Exactly sz bytes are copied with memcpy(3), so the
 * caller must make sure that many bytes are readable; an embedded
 * NUL does not shorten the copy.  The buffer comes from
 * mandoc_malloc(), so the result is never NULL.
 */
char *
mandoc_strndup(const char *ptr, size_t sz)
{
	char	*p;

	p = mandoc_malloc(sz + 1);
	memcpy(p, ptr, sz);
	/* Index with the size_t itself; a cast to int could truncate. */
	p[sz] = '\0';
	return(p);
}
40932a712daSGarrett D'Amore
41032a712daSGarrett D'Amore char *
mandoc_strdup(const char * ptr)41132a712daSGarrett D'Amore mandoc_strdup(const char *ptr)
41232a712daSGarrett D'Amore {
41332a712daSGarrett D'Amore char *p;
41432a712daSGarrett D'Amore
41532a712daSGarrett D'Amore p = strdup(ptr);
41632a712daSGarrett D'Amore if (NULL == p) {
41732a712daSGarrett D'Amore perror(NULL);
41832a712daSGarrett D'Amore exit((int)MANDOCLEVEL_SYSERR);
41932a712daSGarrett D'Amore }
42032a712daSGarrett D'Amore
42132a712daSGarrett D'Amore return(p);
42232a712daSGarrett D'Amore }
42332a712daSGarrett D'Amore
42432a712daSGarrett D'Amore /*
42532a712daSGarrett D'Amore * Parse a quoted or unquoted roff-style request or macro argument.
42632a712daSGarrett D'Amore * Return a pointer to the parsed argument, which is either the original
42732a712daSGarrett D'Amore * pointer or advanced by one byte in case the argument is quoted.
428*ffb8ebfaSGarrett D'Amore * NUL-terminate the argument in place.
42932a712daSGarrett D'Amore * Collapse pairs of quotes inside quoted arguments.
43032a712daSGarrett D'Amore * Advance the argument pointer to the next argument,
431*ffb8ebfaSGarrett D'Amore * or to the NUL byte terminating the argument line.
43232a712daSGarrett D'Amore */
43332a712daSGarrett D'Amore char *
mandoc_getarg(struct mparse * parse,char ** cpp,int ln,int * pos)43432a712daSGarrett D'Amore mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos)
43532a712daSGarrett D'Amore {
43632a712daSGarrett D'Amore char *start, *cp;
43732a712daSGarrett D'Amore int quoted, pairs, white;
43832a712daSGarrett D'Amore
43932a712daSGarrett D'Amore /* Quoting can only start with a new word. */
44032a712daSGarrett D'Amore start = *cpp;
44132a712daSGarrett D'Amore quoted = 0;
44232a712daSGarrett D'Amore if ('"' == *start) {
44332a712daSGarrett D'Amore quoted = 1;
44432a712daSGarrett D'Amore start++;
44532a712daSGarrett D'Amore }
44632a712daSGarrett D'Amore
44732a712daSGarrett D'Amore pairs = 0;
44832a712daSGarrett D'Amore white = 0;
44932a712daSGarrett D'Amore for (cp = start; '\0' != *cp; cp++) {
450*ffb8ebfaSGarrett D'Amore
451*ffb8ebfaSGarrett D'Amore /*
452*ffb8ebfaSGarrett D'Amore * Move the following text left
453*ffb8ebfaSGarrett D'Amore * after quoted quotes and after "\\" and "\t".
454*ffb8ebfaSGarrett D'Amore */
45532a712daSGarrett D'Amore if (pairs)
45632a712daSGarrett D'Amore cp[-pairs] = cp[0];
457*ffb8ebfaSGarrett D'Amore
45832a712daSGarrett D'Amore if ('\\' == cp[0]) {
459*ffb8ebfaSGarrett D'Amore /*
460*ffb8ebfaSGarrett D'Amore * In copy mode, translate double to single
461*ffb8ebfaSGarrett D'Amore * backslashes and backslash-t to literal tabs.
462*ffb8ebfaSGarrett D'Amore */
463*ffb8ebfaSGarrett D'Amore switch (cp[1]) {
464*ffb8ebfaSGarrett D'Amore case ('t'):
465*ffb8ebfaSGarrett D'Amore cp[0] = '\t';
466*ffb8ebfaSGarrett D'Amore /* FALLTHROUGH */
467*ffb8ebfaSGarrett D'Amore case ('\\'):
46832a712daSGarrett D'Amore pairs++;
46932a712daSGarrett D'Amore cp++;
470*ffb8ebfaSGarrett D'Amore break;
471*ffb8ebfaSGarrett D'Amore case (' '):
47232a712daSGarrett D'Amore /* Skip escaped blanks. */
473*ffb8ebfaSGarrett D'Amore if (0 == quoted)
47432a712daSGarrett D'Amore cp++;
475*ffb8ebfaSGarrett D'Amore break;
476*ffb8ebfaSGarrett D'Amore default:
477*ffb8ebfaSGarrett D'Amore break;
478*ffb8ebfaSGarrett D'Amore }
47932a712daSGarrett D'Amore } else if (0 == quoted) {
48032a712daSGarrett D'Amore if (' ' == cp[0]) {
48132a712daSGarrett D'Amore /* Unescaped blanks end unquoted args. */
48232a712daSGarrett D'Amore white = 1;
48332a712daSGarrett D'Amore break;
48432a712daSGarrett D'Amore }
48532a712daSGarrett D'Amore } else if ('"' == cp[0]) {
48632a712daSGarrett D'Amore if ('"' == cp[1]) {
48732a712daSGarrett D'Amore /* Quoted quotes collapse. */
48832a712daSGarrett D'Amore pairs++;
48932a712daSGarrett D'Amore cp++;
49032a712daSGarrett D'Amore } else {
49132a712daSGarrett D'Amore /* Unquoted quotes end quoted args. */
49232a712daSGarrett D'Amore quoted = 2;
49332a712daSGarrett D'Amore break;
49432a712daSGarrett D'Amore }
49532a712daSGarrett D'Amore }
49632a712daSGarrett D'Amore }
49732a712daSGarrett D'Amore
49832a712daSGarrett D'Amore /* Quoted argument without a closing quote. */
49932a712daSGarrett D'Amore if (1 == quoted)
50032a712daSGarrett D'Amore mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL);
50132a712daSGarrett D'Amore
502*ffb8ebfaSGarrett D'Amore /* NUL-terminate this argument and move to the next one. */
50332a712daSGarrett D'Amore if (pairs)
50432a712daSGarrett D'Amore cp[-pairs] = '\0';
50532a712daSGarrett D'Amore if ('\0' != *cp) {
50632a712daSGarrett D'Amore *cp++ = '\0';
50732a712daSGarrett D'Amore while (' ' == *cp)
50832a712daSGarrett D'Amore cp++;
50932a712daSGarrett D'Amore }
51032a712daSGarrett D'Amore *pos += (int)(cp - start) + (quoted ? 1 : 0);
51132a712daSGarrett D'Amore *cpp = cp;
51232a712daSGarrett D'Amore
51332a712daSGarrett D'Amore if ('\0' == *cp && (white || ' ' == cp[-1]))
51432a712daSGarrett D'Amore mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL);
51532a712daSGarrett D'Amore
51632a712daSGarrett D'Amore return(start);
51732a712daSGarrett D'Amore }
51832a712daSGarrett D'Amore
/*
 * Try to parse the whole string p according to the strptime(3)
 * format fmt.  On success, store the mktime(3) result in *t and
 * return 1.  Return 0 and leave *t untouched if p does not match,
 * if trailing input remains, or if the build has no strptime(3)
 * (HAVE_STRPTIME undefined).
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	const char	*rest = NULL;

	memset(&parsed, 0, sizeof(parsed));

#ifdef HAVE_STRPTIME
	rest = strptime(p, fmt, &parsed);
#endif
	/* Reject parse failures and partial matches alike. */
	if (rest == NULL || *rest != '\0')
		return 0;

	*t = mktime(&parsed);
	return 1;
}
53832a712daSGarrett D'Amore
/*
 * Format t in local time as "Month day, year" into a newly
 * allocated string that the caller must free(3).
 * Return NULL if the time cannot be broken down or if any part
 * does not fit into its reserved field.
 */
static char *
time2a(time_t t)
{
	struct tm	*tm;
	char		*buf, *p;
	size_t		 ssz;
	int		 isz;

	/* localtime(3) may fail; do not dereference NULL below. */
	tm = localtime(&t);
	if (NULL == tm)
		return(NULL);

	/*
	 * Reserve space:
	 * up to 9 characters for the month (September) + blank
	 * up to 2 characters for the day + comma + blank
	 * 4 characters for the year and a terminating '\0'
	 */
	p = buf = mandoc_malloc(10 + 4 + 4 + 1);

	if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm)))
		goto fail;
	p += (int)ssz;

	/*
	 * snprintf(3) returns the length it wanted to write;
	 * anything beyond the 4 reserved characters means the
	 * output was truncated and p must not be advanced by it.
	 */
	isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday);
	if (isz < 0 || isz > 4)
		goto fail;
	p += isz;

	if (0 == strftime(p, 4 + 1, "%Y", tm))
		goto fail;
	return(buf);

fail:
	free(buf);
	return(NULL);
}
57332a712daSGarrett D'Amore
57432a712daSGarrett D'Amore char *
mandoc_normdate(struct mparse * parse,char * in,int ln,int pos)57532a712daSGarrett D'Amore mandoc_normdate(struct mparse *parse, char *in, int ln, int pos)
57632a712daSGarrett D'Amore {
57732a712daSGarrett D'Amore char *out;
57832a712daSGarrett D'Amore time_t t;
57932a712daSGarrett D'Amore
58032a712daSGarrett D'Amore if (NULL == in || '\0' == *in ||
58132a712daSGarrett D'Amore 0 == strcmp(in, "$" "Mdocdate$")) {
58232a712daSGarrett D'Amore mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL);
58332a712daSGarrett D'Amore time(&t);
58432a712daSGarrett D'Amore }
58532a712daSGarrett D'Amore else if (a2time(&t, "%Y-%m-%d", in))
58632a712daSGarrett D'Amore t = 0;
58732a712daSGarrett D'Amore else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) &&
58832a712daSGarrett D'Amore !a2time(&t, "%b %d, %Y", in)) {
58932a712daSGarrett D'Amore mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL);
59032a712daSGarrett D'Amore t = 0;
59132a712daSGarrett D'Amore }
59232a712daSGarrett D'Amore out = t ? time2a(t) : NULL;
59332a712daSGarrett D'Amore return(out ? out : mandoc_strdup(in));
59432a712daSGarrett D'Amore }
59532a712daSGarrett D'Amore
/*
 * Decide whether the sz bytes at p end a sentence.
 *
 * The text is scanned backwards.  Closing delimiters (double quote,
 * single quote, ']' and ')') seen before any end-of-sentence
 * punctuation mark the text as enclosed.  '.', '!' and '?' mark
 * punctuation as found.  The first other character ends the scan.
 *
 * Returns 1 if punctuation was found and the text is either not
 * enclosed or the character preceding the scanned tail is
 * alphanumeric; returns 0 otherwise, including for sz == 0.
 * A non-zero enclosed argument makes the scan start out enclosed.
 */
int
mandoc_eos(const char *p, size_t sz, int enclosed)
{
	size_t		 i;
	int		 found;

	if (0 == sz)
		return(0);

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	found = 0;

	/*
	 * Walk backwards by index rather than by pointer, so that no
	 * pointer before the start of the buffer is ever formed and
	 * sz is never narrowed to int.
	 */
	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case ('\"'):
			/* FALLTHROUGH */
		case ('\''):
			/* FALLTHROUGH */
		case (']'):
			/* FALLTHROUGH */
		case (')'):
			if (0 == found)
				enclosed = 1;
			break;
		case ('.'):
			/* FALLTHROUGH */
		case ('!'):
			/* FALLTHROUGH */
		case ('?'):
			found = 1;
			break;
		default:
			return(found && (!enclosed ||
			    isalnum((unsigned char)p[i - 1])));
		}
	}

	return(found && !enclosed);
}
63832a712daSGarrett D'Amore
/*
 * Convert the first sz bytes at p to an int using strtol(3) with
 * the given base.
 * Return -1 if sz exceeds 31 bytes, if the input is empty, or if
 * it is not consumed completely by the conversion.
 * Otherwise return the converted value, clamped to the range
 * INT_MIN to INT_MAX.  Negative input is thus returned as a
 * negative number, and -1 is not exclusively an error indicator.
 * errno is reset to 0 before the conversion.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char	 digits[32];
	char	*stop;
	long	 val;

	/* One byte of the buffer is needed for the terminator. */
	if (sz >= sizeof(digits))
		return -1;

	memcpy(digits, p, sz);
	digits[sz] = '\0';

	errno = 0;
	val = strtol(digits, &stop, base);

	if (digits[0] == '\0' || *stop != '\0')
		return -1;

	if (val > INT_MAX)
		return INT_MAX;
	if (val < INT_MIN)
		return INT_MIN;
	return (int)val;
}
669