1*95c635efSGarrett D'Amore /* $Id: mandoc.c,v 1.62 2011/12/03 16:08:51 schwarze Exp $ */ 2*95c635efSGarrett D'Amore /* 3*95c635efSGarrett D'Amore * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4*95c635efSGarrett D'Amore * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> 5*95c635efSGarrett D'Amore * 6*95c635efSGarrett D'Amore * Permission to use, copy, modify, and distribute this software for any 7*95c635efSGarrett D'Amore * purpose with or without fee is hereby granted, provided that the above 8*95c635efSGarrett D'Amore * copyright notice and this permission notice appear in all copies. 9*95c635efSGarrett D'Amore * 10*95c635efSGarrett D'Amore * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11*95c635efSGarrett D'Amore * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12*95c635efSGarrett D'Amore * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13*95c635efSGarrett D'Amore * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14*95c635efSGarrett D'Amore * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15*95c635efSGarrett D'Amore * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16*95c635efSGarrett D'Amore * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
17*95c635efSGarrett D'Amore */ 18*95c635efSGarrett D'Amore #ifdef HAVE_CONFIG_H 19*95c635efSGarrett D'Amore #include "config.h" 20*95c635efSGarrett D'Amore #endif 21*95c635efSGarrett D'Amore 22*95c635efSGarrett D'Amore #include <sys/types.h> 23*95c635efSGarrett D'Amore 24*95c635efSGarrett D'Amore #include <assert.h> 25*95c635efSGarrett D'Amore #include <ctype.h> 26*95c635efSGarrett D'Amore #include <errno.h> 27*95c635efSGarrett D'Amore #include <limits.h> 28*95c635efSGarrett D'Amore #include <stdlib.h> 29*95c635efSGarrett D'Amore #include <stdio.h> 30*95c635efSGarrett D'Amore #include <string.h> 31*95c635efSGarrett D'Amore #include <time.h> 32*95c635efSGarrett D'Amore 33*95c635efSGarrett D'Amore #include "mandoc.h" 34*95c635efSGarrett D'Amore #include "libmandoc.h" 35*95c635efSGarrett D'Amore 36*95c635efSGarrett D'Amore #define DATESIZE 32 37*95c635efSGarrett D'Amore 38*95c635efSGarrett D'Amore static int a2time(time_t *, const char *, const char *); 39*95c635efSGarrett D'Amore static char *time2a(time_t); 40*95c635efSGarrett D'Amore static int numescape(const char *); 41*95c635efSGarrett D'Amore 42*95c635efSGarrett D'Amore /* 43*95c635efSGarrett D'Amore * Pass over recursive numerical expressions. This context of this 44*95c635efSGarrett D'Amore * function is important: it's only called within character-terminating 45*95c635efSGarrett D'Amore * escapes (e.g., \s[xxxyyy]), so all we need to do is handle initial 46*95c635efSGarrett D'Amore * recursion: we don't care about what's in these blocks. 47*95c635efSGarrett D'Amore * This returns the number of characters skipped or -1 if an error 48*95c635efSGarrett D'Amore * occurs (the caller should bail). 
49*95c635efSGarrett D'Amore */ 50*95c635efSGarrett D'Amore static int 51*95c635efSGarrett D'Amore numescape(const char *start) 52*95c635efSGarrett D'Amore { 53*95c635efSGarrett D'Amore int i; 54*95c635efSGarrett D'Amore size_t sz; 55*95c635efSGarrett D'Amore const char *cp; 56*95c635efSGarrett D'Amore 57*95c635efSGarrett D'Amore i = 0; 58*95c635efSGarrett D'Amore 59*95c635efSGarrett D'Amore /* The expression consists of a subexpression. */ 60*95c635efSGarrett D'Amore 61*95c635efSGarrett D'Amore if ('\\' == start[i]) { 62*95c635efSGarrett D'Amore cp = &start[++i]; 63*95c635efSGarrett D'Amore /* 64*95c635efSGarrett D'Amore * Read past the end of the subexpression. 65*95c635efSGarrett D'Amore * Bail immediately on errors. 66*95c635efSGarrett D'Amore */ 67*95c635efSGarrett D'Amore if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) 68*95c635efSGarrett D'Amore return(-1); 69*95c635efSGarrett D'Amore return(i + cp - &start[i]); 70*95c635efSGarrett D'Amore } 71*95c635efSGarrett D'Amore 72*95c635efSGarrett D'Amore if ('(' != start[i++]) 73*95c635efSGarrett D'Amore return(0); 74*95c635efSGarrett D'Amore 75*95c635efSGarrett D'Amore /* 76*95c635efSGarrett D'Amore * A parenthesised subexpression. Read until the closing 77*95c635efSGarrett D'Amore * parenthesis, making sure to handle any nested subexpressions 78*95c635efSGarrett D'Amore * that might ruin our parse. 
79*95c635efSGarrett D'Amore */ 80*95c635efSGarrett D'Amore 81*95c635efSGarrett D'Amore while (')' != start[i]) { 82*95c635efSGarrett D'Amore sz = strcspn(&start[i], ")\\"); 83*95c635efSGarrett D'Amore i += (int)sz; 84*95c635efSGarrett D'Amore 85*95c635efSGarrett D'Amore if ('\0' == start[i]) 86*95c635efSGarrett D'Amore return(-1); 87*95c635efSGarrett D'Amore else if ('\\' != start[i]) 88*95c635efSGarrett D'Amore continue; 89*95c635efSGarrett D'Amore 90*95c635efSGarrett D'Amore cp = &start[++i]; 91*95c635efSGarrett D'Amore if (ESCAPE_ERROR == mandoc_escape(&cp, NULL, NULL)) 92*95c635efSGarrett D'Amore return(-1); 93*95c635efSGarrett D'Amore i += cp - &start[i]; 94*95c635efSGarrett D'Amore } 95*95c635efSGarrett D'Amore 96*95c635efSGarrett D'Amore /* Read past the terminating ')'. */ 97*95c635efSGarrett D'Amore return(++i); 98*95c635efSGarrett D'Amore } 99*95c635efSGarrett D'Amore 100*95c635efSGarrett D'Amore enum mandoc_esc 101*95c635efSGarrett D'Amore mandoc_escape(const char **end, const char **start, int *sz) 102*95c635efSGarrett D'Amore { 103*95c635efSGarrett D'Amore char c, term, numeric; 104*95c635efSGarrett D'Amore int i, lim, ssz, rlim; 105*95c635efSGarrett D'Amore const char *cp, *rstart; 106*95c635efSGarrett D'Amore enum mandoc_esc gly; 107*95c635efSGarrett D'Amore 108*95c635efSGarrett D'Amore cp = *end; 109*95c635efSGarrett D'Amore rstart = cp; 110*95c635efSGarrett D'Amore if (start) 111*95c635efSGarrett D'Amore *start = rstart; 112*95c635efSGarrett D'Amore i = lim = 0; 113*95c635efSGarrett D'Amore gly = ESCAPE_ERROR; 114*95c635efSGarrett D'Amore term = numeric = '\0'; 115*95c635efSGarrett D'Amore 116*95c635efSGarrett D'Amore switch ((c = cp[i++])) { 117*95c635efSGarrett D'Amore /* 118*95c635efSGarrett D'Amore * First the glyphs. There are several different forms of 119*95c635efSGarrett D'Amore * these, but each eventually returns a substring of the glyph 120*95c635efSGarrett D'Amore * name. 
121*95c635efSGarrett D'Amore */ 122*95c635efSGarrett D'Amore case ('('): 123*95c635efSGarrett D'Amore gly = ESCAPE_SPECIAL; 124*95c635efSGarrett D'Amore lim = 2; 125*95c635efSGarrett D'Amore break; 126*95c635efSGarrett D'Amore case ('['): 127*95c635efSGarrett D'Amore gly = ESCAPE_SPECIAL; 128*95c635efSGarrett D'Amore /* 129*95c635efSGarrett D'Amore * Unicode escapes are defined in groff as \[uXXXX] to 130*95c635efSGarrett D'Amore * \[u10FFFF], where the contained value must be a valid 131*95c635efSGarrett D'Amore * Unicode codepoint. Here, however, only check whether 132*95c635efSGarrett D'Amore * it's not a zero-width escape. 133*95c635efSGarrett D'Amore */ 134*95c635efSGarrett D'Amore if ('u' == cp[i] && ']' != cp[i + 1]) 135*95c635efSGarrett D'Amore gly = ESCAPE_UNICODE; 136*95c635efSGarrett D'Amore term = ']'; 137*95c635efSGarrett D'Amore break; 138*95c635efSGarrett D'Amore case ('C'): 139*95c635efSGarrett D'Amore if ('\'' != cp[i]) 140*95c635efSGarrett D'Amore return(ESCAPE_ERROR); 141*95c635efSGarrett D'Amore gly = ESCAPE_SPECIAL; 142*95c635efSGarrett D'Amore term = '\''; 143*95c635efSGarrett D'Amore break; 144*95c635efSGarrett D'Amore 145*95c635efSGarrett D'Amore /* 146*95c635efSGarrett D'Amore * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where 147*95c635efSGarrett D'Amore * 'X' is the trigger. These have opaque sub-strings. 
148*95c635efSGarrett D'Amore */ 149*95c635efSGarrett D'Amore case ('F'): 150*95c635efSGarrett D'Amore /* FALLTHROUGH */ 151*95c635efSGarrett D'Amore case ('g'): 152*95c635efSGarrett D'Amore /* FALLTHROUGH */ 153*95c635efSGarrett D'Amore case ('k'): 154*95c635efSGarrett D'Amore /* FALLTHROUGH */ 155*95c635efSGarrett D'Amore case ('M'): 156*95c635efSGarrett D'Amore /* FALLTHROUGH */ 157*95c635efSGarrett D'Amore case ('m'): 158*95c635efSGarrett D'Amore /* FALLTHROUGH */ 159*95c635efSGarrett D'Amore case ('n'): 160*95c635efSGarrett D'Amore /* FALLTHROUGH */ 161*95c635efSGarrett D'Amore case ('V'): 162*95c635efSGarrett D'Amore /* FALLTHROUGH */ 163*95c635efSGarrett D'Amore case ('Y'): 164*95c635efSGarrett D'Amore gly = ESCAPE_IGNORE; 165*95c635efSGarrett D'Amore /* FALLTHROUGH */ 166*95c635efSGarrett D'Amore case ('f'): 167*95c635efSGarrett D'Amore if (ESCAPE_ERROR == gly) 168*95c635efSGarrett D'Amore gly = ESCAPE_FONT; 169*95c635efSGarrett D'Amore 170*95c635efSGarrett D'Amore rstart= &cp[i]; 171*95c635efSGarrett D'Amore if (start) 172*95c635efSGarrett D'Amore *start = rstart; 173*95c635efSGarrett D'Amore 174*95c635efSGarrett D'Amore switch (cp[i++]) { 175*95c635efSGarrett D'Amore case ('('): 176*95c635efSGarrett D'Amore lim = 2; 177*95c635efSGarrett D'Amore break; 178*95c635efSGarrett D'Amore case ('['): 179*95c635efSGarrett D'Amore term = ']'; 180*95c635efSGarrett D'Amore break; 181*95c635efSGarrett D'Amore default: 182*95c635efSGarrett D'Amore lim = 1; 183*95c635efSGarrett D'Amore i--; 184*95c635efSGarrett D'Amore break; 185*95c635efSGarrett D'Amore } 186*95c635efSGarrett D'Amore break; 187*95c635efSGarrett D'Amore 188*95c635efSGarrett D'Amore /* 189*95c635efSGarrett D'Amore * These escapes are of the form \X'Y', where 'X' is the trigger 190*95c635efSGarrett D'Amore * and 'Y' is any string. These have opaque sub-strings. 
191*95c635efSGarrett D'Amore */ 192*95c635efSGarrett D'Amore case ('A'): 193*95c635efSGarrett D'Amore /* FALLTHROUGH */ 194*95c635efSGarrett D'Amore case ('b'): 195*95c635efSGarrett D'Amore /* FALLTHROUGH */ 196*95c635efSGarrett D'Amore case ('D'): 197*95c635efSGarrett D'Amore /* FALLTHROUGH */ 198*95c635efSGarrett D'Amore case ('o'): 199*95c635efSGarrett D'Amore /* FALLTHROUGH */ 200*95c635efSGarrett D'Amore case ('R'): 201*95c635efSGarrett D'Amore /* FALLTHROUGH */ 202*95c635efSGarrett D'Amore case ('X'): 203*95c635efSGarrett D'Amore /* FALLTHROUGH */ 204*95c635efSGarrett D'Amore case ('Z'): 205*95c635efSGarrett D'Amore if ('\'' != cp[i++]) 206*95c635efSGarrett D'Amore return(ESCAPE_ERROR); 207*95c635efSGarrett D'Amore gly = ESCAPE_IGNORE; 208*95c635efSGarrett D'Amore term = '\''; 209*95c635efSGarrett D'Amore break; 210*95c635efSGarrett D'Amore 211*95c635efSGarrett D'Amore /* 212*95c635efSGarrett D'Amore * These escapes are of the form \X'N', where 'X' is the trigger 213*95c635efSGarrett D'Amore * and 'N' resolves to a numerical expression. 
214*95c635efSGarrett D'Amore */ 215*95c635efSGarrett D'Amore case ('B'): 216*95c635efSGarrett D'Amore /* FALLTHROUGH */ 217*95c635efSGarrett D'Amore case ('h'): 218*95c635efSGarrett D'Amore /* FALLTHROUGH */ 219*95c635efSGarrett D'Amore case ('H'): 220*95c635efSGarrett D'Amore /* FALLTHROUGH */ 221*95c635efSGarrett D'Amore case ('L'): 222*95c635efSGarrett D'Amore /* FALLTHROUGH */ 223*95c635efSGarrett D'Amore case ('l'): 224*95c635efSGarrett D'Amore gly = ESCAPE_NUMBERED; 225*95c635efSGarrett D'Amore /* FALLTHROUGH */ 226*95c635efSGarrett D'Amore case ('S'): 227*95c635efSGarrett D'Amore /* FALLTHROUGH */ 228*95c635efSGarrett D'Amore case ('v'): 229*95c635efSGarrett D'Amore /* FALLTHROUGH */ 230*95c635efSGarrett D'Amore case ('w'): 231*95c635efSGarrett D'Amore /* FALLTHROUGH */ 232*95c635efSGarrett D'Amore case ('x'): 233*95c635efSGarrett D'Amore if (ESCAPE_ERROR == gly) 234*95c635efSGarrett D'Amore gly = ESCAPE_IGNORE; 235*95c635efSGarrett D'Amore if ('\'' != cp[i++]) 236*95c635efSGarrett D'Amore return(ESCAPE_ERROR); 237*95c635efSGarrett D'Amore term = numeric = '\''; 238*95c635efSGarrett D'Amore break; 239*95c635efSGarrett D'Amore 240*95c635efSGarrett D'Amore /* 241*95c635efSGarrett D'Amore * Special handling for the numbered character escape. 242*95c635efSGarrett D'Amore * XXX Do any other escapes need similar handling? 
243*95c635efSGarrett D'Amore */ 244*95c635efSGarrett D'Amore case ('N'): 245*95c635efSGarrett D'Amore if ('\0' == cp[i]) 246*95c635efSGarrett D'Amore return(ESCAPE_ERROR); 247*95c635efSGarrett D'Amore *end = &cp[++i]; 248*95c635efSGarrett D'Amore if (isdigit((unsigned char)cp[i-1])) 249*95c635efSGarrett D'Amore return(ESCAPE_IGNORE); 250*95c635efSGarrett D'Amore while (isdigit((unsigned char)**end)) 251*95c635efSGarrett D'Amore (*end)++; 252*95c635efSGarrett D'Amore if (start) 253*95c635efSGarrett D'Amore *start = &cp[i]; 254*95c635efSGarrett D'Amore if (sz) 255*95c635efSGarrett D'Amore *sz = *end - &cp[i]; 256*95c635efSGarrett D'Amore if ('\0' != **end) 257*95c635efSGarrett D'Amore (*end)++; 258*95c635efSGarrett D'Amore return(ESCAPE_NUMBERED); 259*95c635efSGarrett D'Amore 260*95c635efSGarrett D'Amore /* 261*95c635efSGarrett D'Amore * Sizes get a special category of their own. 262*95c635efSGarrett D'Amore */ 263*95c635efSGarrett D'Amore case ('s'): 264*95c635efSGarrett D'Amore gly = ESCAPE_IGNORE; 265*95c635efSGarrett D'Amore 266*95c635efSGarrett D'Amore rstart = &cp[i]; 267*95c635efSGarrett D'Amore if (start) 268*95c635efSGarrett D'Amore *start = rstart; 269*95c635efSGarrett D'Amore 270*95c635efSGarrett D'Amore /* See +/- counts as a sign. 
*/ 271*95c635efSGarrett D'Amore c = cp[i]; 272*95c635efSGarrett D'Amore if ('+' == c || '-' == c || ASCII_HYPH == c) 273*95c635efSGarrett D'Amore ++i; 274*95c635efSGarrett D'Amore 275*95c635efSGarrett D'Amore switch (cp[i++]) { 276*95c635efSGarrett D'Amore case ('('): 277*95c635efSGarrett D'Amore lim = 2; 278*95c635efSGarrett D'Amore break; 279*95c635efSGarrett D'Amore case ('['): 280*95c635efSGarrett D'Amore term = numeric = ']'; 281*95c635efSGarrett D'Amore break; 282*95c635efSGarrett D'Amore case ('\''): 283*95c635efSGarrett D'Amore term = numeric = '\''; 284*95c635efSGarrett D'Amore break; 285*95c635efSGarrett D'Amore default: 286*95c635efSGarrett D'Amore lim = 1; 287*95c635efSGarrett D'Amore i--; 288*95c635efSGarrett D'Amore break; 289*95c635efSGarrett D'Amore } 290*95c635efSGarrett D'Amore 291*95c635efSGarrett D'Amore /* See +/- counts as a sign. */ 292*95c635efSGarrett D'Amore c = cp[i]; 293*95c635efSGarrett D'Amore if ('+' == c || '-' == c || ASCII_HYPH == c) 294*95c635efSGarrett D'Amore ++i; 295*95c635efSGarrett D'Amore 296*95c635efSGarrett D'Amore break; 297*95c635efSGarrett D'Amore 298*95c635efSGarrett D'Amore /* 299*95c635efSGarrett D'Amore * Anything else is assumed to be a glyph. 
300*95c635efSGarrett D'Amore */ 301*95c635efSGarrett D'Amore default: 302*95c635efSGarrett D'Amore gly = ESCAPE_SPECIAL; 303*95c635efSGarrett D'Amore lim = 1; 304*95c635efSGarrett D'Amore i--; 305*95c635efSGarrett D'Amore break; 306*95c635efSGarrett D'Amore } 307*95c635efSGarrett D'Amore 308*95c635efSGarrett D'Amore assert(ESCAPE_ERROR != gly); 309*95c635efSGarrett D'Amore 310*95c635efSGarrett D'Amore rstart = &cp[i]; 311*95c635efSGarrett D'Amore if (start) 312*95c635efSGarrett D'Amore *start = rstart; 313*95c635efSGarrett D'Amore 314*95c635efSGarrett D'Amore /* 315*95c635efSGarrett D'Amore * If a terminating block has been specified, we need to 316*95c635efSGarrett D'Amore * handle the case of recursion, which could have their 317*95c635efSGarrett D'Amore * own terminating blocks that mess up our parse. This, by the 318*95c635efSGarrett D'Amore * way, means that the "start" and "size" values will be 319*95c635efSGarrett D'Amore * effectively meaningless. 320*95c635efSGarrett D'Amore */ 321*95c635efSGarrett D'Amore 322*95c635efSGarrett D'Amore ssz = 0; 323*95c635efSGarrett D'Amore if (numeric && -1 == (ssz = numescape(&cp[i]))) 324*95c635efSGarrett D'Amore return(ESCAPE_ERROR); 325*95c635efSGarrett D'Amore 326*95c635efSGarrett D'Amore i += ssz; 327*95c635efSGarrett D'Amore rlim = -1; 328*95c635efSGarrett D'Amore 329*95c635efSGarrett D'Amore /* 330*95c635efSGarrett D'Amore * We have a character terminator. Try to read up to that 331*95c635efSGarrett D'Amore * character. If we can't (i.e., we hit the nil), then return 332*95c635efSGarrett D'Amore * an error; if we can, calculate our length, read past the 333*95c635efSGarrett D'Amore * terminating character, and exit. 
334*95c635efSGarrett D'Amore */ 335*95c635efSGarrett D'Amore 336*95c635efSGarrett D'Amore if ('\0' != term) { 337*95c635efSGarrett D'Amore *end = strchr(&cp[i], term); 338*95c635efSGarrett D'Amore if ('\0' == *end) 339*95c635efSGarrett D'Amore return(ESCAPE_ERROR); 340*95c635efSGarrett D'Amore 341*95c635efSGarrett D'Amore rlim = *end - &cp[i]; 342*95c635efSGarrett D'Amore if (sz) 343*95c635efSGarrett D'Amore *sz = rlim; 344*95c635efSGarrett D'Amore (*end)++; 345*95c635efSGarrett D'Amore goto out; 346*95c635efSGarrett D'Amore } 347*95c635efSGarrett D'Amore 348*95c635efSGarrett D'Amore assert(lim > 0); 349*95c635efSGarrett D'Amore 350*95c635efSGarrett D'Amore /* 351*95c635efSGarrett D'Amore * We have a numeric limit. If the string is shorter than that, 352*95c635efSGarrett D'Amore * stop and return an error. Else adjust our endpoint, length, 353*95c635efSGarrett D'Amore * and return the current glyph. 354*95c635efSGarrett D'Amore */ 355*95c635efSGarrett D'Amore 356*95c635efSGarrett D'Amore if ((size_t)lim > strlen(&cp[i])) 357*95c635efSGarrett D'Amore return(ESCAPE_ERROR); 358*95c635efSGarrett D'Amore 359*95c635efSGarrett D'Amore rlim = lim; 360*95c635efSGarrett D'Amore if (sz) 361*95c635efSGarrett D'Amore *sz = rlim; 362*95c635efSGarrett D'Amore 363*95c635efSGarrett D'Amore *end = &cp[i] + lim; 364*95c635efSGarrett D'Amore 365*95c635efSGarrett D'Amore out: 366*95c635efSGarrett D'Amore assert(rlim >= 0 && rstart); 367*95c635efSGarrett D'Amore 368*95c635efSGarrett D'Amore /* Run post-processors. */ 369*95c635efSGarrett D'Amore 370*95c635efSGarrett D'Amore switch (gly) { 371*95c635efSGarrett D'Amore case (ESCAPE_FONT): 372*95c635efSGarrett D'Amore /* 373*95c635efSGarrett D'Amore * Pretend that the constant-width font modes are the 374*95c635efSGarrett D'Amore * same as the regular font modes. 
375*95c635efSGarrett D'Amore */ 376*95c635efSGarrett D'Amore if (2 == rlim && 'C' == *rstart) 377*95c635efSGarrett D'Amore rstart++; 378*95c635efSGarrett D'Amore else if (1 != rlim) 379*95c635efSGarrett D'Amore break; 380*95c635efSGarrett D'Amore 381*95c635efSGarrett D'Amore switch (*rstart) { 382*95c635efSGarrett D'Amore case ('3'): 383*95c635efSGarrett D'Amore /* FALLTHROUGH */ 384*95c635efSGarrett D'Amore case ('B'): 385*95c635efSGarrett D'Amore gly = ESCAPE_FONTBOLD; 386*95c635efSGarrett D'Amore break; 387*95c635efSGarrett D'Amore case ('2'): 388*95c635efSGarrett D'Amore /* FALLTHROUGH */ 389*95c635efSGarrett D'Amore case ('I'): 390*95c635efSGarrett D'Amore gly = ESCAPE_FONTITALIC; 391*95c635efSGarrett D'Amore break; 392*95c635efSGarrett D'Amore case ('P'): 393*95c635efSGarrett D'Amore gly = ESCAPE_FONTPREV; 394*95c635efSGarrett D'Amore break; 395*95c635efSGarrett D'Amore case ('1'): 396*95c635efSGarrett D'Amore /* FALLTHROUGH */ 397*95c635efSGarrett D'Amore case ('R'): 398*95c635efSGarrett D'Amore gly = ESCAPE_FONTROMAN; 399*95c635efSGarrett D'Amore break; 400*95c635efSGarrett D'Amore } 401*95c635efSGarrett D'Amore break; 402*95c635efSGarrett D'Amore case (ESCAPE_SPECIAL): 403*95c635efSGarrett D'Amore if (1 != rlim) 404*95c635efSGarrett D'Amore break; 405*95c635efSGarrett D'Amore if ('c' == *rstart) 406*95c635efSGarrett D'Amore gly = ESCAPE_NOSPACE; 407*95c635efSGarrett D'Amore break; 408*95c635efSGarrett D'Amore default: 409*95c635efSGarrett D'Amore break; 410*95c635efSGarrett D'Amore } 411*95c635efSGarrett D'Amore 412*95c635efSGarrett D'Amore return(gly); 413*95c635efSGarrett D'Amore } 414*95c635efSGarrett D'Amore 415*95c635efSGarrett D'Amore void * 416*95c635efSGarrett D'Amore mandoc_calloc(size_t num, size_t size) 417*95c635efSGarrett D'Amore { 418*95c635efSGarrett D'Amore void *ptr; 419*95c635efSGarrett D'Amore 420*95c635efSGarrett D'Amore ptr = calloc(num, size); 421*95c635efSGarrett D'Amore if (NULL == ptr) { 422*95c635efSGarrett D'Amore perror(NULL); 
423*95c635efSGarrett D'Amore exit((int)MANDOCLEVEL_SYSERR); 424*95c635efSGarrett D'Amore } 425*95c635efSGarrett D'Amore 426*95c635efSGarrett D'Amore return(ptr); 427*95c635efSGarrett D'Amore } 428*95c635efSGarrett D'Amore 429*95c635efSGarrett D'Amore 430*95c635efSGarrett D'Amore void * 431*95c635efSGarrett D'Amore mandoc_malloc(size_t size) 432*95c635efSGarrett D'Amore { 433*95c635efSGarrett D'Amore void *ptr; 434*95c635efSGarrett D'Amore 435*95c635efSGarrett D'Amore ptr = malloc(size); 436*95c635efSGarrett D'Amore if (NULL == ptr) { 437*95c635efSGarrett D'Amore perror(NULL); 438*95c635efSGarrett D'Amore exit((int)MANDOCLEVEL_SYSERR); 439*95c635efSGarrett D'Amore } 440*95c635efSGarrett D'Amore 441*95c635efSGarrett D'Amore return(ptr); 442*95c635efSGarrett D'Amore } 443*95c635efSGarrett D'Amore 444*95c635efSGarrett D'Amore 445*95c635efSGarrett D'Amore void * 446*95c635efSGarrett D'Amore mandoc_realloc(void *ptr, size_t size) 447*95c635efSGarrett D'Amore { 448*95c635efSGarrett D'Amore 449*95c635efSGarrett D'Amore ptr = realloc(ptr, size); 450*95c635efSGarrett D'Amore if (NULL == ptr) { 451*95c635efSGarrett D'Amore perror(NULL); 452*95c635efSGarrett D'Amore exit((int)MANDOCLEVEL_SYSERR); 453*95c635efSGarrett D'Amore } 454*95c635efSGarrett D'Amore 455*95c635efSGarrett D'Amore return(ptr); 456*95c635efSGarrett D'Amore } 457*95c635efSGarrett D'Amore 458*95c635efSGarrett D'Amore char * 459*95c635efSGarrett D'Amore mandoc_strndup(const char *ptr, size_t sz) 460*95c635efSGarrett D'Amore { 461*95c635efSGarrett D'Amore char *p; 462*95c635efSGarrett D'Amore 463*95c635efSGarrett D'Amore p = mandoc_malloc(sz + 1); 464*95c635efSGarrett D'Amore memcpy(p, ptr, sz); 465*95c635efSGarrett D'Amore p[(int)sz] = '\0'; 466*95c635efSGarrett D'Amore return(p); 467*95c635efSGarrett D'Amore } 468*95c635efSGarrett D'Amore 469*95c635efSGarrett D'Amore char * 470*95c635efSGarrett D'Amore mandoc_strdup(const char *ptr) 471*95c635efSGarrett D'Amore { 472*95c635efSGarrett D'Amore char *p; 
473*95c635efSGarrett D'Amore 474*95c635efSGarrett D'Amore p = strdup(ptr); 475*95c635efSGarrett D'Amore if (NULL == p) { 476*95c635efSGarrett D'Amore perror(NULL); 477*95c635efSGarrett D'Amore exit((int)MANDOCLEVEL_SYSERR); 478*95c635efSGarrett D'Amore } 479*95c635efSGarrett D'Amore 480*95c635efSGarrett D'Amore return(p); 481*95c635efSGarrett D'Amore } 482*95c635efSGarrett D'Amore 483*95c635efSGarrett D'Amore /* 484*95c635efSGarrett D'Amore * Parse a quoted or unquoted roff-style request or macro argument. 485*95c635efSGarrett D'Amore * Return a pointer to the parsed argument, which is either the original 486*95c635efSGarrett D'Amore * pointer or advanced by one byte in case the argument is quoted. 487*95c635efSGarrett D'Amore * Null-terminate the argument in place. 488*95c635efSGarrett D'Amore * Collapse pairs of quotes inside quoted arguments. 489*95c635efSGarrett D'Amore * Advance the argument pointer to the next argument, 490*95c635efSGarrett D'Amore * or to the null byte terminating the argument line. 491*95c635efSGarrett D'Amore */ 492*95c635efSGarrett D'Amore char * 493*95c635efSGarrett D'Amore mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos) 494*95c635efSGarrett D'Amore { 495*95c635efSGarrett D'Amore char *start, *cp; 496*95c635efSGarrett D'Amore int quoted, pairs, white; 497*95c635efSGarrett D'Amore 498*95c635efSGarrett D'Amore /* Quoting can only start with a new word. */ 499*95c635efSGarrett D'Amore start = *cpp; 500*95c635efSGarrett D'Amore quoted = 0; 501*95c635efSGarrett D'Amore if ('"' == *start) { 502*95c635efSGarrett D'Amore quoted = 1; 503*95c635efSGarrett D'Amore start++; 504*95c635efSGarrett D'Amore } 505*95c635efSGarrett D'Amore 506*95c635efSGarrett D'Amore pairs = 0; 507*95c635efSGarrett D'Amore white = 0; 508*95c635efSGarrett D'Amore for (cp = start; '\0' != *cp; cp++) { 509*95c635efSGarrett D'Amore /* Move left after quoted quotes and escaped backslashes. 
*/ 510*95c635efSGarrett D'Amore if (pairs) 511*95c635efSGarrett D'Amore cp[-pairs] = cp[0]; 512*95c635efSGarrett D'Amore if ('\\' == cp[0]) { 513*95c635efSGarrett D'Amore if ('\\' == cp[1]) { 514*95c635efSGarrett D'Amore /* Poor man's copy mode. */ 515*95c635efSGarrett D'Amore pairs++; 516*95c635efSGarrett D'Amore cp++; 517*95c635efSGarrett D'Amore } else if (0 == quoted && ' ' == cp[1]) 518*95c635efSGarrett D'Amore /* Skip escaped blanks. */ 519*95c635efSGarrett D'Amore cp++; 520*95c635efSGarrett D'Amore } else if (0 == quoted) { 521*95c635efSGarrett D'Amore if (' ' == cp[0]) { 522*95c635efSGarrett D'Amore /* Unescaped blanks end unquoted args. */ 523*95c635efSGarrett D'Amore white = 1; 524*95c635efSGarrett D'Amore break; 525*95c635efSGarrett D'Amore } 526*95c635efSGarrett D'Amore } else if ('"' == cp[0]) { 527*95c635efSGarrett D'Amore if ('"' == cp[1]) { 528*95c635efSGarrett D'Amore /* Quoted quotes collapse. */ 529*95c635efSGarrett D'Amore pairs++; 530*95c635efSGarrett D'Amore cp++; 531*95c635efSGarrett D'Amore } else { 532*95c635efSGarrett D'Amore /* Unquoted quotes end quoted args. */ 533*95c635efSGarrett D'Amore quoted = 2; 534*95c635efSGarrett D'Amore break; 535*95c635efSGarrett D'Amore } 536*95c635efSGarrett D'Amore } 537*95c635efSGarrett D'Amore } 538*95c635efSGarrett D'Amore 539*95c635efSGarrett D'Amore /* Quoted argument without a closing quote. */ 540*95c635efSGarrett D'Amore if (1 == quoted) 541*95c635efSGarrett D'Amore mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL); 542*95c635efSGarrett D'Amore 543*95c635efSGarrett D'Amore /* Null-terminate this argument and move to the next one. */ 544*95c635efSGarrett D'Amore if (pairs) 545*95c635efSGarrett D'Amore cp[-pairs] = '\0'; 546*95c635efSGarrett D'Amore if ('\0' != *cp) { 547*95c635efSGarrett D'Amore *cp++ = '\0'; 548*95c635efSGarrett D'Amore while (' ' == *cp) 549*95c635efSGarrett D'Amore cp++; 550*95c635efSGarrett D'Amore } 551*95c635efSGarrett D'Amore *pos += (int)(cp - start) + (quoted ? 
1 : 0); 552*95c635efSGarrett D'Amore *cpp = cp; 553*95c635efSGarrett D'Amore 554*95c635efSGarrett D'Amore if ('\0' == *cp && (white || ' ' == cp[-1])) 555*95c635efSGarrett D'Amore mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL); 556*95c635efSGarrett D'Amore 557*95c635efSGarrett D'Amore return(start); 558*95c635efSGarrett D'Amore } 559*95c635efSGarrett D'Amore 560*95c635efSGarrett D'Amore static int 561*95c635efSGarrett D'Amore a2time(time_t *t, const char *fmt, const char *p) 562*95c635efSGarrett D'Amore { 563*95c635efSGarrett D'Amore struct tm tm; 564*95c635efSGarrett D'Amore char *pp; 565*95c635efSGarrett D'Amore 566*95c635efSGarrett D'Amore memset(&tm, 0, sizeof(struct tm)); 567*95c635efSGarrett D'Amore 568*95c635efSGarrett D'Amore pp = NULL; 569*95c635efSGarrett D'Amore #ifdef HAVE_STRPTIME 570*95c635efSGarrett D'Amore pp = strptime(p, fmt, &tm); 571*95c635efSGarrett D'Amore #endif 572*95c635efSGarrett D'Amore if (NULL != pp && '\0' == *pp) { 573*95c635efSGarrett D'Amore *t = mktime(&tm); 574*95c635efSGarrett D'Amore return(1); 575*95c635efSGarrett D'Amore } 576*95c635efSGarrett D'Amore 577*95c635efSGarrett D'Amore return(0); 578*95c635efSGarrett D'Amore } 579*95c635efSGarrett D'Amore 580*95c635efSGarrett D'Amore static char * 581*95c635efSGarrett D'Amore time2a(time_t t) 582*95c635efSGarrett D'Amore { 583*95c635efSGarrett D'Amore struct tm *tm; 584*95c635efSGarrett D'Amore char *buf, *p; 585*95c635efSGarrett D'Amore size_t ssz; 586*95c635efSGarrett D'Amore int isz; 587*95c635efSGarrett D'Amore 588*95c635efSGarrett D'Amore tm = localtime(&t); 589*95c635efSGarrett D'Amore 590*95c635efSGarrett D'Amore /* 591*95c635efSGarrett D'Amore * Reserve space: 592*95c635efSGarrett D'Amore * up to 9 characters for the month (September) + blank 593*95c635efSGarrett D'Amore * up to 2 characters for the day + comma + blank 594*95c635efSGarrett D'Amore * 4 characters for the year and a terminating '\0' 595*95c635efSGarrett D'Amore */ 596*95c635efSGarrett D'Amore p = buf = 
mandoc_malloc(10 + 4 + 4 + 1); 597*95c635efSGarrett D'Amore 598*95c635efSGarrett D'Amore if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm))) 599*95c635efSGarrett D'Amore goto fail; 600*95c635efSGarrett D'Amore p += (int)ssz; 601*95c635efSGarrett D'Amore 602*95c635efSGarrett D'Amore if (-1 == (isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday))) 603*95c635efSGarrett D'Amore goto fail; 604*95c635efSGarrett D'Amore p += isz; 605*95c635efSGarrett D'Amore 606*95c635efSGarrett D'Amore if (0 == strftime(p, 4 + 1, "%Y", tm)) 607*95c635efSGarrett D'Amore goto fail; 608*95c635efSGarrett D'Amore return(buf); 609*95c635efSGarrett D'Amore 610*95c635efSGarrett D'Amore fail: 611*95c635efSGarrett D'Amore free(buf); 612*95c635efSGarrett D'Amore return(NULL); 613*95c635efSGarrett D'Amore } 614*95c635efSGarrett D'Amore 615*95c635efSGarrett D'Amore char * 616*95c635efSGarrett D'Amore mandoc_normdate(struct mparse *parse, char *in, int ln, int pos) 617*95c635efSGarrett D'Amore { 618*95c635efSGarrett D'Amore char *out; 619*95c635efSGarrett D'Amore time_t t; 620*95c635efSGarrett D'Amore 621*95c635efSGarrett D'Amore if (NULL == in || '\0' == *in || 622*95c635efSGarrett D'Amore 0 == strcmp(in, "$" "Mdocdate$")) { 623*95c635efSGarrett D'Amore mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL); 624*95c635efSGarrett D'Amore time(&t); 625*95c635efSGarrett D'Amore } 626*95c635efSGarrett D'Amore else if (a2time(&t, "%Y-%m-%d", in)) 627*95c635efSGarrett D'Amore t = 0; 628*95c635efSGarrett D'Amore else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) && 629*95c635efSGarrett D'Amore !a2time(&t, "%b %d, %Y", in)) { 630*95c635efSGarrett D'Amore mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL); 631*95c635efSGarrett D'Amore t = 0; 632*95c635efSGarrett D'Amore } 633*95c635efSGarrett D'Amore out = t ? time2a(t) : NULL; 634*95c635efSGarrett D'Amore return(out ? 
out : mandoc_strdup(in)); 635*95c635efSGarrett D'Amore } 636*95c635efSGarrett D'Amore 637*95c635efSGarrett D'Amore int 638*95c635efSGarrett D'Amore mandoc_eos(const char *p, size_t sz, int enclosed) 639*95c635efSGarrett D'Amore { 640*95c635efSGarrett D'Amore const char *q; 641*95c635efSGarrett D'Amore int found; 642*95c635efSGarrett D'Amore 643*95c635efSGarrett D'Amore if (0 == sz) 644*95c635efSGarrett D'Amore return(0); 645*95c635efSGarrett D'Amore 646*95c635efSGarrett D'Amore /* 647*95c635efSGarrett D'Amore * End-of-sentence recognition must include situations where 648*95c635efSGarrett D'Amore * some symbols, such as `)', allow prior EOS punctuation to 649*95c635efSGarrett D'Amore * propagate outward. 650*95c635efSGarrett D'Amore */ 651*95c635efSGarrett D'Amore 652*95c635efSGarrett D'Amore found = 0; 653*95c635efSGarrett D'Amore for (q = p + (int)sz - 1; q >= p; q--) { 654*95c635efSGarrett D'Amore switch (*q) { 655*95c635efSGarrett D'Amore case ('\"'): 656*95c635efSGarrett D'Amore /* FALLTHROUGH */ 657*95c635efSGarrett D'Amore case ('\''): 658*95c635efSGarrett D'Amore /* FALLTHROUGH */ 659*95c635efSGarrett D'Amore case (']'): 660*95c635efSGarrett D'Amore /* FALLTHROUGH */ 661*95c635efSGarrett D'Amore case (')'): 662*95c635efSGarrett D'Amore if (0 == found) 663*95c635efSGarrett D'Amore enclosed = 1; 664*95c635efSGarrett D'Amore break; 665*95c635efSGarrett D'Amore case ('.'): 666*95c635efSGarrett D'Amore /* FALLTHROUGH */ 667*95c635efSGarrett D'Amore case ('!'): 668*95c635efSGarrett D'Amore /* FALLTHROUGH */ 669*95c635efSGarrett D'Amore case ('?'): 670*95c635efSGarrett D'Amore found = 1; 671*95c635efSGarrett D'Amore break; 672*95c635efSGarrett D'Amore default: 673*95c635efSGarrett D'Amore return(found && (!enclosed || isalnum((unsigned char)*q))); 674*95c635efSGarrett D'Amore } 675*95c635efSGarrett D'Amore } 676*95c635efSGarrett D'Amore 677*95c635efSGarrett D'Amore return(found && !enclosed); 678*95c635efSGarrett D'Amore } 679*95c635efSGarrett D'Amore 
/*
 * Find out whether a line is a macro line or not.  If it is, adjust the
 * current position and return one; if it isn't, return zero and don't
 * change the current position.
 *
 * A control sequence is either "\." or a single '.' or '\'' at offset
 * *ppos in "cp"; any blanks and tabs following it are skipped as well.
 */
int
mandoc_getcontrol(const char *cp, int *ppos)
{
	int		 pos;

	pos = *ppos;

	if ('\\' == cp[pos] && '.' == cp[pos + 1])
		pos += 2;
	else if ('.' == cp[pos] || '\'' == cp[pos])
		pos++;
	else
		return(0);

	while (' ' == cp[pos] || '\t' == cp[pos])
		pos++;

	*ppos = pos;
	return(1);
}

/*
 * Convert the first "sz" bytes of "p" to an int in the given base.
 * Return -1 if the input is longer than 31 bytes, is empty, is not
 * completely consumed by strtol(3), or if the base is rejected.
 * Results outside the range of int are clamped to INT_MIN or INT_MAX;
 * note that valid negative input is returned as is, so -1 is
 * ambiguous for callers that accept negative numbers.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char		 buf[32];
	char		*ep;
	long		 v;

	if (sz > 31)
		return(-1);

	/* strtol(3) needs a NUL-terminated string: work on a copy. */
	memcpy(buf, p, sz);
	buf[sz] = '\0';

	/*
	 * Some implementations reject an unsupported base without
	 * storing an end pointer, so give it a defined value first.
	 */
	ep = buf;
	errno = 0;
	v = strtol(buf, &ep, base);

	if (EINVAL == errno)
		return(-1);
	if (buf[0] == '\0' || *ep != '\0')
		return(-1);

	if (v > INT_MAX)
		v = INT_MAX;
	if (v < INT_MIN)
		v = INT_MIN;

	return((int)v);
}