1*61d06d6bSBaptiste Daroussin /* $Id: mandoc.c,v 1.104 2018/07/28 18:34:15 schwarze Exp $ */ 2*61d06d6bSBaptiste Daroussin /* 3*61d06d6bSBaptiste Daroussin * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4*61d06d6bSBaptiste Daroussin * Copyright (c) 2011-2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org> 5*61d06d6bSBaptiste Daroussin * 6*61d06d6bSBaptiste Daroussin * Permission to use, copy, modify, and distribute this software for any 7*61d06d6bSBaptiste Daroussin * purpose with or without fee is hereby granted, provided that the above 8*61d06d6bSBaptiste Daroussin * copyright notice and this permission notice appear in all copies. 9*61d06d6bSBaptiste Daroussin * 10*61d06d6bSBaptiste Daroussin * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11*61d06d6bSBaptiste Daroussin * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12*61d06d6bSBaptiste Daroussin * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13*61d06d6bSBaptiste Daroussin * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14*61d06d6bSBaptiste Daroussin * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15*61d06d6bSBaptiste Daroussin * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16*61d06d6bSBaptiste Daroussin * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
17*61d06d6bSBaptiste Daroussin */ 18*61d06d6bSBaptiste Daroussin #include "config.h" 19*61d06d6bSBaptiste Daroussin 20*61d06d6bSBaptiste Daroussin #include <sys/types.h> 21*61d06d6bSBaptiste Daroussin 22*61d06d6bSBaptiste Daroussin #include <assert.h> 23*61d06d6bSBaptiste Daroussin #include <ctype.h> 24*61d06d6bSBaptiste Daroussin #include <errno.h> 25*61d06d6bSBaptiste Daroussin #include <limits.h> 26*61d06d6bSBaptiste Daroussin #include <stdlib.h> 27*61d06d6bSBaptiste Daroussin #include <stdio.h> 28*61d06d6bSBaptiste Daroussin #include <string.h> 29*61d06d6bSBaptiste Daroussin #include <time.h> 30*61d06d6bSBaptiste Daroussin 31*61d06d6bSBaptiste Daroussin #include "mandoc_aux.h" 32*61d06d6bSBaptiste Daroussin #include "mandoc.h" 33*61d06d6bSBaptiste Daroussin #include "roff.h" 34*61d06d6bSBaptiste Daroussin #include "libmandoc.h" 35*61d06d6bSBaptiste Daroussin 36*61d06d6bSBaptiste Daroussin static int a2time(time_t *, const char *, const char *); 37*61d06d6bSBaptiste Daroussin static char *time2a(time_t); 38*61d06d6bSBaptiste Daroussin 39*61d06d6bSBaptiste Daroussin 40*61d06d6bSBaptiste Daroussin enum mandoc_esc 41*61d06d6bSBaptiste Daroussin mandoc_escape(const char **end, const char **start, int *sz) 42*61d06d6bSBaptiste Daroussin { 43*61d06d6bSBaptiste Daroussin const char *local_start; 44*61d06d6bSBaptiste Daroussin int local_sz; 45*61d06d6bSBaptiste Daroussin char term; 46*61d06d6bSBaptiste Daroussin enum mandoc_esc gly; 47*61d06d6bSBaptiste Daroussin 48*61d06d6bSBaptiste Daroussin /* 49*61d06d6bSBaptiste Daroussin * When the caller doesn't provide return storage, 50*61d06d6bSBaptiste Daroussin * use local storage. 
51*61d06d6bSBaptiste Daroussin */ 52*61d06d6bSBaptiste Daroussin 53*61d06d6bSBaptiste Daroussin if (NULL == start) 54*61d06d6bSBaptiste Daroussin start = &local_start; 55*61d06d6bSBaptiste Daroussin if (NULL == sz) 56*61d06d6bSBaptiste Daroussin sz = &local_sz; 57*61d06d6bSBaptiste Daroussin 58*61d06d6bSBaptiste Daroussin /* 59*61d06d6bSBaptiste Daroussin * Beyond the backslash, at least one input character 60*61d06d6bSBaptiste Daroussin * is part of the escape sequence. With one exception 61*61d06d6bSBaptiste Daroussin * (see below), that character won't be returned. 62*61d06d6bSBaptiste Daroussin */ 63*61d06d6bSBaptiste Daroussin 64*61d06d6bSBaptiste Daroussin gly = ESCAPE_ERROR; 65*61d06d6bSBaptiste Daroussin *start = ++*end; 66*61d06d6bSBaptiste Daroussin *sz = 0; 67*61d06d6bSBaptiste Daroussin term = '\0'; 68*61d06d6bSBaptiste Daroussin 69*61d06d6bSBaptiste Daroussin switch ((*start)[-1]) { 70*61d06d6bSBaptiste Daroussin /* 71*61d06d6bSBaptiste Daroussin * First the glyphs. There are several different forms of 72*61d06d6bSBaptiste Daroussin * these, but each eventually returns a substring of the glyph 73*61d06d6bSBaptiste Daroussin * name. 
74*61d06d6bSBaptiste Daroussin */ 75*61d06d6bSBaptiste Daroussin case '(': 76*61d06d6bSBaptiste Daroussin gly = ESCAPE_SPECIAL; 77*61d06d6bSBaptiste Daroussin *sz = 2; 78*61d06d6bSBaptiste Daroussin break; 79*61d06d6bSBaptiste Daroussin case '[': 80*61d06d6bSBaptiste Daroussin gly = ESCAPE_SPECIAL; 81*61d06d6bSBaptiste Daroussin term = ']'; 82*61d06d6bSBaptiste Daroussin break; 83*61d06d6bSBaptiste Daroussin case 'C': 84*61d06d6bSBaptiste Daroussin if ('\'' != **start) 85*61d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 86*61d06d6bSBaptiste Daroussin *start = ++*end; 87*61d06d6bSBaptiste Daroussin gly = ESCAPE_SPECIAL; 88*61d06d6bSBaptiste Daroussin term = '\''; 89*61d06d6bSBaptiste Daroussin break; 90*61d06d6bSBaptiste Daroussin 91*61d06d6bSBaptiste Daroussin /* 92*61d06d6bSBaptiste Daroussin * Escapes taking no arguments at all. 93*61d06d6bSBaptiste Daroussin */ 94*61d06d6bSBaptiste Daroussin case 'd': 95*61d06d6bSBaptiste Daroussin case 'u': 96*61d06d6bSBaptiste Daroussin case ',': 97*61d06d6bSBaptiste Daroussin case '/': 98*61d06d6bSBaptiste Daroussin return ESCAPE_IGNORE; 99*61d06d6bSBaptiste Daroussin case 'p': 100*61d06d6bSBaptiste Daroussin return ESCAPE_BREAK; 101*61d06d6bSBaptiste Daroussin 102*61d06d6bSBaptiste Daroussin /* 103*61d06d6bSBaptiste Daroussin * The \z escape is supposed to output the following 104*61d06d6bSBaptiste Daroussin * character without advancing the cursor position. 105*61d06d6bSBaptiste Daroussin * Since we are mostly dealing with terminal mode, 106*61d06d6bSBaptiste Daroussin * let us just skip the next character. 107*61d06d6bSBaptiste Daroussin */ 108*61d06d6bSBaptiste Daroussin case 'z': 109*61d06d6bSBaptiste Daroussin return ESCAPE_SKIPCHAR; 110*61d06d6bSBaptiste Daroussin 111*61d06d6bSBaptiste Daroussin /* 112*61d06d6bSBaptiste Daroussin * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where 113*61d06d6bSBaptiste Daroussin * 'X' is the trigger. These have opaque sub-strings. 
114*61d06d6bSBaptiste Daroussin */ 115*61d06d6bSBaptiste Daroussin case 'F': 116*61d06d6bSBaptiste Daroussin case 'g': 117*61d06d6bSBaptiste Daroussin case 'k': 118*61d06d6bSBaptiste Daroussin case 'M': 119*61d06d6bSBaptiste Daroussin case 'm': 120*61d06d6bSBaptiste Daroussin case 'n': 121*61d06d6bSBaptiste Daroussin case 'V': 122*61d06d6bSBaptiste Daroussin case 'Y': 123*61d06d6bSBaptiste Daroussin gly = ESCAPE_IGNORE; 124*61d06d6bSBaptiste Daroussin /* FALLTHROUGH */ 125*61d06d6bSBaptiste Daroussin case 'f': 126*61d06d6bSBaptiste Daroussin if (ESCAPE_ERROR == gly) 127*61d06d6bSBaptiste Daroussin gly = ESCAPE_FONT; 128*61d06d6bSBaptiste Daroussin switch (**start) { 129*61d06d6bSBaptiste Daroussin case '(': 130*61d06d6bSBaptiste Daroussin *start = ++*end; 131*61d06d6bSBaptiste Daroussin *sz = 2; 132*61d06d6bSBaptiste Daroussin break; 133*61d06d6bSBaptiste Daroussin case '[': 134*61d06d6bSBaptiste Daroussin *start = ++*end; 135*61d06d6bSBaptiste Daroussin term = ']'; 136*61d06d6bSBaptiste Daroussin break; 137*61d06d6bSBaptiste Daroussin default: 138*61d06d6bSBaptiste Daroussin *sz = 1; 139*61d06d6bSBaptiste Daroussin break; 140*61d06d6bSBaptiste Daroussin } 141*61d06d6bSBaptiste Daroussin break; 142*61d06d6bSBaptiste Daroussin 143*61d06d6bSBaptiste Daroussin /* 144*61d06d6bSBaptiste Daroussin * These escapes are of the form \X'Y', where 'X' is the trigger 145*61d06d6bSBaptiste Daroussin * and 'Y' is any string. These have opaque sub-strings. 146*61d06d6bSBaptiste Daroussin * The \B and \w escapes are handled in roff.c, roff_res(). 
147*61d06d6bSBaptiste Daroussin */ 148*61d06d6bSBaptiste Daroussin case 'A': 149*61d06d6bSBaptiste Daroussin case 'b': 150*61d06d6bSBaptiste Daroussin case 'D': 151*61d06d6bSBaptiste Daroussin case 'R': 152*61d06d6bSBaptiste Daroussin case 'X': 153*61d06d6bSBaptiste Daroussin case 'Z': 154*61d06d6bSBaptiste Daroussin gly = ESCAPE_IGNORE; 155*61d06d6bSBaptiste Daroussin /* FALLTHROUGH */ 156*61d06d6bSBaptiste Daroussin case 'o': 157*61d06d6bSBaptiste Daroussin if (**start == '\0') 158*61d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 159*61d06d6bSBaptiste Daroussin if (gly == ESCAPE_ERROR) 160*61d06d6bSBaptiste Daroussin gly = ESCAPE_OVERSTRIKE; 161*61d06d6bSBaptiste Daroussin term = **start; 162*61d06d6bSBaptiste Daroussin *start = ++*end; 163*61d06d6bSBaptiste Daroussin break; 164*61d06d6bSBaptiste Daroussin 165*61d06d6bSBaptiste Daroussin /* 166*61d06d6bSBaptiste Daroussin * These escapes are of the form \X'N', where 'X' is the trigger 167*61d06d6bSBaptiste Daroussin * and 'N' resolves to a numerical expression. 
168*61d06d6bSBaptiste Daroussin */ 169*61d06d6bSBaptiste Daroussin case 'h': 170*61d06d6bSBaptiste Daroussin case 'H': 171*61d06d6bSBaptiste Daroussin case 'L': 172*61d06d6bSBaptiste Daroussin case 'l': 173*61d06d6bSBaptiste Daroussin case 'S': 174*61d06d6bSBaptiste Daroussin case 'v': 175*61d06d6bSBaptiste Daroussin case 'x': 176*61d06d6bSBaptiste Daroussin if (strchr(" %&()*+-./0123456789:<=>", **start)) { 177*61d06d6bSBaptiste Daroussin if ('\0' != **start) 178*61d06d6bSBaptiste Daroussin ++*end; 179*61d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 180*61d06d6bSBaptiste Daroussin } 181*61d06d6bSBaptiste Daroussin switch ((*start)[-1]) { 182*61d06d6bSBaptiste Daroussin case 'h': 183*61d06d6bSBaptiste Daroussin gly = ESCAPE_HORIZ; 184*61d06d6bSBaptiste Daroussin break; 185*61d06d6bSBaptiste Daroussin case 'l': 186*61d06d6bSBaptiste Daroussin gly = ESCAPE_HLINE; 187*61d06d6bSBaptiste Daroussin break; 188*61d06d6bSBaptiste Daroussin default: 189*61d06d6bSBaptiste Daroussin gly = ESCAPE_IGNORE; 190*61d06d6bSBaptiste Daroussin break; 191*61d06d6bSBaptiste Daroussin } 192*61d06d6bSBaptiste Daroussin term = **start; 193*61d06d6bSBaptiste Daroussin *start = ++*end; 194*61d06d6bSBaptiste Daroussin break; 195*61d06d6bSBaptiste Daroussin 196*61d06d6bSBaptiste Daroussin /* 197*61d06d6bSBaptiste Daroussin * Special handling for the numbered character escape. 198*61d06d6bSBaptiste Daroussin * XXX Do any other escapes need similar handling? 
199*61d06d6bSBaptiste Daroussin */ 200*61d06d6bSBaptiste Daroussin case 'N': 201*61d06d6bSBaptiste Daroussin if ('\0' == **start) 202*61d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 203*61d06d6bSBaptiste Daroussin (*end)++; 204*61d06d6bSBaptiste Daroussin if (isdigit((unsigned char)**start)) { 205*61d06d6bSBaptiste Daroussin *sz = 1; 206*61d06d6bSBaptiste Daroussin return ESCAPE_IGNORE; 207*61d06d6bSBaptiste Daroussin } 208*61d06d6bSBaptiste Daroussin (*start)++; 209*61d06d6bSBaptiste Daroussin while (isdigit((unsigned char)**end)) 210*61d06d6bSBaptiste Daroussin (*end)++; 211*61d06d6bSBaptiste Daroussin *sz = *end - *start; 212*61d06d6bSBaptiste Daroussin if ('\0' != **end) 213*61d06d6bSBaptiste Daroussin (*end)++; 214*61d06d6bSBaptiste Daroussin return ESCAPE_NUMBERED; 215*61d06d6bSBaptiste Daroussin 216*61d06d6bSBaptiste Daroussin /* 217*61d06d6bSBaptiste Daroussin * Sizes get a special category of their own. 218*61d06d6bSBaptiste Daroussin */ 219*61d06d6bSBaptiste Daroussin case 's': 220*61d06d6bSBaptiste Daroussin gly = ESCAPE_IGNORE; 221*61d06d6bSBaptiste Daroussin 222*61d06d6bSBaptiste Daroussin /* See +/- counts as a sign. 
*/ 223*61d06d6bSBaptiste Daroussin if ('+' == **end || '-' == **end || ASCII_HYPH == **end) 224*61d06d6bSBaptiste Daroussin *start = ++*end; 225*61d06d6bSBaptiste Daroussin 226*61d06d6bSBaptiste Daroussin switch (**end) { 227*61d06d6bSBaptiste Daroussin case '(': 228*61d06d6bSBaptiste Daroussin *start = ++*end; 229*61d06d6bSBaptiste Daroussin *sz = 2; 230*61d06d6bSBaptiste Daroussin break; 231*61d06d6bSBaptiste Daroussin case '[': 232*61d06d6bSBaptiste Daroussin *start = ++*end; 233*61d06d6bSBaptiste Daroussin term = ']'; 234*61d06d6bSBaptiste Daroussin break; 235*61d06d6bSBaptiste Daroussin case '\'': 236*61d06d6bSBaptiste Daroussin *start = ++*end; 237*61d06d6bSBaptiste Daroussin term = '\''; 238*61d06d6bSBaptiste Daroussin break; 239*61d06d6bSBaptiste Daroussin case '3': 240*61d06d6bSBaptiste Daroussin case '2': 241*61d06d6bSBaptiste Daroussin case '1': 242*61d06d6bSBaptiste Daroussin *sz = (*end)[-1] == 's' && 243*61d06d6bSBaptiste Daroussin isdigit((unsigned char)(*end)[1]) ? 2 : 1; 244*61d06d6bSBaptiste Daroussin break; 245*61d06d6bSBaptiste Daroussin default: 246*61d06d6bSBaptiste Daroussin *sz = 1; 247*61d06d6bSBaptiste Daroussin break; 248*61d06d6bSBaptiste Daroussin } 249*61d06d6bSBaptiste Daroussin 250*61d06d6bSBaptiste Daroussin break; 251*61d06d6bSBaptiste Daroussin 252*61d06d6bSBaptiste Daroussin /* 253*61d06d6bSBaptiste Daroussin * Anything else is assumed to be a glyph. 254*61d06d6bSBaptiste Daroussin * In this case, pass back the character after the backslash. 
255*61d06d6bSBaptiste Daroussin */ 256*61d06d6bSBaptiste Daroussin default: 257*61d06d6bSBaptiste Daroussin gly = ESCAPE_SPECIAL; 258*61d06d6bSBaptiste Daroussin *start = --*end; 259*61d06d6bSBaptiste Daroussin *sz = 1; 260*61d06d6bSBaptiste Daroussin break; 261*61d06d6bSBaptiste Daroussin } 262*61d06d6bSBaptiste Daroussin 263*61d06d6bSBaptiste Daroussin assert(ESCAPE_ERROR != gly); 264*61d06d6bSBaptiste Daroussin 265*61d06d6bSBaptiste Daroussin /* 266*61d06d6bSBaptiste Daroussin * Read up to the terminating character, 267*61d06d6bSBaptiste Daroussin * paying attention to nested escapes. 268*61d06d6bSBaptiste Daroussin */ 269*61d06d6bSBaptiste Daroussin 270*61d06d6bSBaptiste Daroussin if ('\0' != term) { 271*61d06d6bSBaptiste Daroussin while (**end != term) { 272*61d06d6bSBaptiste Daroussin switch (**end) { 273*61d06d6bSBaptiste Daroussin case '\0': 274*61d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 275*61d06d6bSBaptiste Daroussin case '\\': 276*61d06d6bSBaptiste Daroussin (*end)++; 277*61d06d6bSBaptiste Daroussin if (ESCAPE_ERROR == 278*61d06d6bSBaptiste Daroussin mandoc_escape(end, NULL, NULL)) 279*61d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 280*61d06d6bSBaptiste Daroussin break; 281*61d06d6bSBaptiste Daroussin default: 282*61d06d6bSBaptiste Daroussin (*end)++; 283*61d06d6bSBaptiste Daroussin break; 284*61d06d6bSBaptiste Daroussin } 285*61d06d6bSBaptiste Daroussin } 286*61d06d6bSBaptiste Daroussin *sz = (*end)++ - *start; 287*61d06d6bSBaptiste Daroussin } else { 288*61d06d6bSBaptiste Daroussin assert(*sz > 0); 289*61d06d6bSBaptiste Daroussin if ((size_t)*sz > strlen(*start)) 290*61d06d6bSBaptiste Daroussin return ESCAPE_ERROR; 291*61d06d6bSBaptiste Daroussin *end += *sz; 292*61d06d6bSBaptiste Daroussin } 293*61d06d6bSBaptiste Daroussin 294*61d06d6bSBaptiste Daroussin /* Run post-processors. 
*/ 295*61d06d6bSBaptiste Daroussin 296*61d06d6bSBaptiste Daroussin switch (gly) { 297*61d06d6bSBaptiste Daroussin case ESCAPE_FONT: 298*61d06d6bSBaptiste Daroussin if (2 == *sz) { 299*61d06d6bSBaptiste Daroussin if ('C' == **start) { 300*61d06d6bSBaptiste Daroussin /* 301*61d06d6bSBaptiste Daroussin * Treat constant-width font modes 302*61d06d6bSBaptiste Daroussin * just like regular font modes. 303*61d06d6bSBaptiste Daroussin */ 304*61d06d6bSBaptiste Daroussin (*start)++; 305*61d06d6bSBaptiste Daroussin (*sz)--; 306*61d06d6bSBaptiste Daroussin } else { 307*61d06d6bSBaptiste Daroussin if ('B' == (*start)[0] && 'I' == (*start)[1]) 308*61d06d6bSBaptiste Daroussin gly = ESCAPE_FONTBI; 309*61d06d6bSBaptiste Daroussin break; 310*61d06d6bSBaptiste Daroussin } 311*61d06d6bSBaptiste Daroussin } else if (1 != *sz) 312*61d06d6bSBaptiste Daroussin break; 313*61d06d6bSBaptiste Daroussin 314*61d06d6bSBaptiste Daroussin switch (**start) { 315*61d06d6bSBaptiste Daroussin case '3': 316*61d06d6bSBaptiste Daroussin case 'B': 317*61d06d6bSBaptiste Daroussin gly = ESCAPE_FONTBOLD; 318*61d06d6bSBaptiste Daroussin break; 319*61d06d6bSBaptiste Daroussin case '2': 320*61d06d6bSBaptiste Daroussin case 'I': 321*61d06d6bSBaptiste Daroussin gly = ESCAPE_FONTITALIC; 322*61d06d6bSBaptiste Daroussin break; 323*61d06d6bSBaptiste Daroussin case 'P': 324*61d06d6bSBaptiste Daroussin gly = ESCAPE_FONTPREV; 325*61d06d6bSBaptiste Daroussin break; 326*61d06d6bSBaptiste Daroussin case '1': 327*61d06d6bSBaptiste Daroussin case 'R': 328*61d06d6bSBaptiste Daroussin gly = ESCAPE_FONTROMAN; 329*61d06d6bSBaptiste Daroussin break; 330*61d06d6bSBaptiste Daroussin } 331*61d06d6bSBaptiste Daroussin break; 332*61d06d6bSBaptiste Daroussin case ESCAPE_SPECIAL: 333*61d06d6bSBaptiste Daroussin if (1 == *sz && 'c' == **start) 334*61d06d6bSBaptiste Daroussin gly = ESCAPE_NOSPACE; 335*61d06d6bSBaptiste Daroussin /* 336*61d06d6bSBaptiste Daroussin * Unicode escapes are defined in groff as \[u0000] 337*61d06d6bSBaptiste 
Daroussin * to \[u10FFFF], where the contained value must be 338*61d06d6bSBaptiste Daroussin * a valid Unicode codepoint. Here, however, only 339*61d06d6bSBaptiste Daroussin * check the length and range. 340*61d06d6bSBaptiste Daroussin */ 341*61d06d6bSBaptiste Daroussin if (**start != 'u' || *sz < 5 || *sz > 7) 342*61d06d6bSBaptiste Daroussin break; 343*61d06d6bSBaptiste Daroussin if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0')) 344*61d06d6bSBaptiste Daroussin break; 345*61d06d6bSBaptiste Daroussin if (*sz == 6 && (*start)[1] == '0') 346*61d06d6bSBaptiste Daroussin break; 347*61d06d6bSBaptiste Daroussin if (*sz == 5 && (*start)[1] == 'D' && 348*61d06d6bSBaptiste Daroussin strchr("89ABCDEF", (*start)[2]) != NULL) 349*61d06d6bSBaptiste Daroussin break; 350*61d06d6bSBaptiste Daroussin if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef") 351*61d06d6bSBaptiste Daroussin + 1 == *sz) 352*61d06d6bSBaptiste Daroussin gly = ESCAPE_UNICODE; 353*61d06d6bSBaptiste Daroussin break; 354*61d06d6bSBaptiste Daroussin default: 355*61d06d6bSBaptiste Daroussin break; 356*61d06d6bSBaptiste Daroussin } 357*61d06d6bSBaptiste Daroussin 358*61d06d6bSBaptiste Daroussin return gly; 359*61d06d6bSBaptiste Daroussin } 360*61d06d6bSBaptiste Daroussin 361*61d06d6bSBaptiste Daroussin /* 362*61d06d6bSBaptiste Daroussin * Parse a quoted or unquoted roff-style request or macro argument. 363*61d06d6bSBaptiste Daroussin * Return a pointer to the parsed argument, which is either the original 364*61d06d6bSBaptiste Daroussin * pointer or advanced by one byte in case the argument is quoted. 365*61d06d6bSBaptiste Daroussin * NUL-terminate the argument in place. 366*61d06d6bSBaptiste Daroussin * Collapse pairs of quotes inside quoted arguments. 367*61d06d6bSBaptiste Daroussin * Advance the argument pointer to the next argument, 368*61d06d6bSBaptiste Daroussin * or to the NUL byte terminating the argument line. 
369*61d06d6bSBaptiste Daroussin */ 370*61d06d6bSBaptiste Daroussin char * 371*61d06d6bSBaptiste Daroussin mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos) 372*61d06d6bSBaptiste Daroussin { 373*61d06d6bSBaptiste Daroussin char *start, *cp; 374*61d06d6bSBaptiste Daroussin int quoted, pairs, white; 375*61d06d6bSBaptiste Daroussin 376*61d06d6bSBaptiste Daroussin /* Quoting can only start with a new word. */ 377*61d06d6bSBaptiste Daroussin start = *cpp; 378*61d06d6bSBaptiste Daroussin quoted = 0; 379*61d06d6bSBaptiste Daroussin if ('"' == *start) { 380*61d06d6bSBaptiste Daroussin quoted = 1; 381*61d06d6bSBaptiste Daroussin start++; 382*61d06d6bSBaptiste Daroussin } 383*61d06d6bSBaptiste Daroussin 384*61d06d6bSBaptiste Daroussin pairs = 0; 385*61d06d6bSBaptiste Daroussin white = 0; 386*61d06d6bSBaptiste Daroussin for (cp = start; '\0' != *cp; cp++) { 387*61d06d6bSBaptiste Daroussin 388*61d06d6bSBaptiste Daroussin /* 389*61d06d6bSBaptiste Daroussin * Move the following text left 390*61d06d6bSBaptiste Daroussin * after quoted quotes and after "\\" and "\t". 391*61d06d6bSBaptiste Daroussin */ 392*61d06d6bSBaptiste Daroussin if (pairs) 393*61d06d6bSBaptiste Daroussin cp[-pairs] = cp[0]; 394*61d06d6bSBaptiste Daroussin 395*61d06d6bSBaptiste Daroussin if ('\\' == cp[0]) { 396*61d06d6bSBaptiste Daroussin /* 397*61d06d6bSBaptiste Daroussin * In copy mode, translate double to single 398*61d06d6bSBaptiste Daroussin * backslashes and backslash-t to literal tabs. 399*61d06d6bSBaptiste Daroussin */ 400*61d06d6bSBaptiste Daroussin switch (cp[1]) { 401*61d06d6bSBaptiste Daroussin case 't': 402*61d06d6bSBaptiste Daroussin cp[0] = '\t'; 403*61d06d6bSBaptiste Daroussin /* FALLTHROUGH */ 404*61d06d6bSBaptiste Daroussin case '\\': 405*61d06d6bSBaptiste Daroussin pairs++; 406*61d06d6bSBaptiste Daroussin cp++; 407*61d06d6bSBaptiste Daroussin break; 408*61d06d6bSBaptiste Daroussin case ' ': 409*61d06d6bSBaptiste Daroussin /* Skip escaped blanks. 
*/ 410*61d06d6bSBaptiste Daroussin if (0 == quoted) 411*61d06d6bSBaptiste Daroussin cp++; 412*61d06d6bSBaptiste Daroussin break; 413*61d06d6bSBaptiste Daroussin default: 414*61d06d6bSBaptiste Daroussin break; 415*61d06d6bSBaptiste Daroussin } 416*61d06d6bSBaptiste Daroussin } else if (0 == quoted) { 417*61d06d6bSBaptiste Daroussin if (' ' == cp[0]) { 418*61d06d6bSBaptiste Daroussin /* Unescaped blanks end unquoted args. */ 419*61d06d6bSBaptiste Daroussin white = 1; 420*61d06d6bSBaptiste Daroussin break; 421*61d06d6bSBaptiste Daroussin } 422*61d06d6bSBaptiste Daroussin } else if ('"' == cp[0]) { 423*61d06d6bSBaptiste Daroussin if ('"' == cp[1]) { 424*61d06d6bSBaptiste Daroussin /* Quoted quotes collapse. */ 425*61d06d6bSBaptiste Daroussin pairs++; 426*61d06d6bSBaptiste Daroussin cp++; 427*61d06d6bSBaptiste Daroussin } else { 428*61d06d6bSBaptiste Daroussin /* Unquoted quotes end quoted args. */ 429*61d06d6bSBaptiste Daroussin quoted = 2; 430*61d06d6bSBaptiste Daroussin break; 431*61d06d6bSBaptiste Daroussin } 432*61d06d6bSBaptiste Daroussin } 433*61d06d6bSBaptiste Daroussin } 434*61d06d6bSBaptiste Daroussin 435*61d06d6bSBaptiste Daroussin /* Quoted argument without a closing quote. */ 436*61d06d6bSBaptiste Daroussin if (1 == quoted) 437*61d06d6bSBaptiste Daroussin mandoc_msg(MANDOCERR_ARG_QUOTE, parse, ln, *pos, NULL); 438*61d06d6bSBaptiste Daroussin 439*61d06d6bSBaptiste Daroussin /* NUL-terminate this argument and move to the next one. */ 440*61d06d6bSBaptiste Daroussin if (pairs) 441*61d06d6bSBaptiste Daroussin cp[-pairs] = '\0'; 442*61d06d6bSBaptiste Daroussin if ('\0' != *cp) { 443*61d06d6bSBaptiste Daroussin *cp++ = '\0'; 444*61d06d6bSBaptiste Daroussin while (' ' == *cp) 445*61d06d6bSBaptiste Daroussin cp++; 446*61d06d6bSBaptiste Daroussin } 447*61d06d6bSBaptiste Daroussin *pos += (int)(cp - start) + (quoted ? 
1 : 0); 448*61d06d6bSBaptiste Daroussin *cpp = cp; 449*61d06d6bSBaptiste Daroussin 450*61d06d6bSBaptiste Daroussin if ('\0' == *cp && (white || ' ' == cp[-1])) 451*61d06d6bSBaptiste Daroussin mandoc_msg(MANDOCERR_SPACE_EOL, parse, ln, *pos, NULL); 452*61d06d6bSBaptiste Daroussin 453*61d06d6bSBaptiste Daroussin return start; 454*61d06d6bSBaptiste Daroussin } 455*61d06d6bSBaptiste Daroussin 456*61d06d6bSBaptiste Daroussin static int 457*61d06d6bSBaptiste Daroussin a2time(time_t *t, const char *fmt, const char *p) 458*61d06d6bSBaptiste Daroussin { 459*61d06d6bSBaptiste Daroussin struct tm tm; 460*61d06d6bSBaptiste Daroussin char *pp; 461*61d06d6bSBaptiste Daroussin 462*61d06d6bSBaptiste Daroussin memset(&tm, 0, sizeof(struct tm)); 463*61d06d6bSBaptiste Daroussin 464*61d06d6bSBaptiste Daroussin pp = NULL; 465*61d06d6bSBaptiste Daroussin #if HAVE_STRPTIME 466*61d06d6bSBaptiste Daroussin pp = strptime(p, fmt, &tm); 467*61d06d6bSBaptiste Daroussin #endif 468*61d06d6bSBaptiste Daroussin if (NULL != pp && '\0' == *pp) { 469*61d06d6bSBaptiste Daroussin *t = mktime(&tm); 470*61d06d6bSBaptiste Daroussin return 1; 471*61d06d6bSBaptiste Daroussin } 472*61d06d6bSBaptiste Daroussin 473*61d06d6bSBaptiste Daroussin return 0; 474*61d06d6bSBaptiste Daroussin } 475*61d06d6bSBaptiste Daroussin 476*61d06d6bSBaptiste Daroussin static char * 477*61d06d6bSBaptiste Daroussin time2a(time_t t) 478*61d06d6bSBaptiste Daroussin { 479*61d06d6bSBaptiste Daroussin struct tm *tm; 480*61d06d6bSBaptiste Daroussin char *buf, *p; 481*61d06d6bSBaptiste Daroussin size_t ssz; 482*61d06d6bSBaptiste Daroussin int isz; 483*61d06d6bSBaptiste Daroussin 484*61d06d6bSBaptiste Daroussin tm = localtime(&t); 485*61d06d6bSBaptiste Daroussin if (tm == NULL) 486*61d06d6bSBaptiste Daroussin return NULL; 487*61d06d6bSBaptiste Daroussin 488*61d06d6bSBaptiste Daroussin /* 489*61d06d6bSBaptiste Daroussin * Reserve space: 490*61d06d6bSBaptiste Daroussin * up to 9 characters for the month (September) + blank 
491*61d06d6bSBaptiste Daroussin * up to 2 characters for the day + comma + blank 492*61d06d6bSBaptiste Daroussin * 4 characters for the year and a terminating '\0' 493*61d06d6bSBaptiste Daroussin */ 494*61d06d6bSBaptiste Daroussin 495*61d06d6bSBaptiste Daroussin p = buf = mandoc_malloc(10 + 4 + 4 + 1); 496*61d06d6bSBaptiste Daroussin 497*61d06d6bSBaptiste Daroussin if ((ssz = strftime(p, 10 + 1, "%B ", tm)) == 0) 498*61d06d6bSBaptiste Daroussin goto fail; 499*61d06d6bSBaptiste Daroussin p += (int)ssz; 500*61d06d6bSBaptiste Daroussin 501*61d06d6bSBaptiste Daroussin /* 502*61d06d6bSBaptiste Daroussin * The output format is just "%d" here, not "%2d" or "%02d". 503*61d06d6bSBaptiste Daroussin * That's also the reason why we can't just format the 504*61d06d6bSBaptiste Daroussin * date as a whole with "%B %e, %Y" or "%B %d, %Y". 505*61d06d6bSBaptiste Daroussin * Besides, the present approach is less prone to buffer 506*61d06d6bSBaptiste Daroussin * overflows, in case anybody should ever introduce the bug 507*61d06d6bSBaptiste Daroussin * of looking at LC_TIME. 
508*61d06d6bSBaptiste Daroussin */ 509*61d06d6bSBaptiste Daroussin 510*61d06d6bSBaptiste Daroussin if ((isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday)) == -1) 511*61d06d6bSBaptiste Daroussin goto fail; 512*61d06d6bSBaptiste Daroussin p += isz; 513*61d06d6bSBaptiste Daroussin 514*61d06d6bSBaptiste Daroussin if (strftime(p, 4 + 1, "%Y", tm) == 0) 515*61d06d6bSBaptiste Daroussin goto fail; 516*61d06d6bSBaptiste Daroussin return buf; 517*61d06d6bSBaptiste Daroussin 518*61d06d6bSBaptiste Daroussin fail: 519*61d06d6bSBaptiste Daroussin free(buf); 520*61d06d6bSBaptiste Daroussin return NULL; 521*61d06d6bSBaptiste Daroussin } 522*61d06d6bSBaptiste Daroussin 523*61d06d6bSBaptiste Daroussin char * 524*61d06d6bSBaptiste Daroussin mandoc_normdate(struct roff_man *man, char *in, int ln, int pos) 525*61d06d6bSBaptiste Daroussin { 526*61d06d6bSBaptiste Daroussin char *cp; 527*61d06d6bSBaptiste Daroussin time_t t; 528*61d06d6bSBaptiste Daroussin 529*61d06d6bSBaptiste Daroussin /* No date specified: use today's date. */ 530*61d06d6bSBaptiste Daroussin 531*61d06d6bSBaptiste Daroussin if (in == NULL || *in == '\0' || strcmp(in, "$" "Mdocdate$") == 0) { 532*61d06d6bSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_MISSING, man->parse, ln, pos, NULL); 533*61d06d6bSBaptiste Daroussin return time2a(time(NULL)); 534*61d06d6bSBaptiste Daroussin } 535*61d06d6bSBaptiste Daroussin 536*61d06d6bSBaptiste Daroussin /* Valid mdoc(7) date format. 
*/ 537*61d06d6bSBaptiste Daroussin 538*61d06d6bSBaptiste Daroussin if (a2time(&t, "$" "Mdocdate: %b %d %Y $", in) || 539*61d06d6bSBaptiste Daroussin a2time(&t, "%b %d, %Y", in)) { 540*61d06d6bSBaptiste Daroussin cp = time2a(t); 541*61d06d6bSBaptiste Daroussin if (t > time(NULL) + 86400) 542*61d06d6bSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_FUTURE, man->parse, 543*61d06d6bSBaptiste Daroussin ln, pos, cp); 544*61d06d6bSBaptiste Daroussin else if (*in != '$' && strcmp(in, cp) != 0) 545*61d06d6bSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_NORM, man->parse, 546*61d06d6bSBaptiste Daroussin ln, pos, cp); 547*61d06d6bSBaptiste Daroussin return cp; 548*61d06d6bSBaptiste Daroussin } 549*61d06d6bSBaptiste Daroussin 550*61d06d6bSBaptiste Daroussin /* In man(7), do not warn about the legacy format. */ 551*61d06d6bSBaptiste Daroussin 552*61d06d6bSBaptiste Daroussin if (a2time(&t, "%Y-%m-%d", in) == 0) 553*61d06d6bSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_BAD, man->parse, ln, pos, in); 554*61d06d6bSBaptiste Daroussin else if (t > time(NULL) + 86400) 555*61d06d6bSBaptiste Daroussin mandoc_msg(MANDOCERR_DATE_FUTURE, man->parse, ln, pos, in); 556*61d06d6bSBaptiste Daroussin else if (man->macroset == MACROSET_MDOC) 557*61d06d6bSBaptiste Daroussin mandoc_vmsg(MANDOCERR_DATE_LEGACY, man->parse, 558*61d06d6bSBaptiste Daroussin ln, pos, "Dd %s", in); 559*61d06d6bSBaptiste Daroussin 560*61d06d6bSBaptiste Daroussin /* Use any non-mdoc(7) date verbatim. 
*/ 561*61d06d6bSBaptiste Daroussin 562*61d06d6bSBaptiste Daroussin return mandoc_strdup(in); 563*61d06d6bSBaptiste Daroussin } 564*61d06d6bSBaptiste Daroussin 565*61d06d6bSBaptiste Daroussin int 566*61d06d6bSBaptiste Daroussin mandoc_eos(const char *p, size_t sz) 567*61d06d6bSBaptiste Daroussin { 568*61d06d6bSBaptiste Daroussin const char *q; 569*61d06d6bSBaptiste Daroussin int enclosed, found; 570*61d06d6bSBaptiste Daroussin 571*61d06d6bSBaptiste Daroussin if (0 == sz) 572*61d06d6bSBaptiste Daroussin return 0; 573*61d06d6bSBaptiste Daroussin 574*61d06d6bSBaptiste Daroussin /* 575*61d06d6bSBaptiste Daroussin * End-of-sentence recognition must include situations where 576*61d06d6bSBaptiste Daroussin * some symbols, such as `)', allow prior EOS punctuation to 577*61d06d6bSBaptiste Daroussin * propagate outward. 578*61d06d6bSBaptiste Daroussin */ 579*61d06d6bSBaptiste Daroussin 580*61d06d6bSBaptiste Daroussin enclosed = found = 0; 581*61d06d6bSBaptiste Daroussin for (q = p + (int)sz - 1; q >= p; q--) { 582*61d06d6bSBaptiste Daroussin switch (*q) { 583*61d06d6bSBaptiste Daroussin case '\"': 584*61d06d6bSBaptiste Daroussin case '\'': 585*61d06d6bSBaptiste Daroussin case ']': 586*61d06d6bSBaptiste Daroussin case ')': 587*61d06d6bSBaptiste Daroussin if (0 == found) 588*61d06d6bSBaptiste Daroussin enclosed = 1; 589*61d06d6bSBaptiste Daroussin break; 590*61d06d6bSBaptiste Daroussin case '.': 591*61d06d6bSBaptiste Daroussin case '!': 592*61d06d6bSBaptiste Daroussin case '?': 593*61d06d6bSBaptiste Daroussin found = 1; 594*61d06d6bSBaptiste Daroussin break; 595*61d06d6bSBaptiste Daroussin default: 596*61d06d6bSBaptiste Daroussin return found && 597*61d06d6bSBaptiste Daroussin (!enclosed || isalnum((unsigned char)*q)); 598*61d06d6bSBaptiste Daroussin } 599*61d06d6bSBaptiste Daroussin } 600*61d06d6bSBaptiste Daroussin 601*61d06d6bSBaptiste Daroussin return found && !enclosed; 602*61d06d6bSBaptiste Daroussin } 603*61d06d6bSBaptiste Daroussin 604*61d06d6bSBaptiste Daroussin /* 
/*
 * Convert the first sz bytes of p to an int in the given base.
 * Return -1 if sz exceeds 31 bytes, if the string is empty,
 * or if it is not a number in its entirety.
 * Values outside the range of int are clamped to INT_MIN or INT_MAX.
 * Negative numbers are returned as such, so a result of -1 is
 * ambiguous for callers that accept negative input.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char	 buf[32];
	char	*ep;
	long	 v;

	if (sz > 31)
		return -1;

	/* strtol(3) needs a NUL-terminated string, so copy. */
	memcpy(buf, p, sz);
	buf[sz] = '\0';

	/*
	 * errno is reset but not inspected afterwards:
	 * on overflow, strtol(3) returns LONG_MIN or LONG_MAX,
	 * which the clamping below reduces to the int range.
	 */
	errno = 0;
	v = strtol(buf, &ep, base);

	if (buf[0] == '\0' || *ep != '\0')
		return -1;

	if (v > INT_MAX)
		v = INT_MAX;
	if (v < INT_MIN)
		v = INT_MIN;

	return (int)v;
}