1*260e9a87SYuri Pankov /* $Id: mandoc.c,v 1.92 2015/02/20 23:55:10 schwarze Exp $ */ 295c635efSGarrett D'Amore /* 3*260e9a87SYuri Pankov * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4*260e9a87SYuri Pankov * Copyright (c) 2011-2015 Ingo Schwarze <schwarze@openbsd.org> 595c635efSGarrett D'Amore * 695c635efSGarrett D'Amore * Permission to use, copy, modify, and distribute this software for any 795c635efSGarrett D'Amore * purpose with or without fee is hereby granted, provided that the above 895c635efSGarrett D'Amore * copyright notice and this permission notice appear in all copies. 995c635efSGarrett D'Amore * 1095c635efSGarrett D'Amore * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 1195c635efSGarrett D'Amore * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 1295c635efSGarrett D'Amore * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 1395c635efSGarrett D'Amore * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 1495c635efSGarrett D'Amore * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 1595c635efSGarrett D'Amore * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 1695c635efSGarrett D'Amore * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 1795c635efSGarrett D'Amore */ 1895c635efSGarrett D'Amore #include "config.h" 1995c635efSGarrett D'Amore 2095c635efSGarrett D'Amore #include <sys/types.h> 2195c635efSGarrett D'Amore 2295c635efSGarrett D'Amore #include <assert.h> 2395c635efSGarrett D'Amore #include <ctype.h> 2495c635efSGarrett D'Amore #include <errno.h> 2595c635efSGarrett D'Amore #include <limits.h> 2695c635efSGarrett D'Amore #include <stdlib.h> 2795c635efSGarrett D'Amore #include <stdio.h> 2895c635efSGarrett D'Amore #include <string.h> 2995c635efSGarrett D'Amore #include <time.h> 3095c635efSGarrett D'Amore 3195c635efSGarrett D'Amore #include "mandoc.h" 32*260e9a87SYuri Pankov #include "mandoc_aux.h" 3395c635efSGarrett D'Amore #include "libmandoc.h" 3495c635efSGarrett D'Amore 3595c635efSGarrett D'Amore #define DATESIZE 32 3695c635efSGarrett D'Amore 3795c635efSGarrett D'Amore static int a2time(time_t *, const char *, const char *); 3895c635efSGarrett D'Amore static char *time2a(time_t); 3995c635efSGarrett D'Amore 4095c635efSGarrett D'Amore 4195c635efSGarrett D'Amore enum mandoc_esc 4295c635efSGarrett D'Amore mandoc_escape(const char **end, const char **start, int *sz) 4395c635efSGarrett D'Amore { 44698f87a4SGarrett D'Amore const char *local_start; 45698f87a4SGarrett D'Amore int local_sz; 46698f87a4SGarrett D'Amore char term; 4795c635efSGarrett D'Amore enum mandoc_esc gly; 4895c635efSGarrett D'Amore 49698f87a4SGarrett D'Amore /* 50698f87a4SGarrett D'Amore * When the caller doesn't provide return storage, 51698f87a4SGarrett D'Amore * use local storage. 52698f87a4SGarrett D'Amore */ 5395c635efSGarrett D'Amore 54698f87a4SGarrett D'Amore if (NULL == start) 55698f87a4SGarrett D'Amore start = &local_start; 56698f87a4SGarrett D'Amore if (NULL == sz) 57698f87a4SGarrett D'Amore sz = &local_sz; 58698f87a4SGarrett D'Amore 59698f87a4SGarrett D'Amore /* 60698f87a4SGarrett D'Amore * Beyond the backslash, at least one input character 61698f87a4SGarrett D'Amore * is part of the escape sequence. With one exception 62698f87a4SGarrett D'Amore * (see below), that character won't be returned. 63698f87a4SGarrett D'Amore */ 64698f87a4SGarrett D'Amore 65698f87a4SGarrett D'Amore gly = ESCAPE_ERROR; 66698f87a4SGarrett D'Amore *start = ++*end; 67698f87a4SGarrett D'Amore *sz = 0; 68698f87a4SGarrett D'Amore term = '\0'; 69698f87a4SGarrett D'Amore 70698f87a4SGarrett D'Amore switch ((*start)[-1]) { 7195c635efSGarrett D'Amore /* 7295c635efSGarrett D'Amore * First the glyphs. There are several different forms of 7395c635efSGarrett D'Amore * these, but each eventually returns a substring of the glyph 7495c635efSGarrett D'Amore * name. 7595c635efSGarrett D'Amore */ 76*260e9a87SYuri Pankov case '(': 7795c635efSGarrett D'Amore gly = ESCAPE_SPECIAL; 78698f87a4SGarrett D'Amore *sz = 2; 7995c635efSGarrett D'Amore break; 80*260e9a87SYuri Pankov case '[': 8195c635efSGarrett D'Amore gly = ESCAPE_SPECIAL; 8295c635efSGarrett D'Amore term = ']'; 8395c635efSGarrett D'Amore break; 84*260e9a87SYuri Pankov case 'C': 85698f87a4SGarrett D'Amore if ('\'' != **start) 8695c635efSGarrett D'Amore return(ESCAPE_ERROR); 87698f87a4SGarrett D'Amore *start = ++*end; 8895c635efSGarrett D'Amore gly = ESCAPE_SPECIAL; 8995c635efSGarrett D'Amore term = '\''; 9095c635efSGarrett D'Amore break; 9195c635efSGarrett D'Amore 9295c635efSGarrett D'Amore /* 93698f87a4SGarrett D'Amore * Escapes taking no arguments at all. 94698f87a4SGarrett D'Amore */ 95*260e9a87SYuri Pankov case 'd': 96698f87a4SGarrett D'Amore /* FALLTHROUGH */ 97*260e9a87SYuri Pankov case 'u': 98698f87a4SGarrett D'Amore return(ESCAPE_IGNORE); 99698f87a4SGarrett D'Amore 100698f87a4SGarrett D'Amore /* 101698f87a4SGarrett D'Amore * The \z escape is supposed to output the following 102698f87a4SGarrett D'Amore * character without advancing the cursor position. 103698f87a4SGarrett D'Amore * Since we are mostly dealing with terminal mode, 104698f87a4SGarrett D'Amore * let us just skip the next character. 105698f87a4SGarrett D'Amore */ 106*260e9a87SYuri Pankov case 'z': 107698f87a4SGarrett D'Amore return(ESCAPE_SKIPCHAR); 108698f87a4SGarrett D'Amore 109698f87a4SGarrett D'Amore /* 11095c635efSGarrett D'Amore * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where 11195c635efSGarrett D'Amore * 'X' is the trigger. These have opaque sub-strings. 11295c635efSGarrett D'Amore */ 113*260e9a87SYuri Pankov case 'F': 11495c635efSGarrett D'Amore /* FALLTHROUGH */ 115*260e9a87SYuri Pankov case 'g': 11695c635efSGarrett D'Amore /* FALLTHROUGH */ 117*260e9a87SYuri Pankov case 'k': 11895c635efSGarrett D'Amore /* FALLTHROUGH */ 119*260e9a87SYuri Pankov case 'M': 12095c635efSGarrett D'Amore /* FALLTHROUGH */ 121*260e9a87SYuri Pankov case 'm': 12295c635efSGarrett D'Amore /* FALLTHROUGH */ 123*260e9a87SYuri Pankov case 'n': 12495c635efSGarrett D'Amore /* FALLTHROUGH */ 125*260e9a87SYuri Pankov case 'V': 12695c635efSGarrett D'Amore /* FALLTHROUGH */ 127*260e9a87SYuri Pankov case 'Y': 12895c635efSGarrett D'Amore gly = ESCAPE_IGNORE; 12995c635efSGarrett D'Amore /* FALLTHROUGH */ 130*260e9a87SYuri Pankov case 'f': 13195c635efSGarrett D'Amore if (ESCAPE_ERROR == gly) 13295c635efSGarrett D'Amore gly = ESCAPE_FONT; 133698f87a4SGarrett D'Amore switch (**start) { 134*260e9a87SYuri Pankov case '(': 135698f87a4SGarrett D'Amore *start = ++*end; 136698f87a4SGarrett D'Amore *sz = 2; 13795c635efSGarrett D'Amore break; 138*260e9a87SYuri Pankov case '[': 139698f87a4SGarrett D'Amore *start = ++*end; 14095c635efSGarrett D'Amore term = ']'; 14195c635efSGarrett D'Amore break; 14295c635efSGarrett D'Amore default: 143698f87a4SGarrett D'Amore *sz = 1; 14495c635efSGarrett D'Amore break; 14595c635efSGarrett D'Amore } 14695c635efSGarrett D'Amore break; 14795c635efSGarrett D'Amore 14895c635efSGarrett D'Amore /* 14995c635efSGarrett D'Amore * These escapes are of the form \X'Y', where 'X' is the trigger 15095c635efSGarrett D'Amore * and 'Y' is any string. These have opaque sub-strings. 151*260e9a87SYuri Pankov * The \B and \w escapes are handled in roff.c, roff_res(). 15295c635efSGarrett D'Amore */ 153*260e9a87SYuri Pankov case 'A': 15495c635efSGarrett D'Amore /* FALLTHROUGH */ 155*260e9a87SYuri Pankov case 'b': 15695c635efSGarrett D'Amore /* FALLTHROUGH */ 157*260e9a87SYuri Pankov case 'D': 158698f87a4SGarrett D'Amore /* FALLTHROUGH */ 159*260e9a87SYuri Pankov case 'R': 16095c635efSGarrett D'Amore /* FALLTHROUGH */ 161*260e9a87SYuri Pankov case 'X': 16295c635efSGarrett D'Amore /* FALLTHROUGH */ 163*260e9a87SYuri Pankov case 'Z': 16495c635efSGarrett D'Amore gly = ESCAPE_IGNORE; 165*260e9a87SYuri Pankov /* FALLTHROUGH */ 166*260e9a87SYuri Pankov case 'o': 167*260e9a87SYuri Pankov if (**start == '\0') 168*260e9a87SYuri Pankov return(ESCAPE_ERROR); 169*260e9a87SYuri Pankov if (gly == ESCAPE_ERROR) 170*260e9a87SYuri Pankov gly = ESCAPE_OVERSTRIKE; 171*260e9a87SYuri Pankov term = **start; 172698f87a4SGarrett D'Amore *start = ++*end; 17395c635efSGarrett D'Amore break; 17495c635efSGarrett D'Amore 17595c635efSGarrett D'Amore /* 17695c635efSGarrett D'Amore * These escapes are of the form \X'N', where 'X' is the trigger 17795c635efSGarrett D'Amore * and 'N' resolves to a numerical expression. 17895c635efSGarrett D'Amore */ 179*260e9a87SYuri Pankov case 'h': 18095c635efSGarrett D'Amore /* FALLTHROUGH */ 181*260e9a87SYuri Pankov case 'H': 18295c635efSGarrett D'Amore /* FALLTHROUGH */ 183*260e9a87SYuri Pankov case 'L': 18495c635efSGarrett D'Amore /* FALLTHROUGH */ 185*260e9a87SYuri Pankov case 'l': 18695c635efSGarrett D'Amore /* FALLTHROUGH */ 187*260e9a87SYuri Pankov case 'S': 18895c635efSGarrett D'Amore /* FALLTHROUGH */ 189*260e9a87SYuri Pankov case 'v': 19095c635efSGarrett D'Amore /* FALLTHROUGH */ 191*260e9a87SYuri Pankov case 'x': 192*260e9a87SYuri Pankov if (strchr(" %&()*+-./0123456789:<=>", **start)) { 193*260e9a87SYuri Pankov if ('\0' != **start) 194*260e9a87SYuri Pankov ++*end; 19595c635efSGarrett D'Amore return(ESCAPE_ERROR); 196*260e9a87SYuri Pankov } 197698f87a4SGarrett D'Amore gly = ESCAPE_IGNORE; 198*260e9a87SYuri Pankov term = **start; 199698f87a4SGarrett D'Amore *start = ++*end; 20095c635efSGarrett D'Amore break; 20195c635efSGarrett D'Amore 20295c635efSGarrett D'Amore /* 20395c635efSGarrett D'Amore * Special handling for the numbered character escape. 20495c635efSGarrett D'Amore * XXX Do any other escapes need similar handling? 20595c635efSGarrett D'Amore */ 206*260e9a87SYuri Pankov case 'N': 207698f87a4SGarrett D'Amore if ('\0' == **start) 20895c635efSGarrett D'Amore return(ESCAPE_ERROR); 209698f87a4SGarrett D'Amore (*end)++; 210698f87a4SGarrett D'Amore if (isdigit((unsigned char)**start)) { 211698f87a4SGarrett D'Amore *sz = 1; 21295c635efSGarrett D'Amore return(ESCAPE_IGNORE); 213698f87a4SGarrett D'Amore } 214698f87a4SGarrett D'Amore (*start)++; 21595c635efSGarrett D'Amore while (isdigit((unsigned char)**end)) 21695c635efSGarrett D'Amore (*end)++; 217698f87a4SGarrett D'Amore *sz = *end - *start; 21895c635efSGarrett D'Amore if ('\0' != **end) 21995c635efSGarrett D'Amore (*end)++; 22095c635efSGarrett D'Amore return(ESCAPE_NUMBERED); 22195c635efSGarrett D'Amore 22295c635efSGarrett D'Amore /* 22395c635efSGarrett D'Amore * Sizes get a special category of their own. 22495c635efSGarrett D'Amore */ 225*260e9a87SYuri Pankov case 's': 22695c635efSGarrett D'Amore gly = ESCAPE_IGNORE; 22795c635efSGarrett D'Amore 22895c635efSGarrett D'Amore /* See +/- counts as a sign. */ 229698f87a4SGarrett D'Amore if ('+' == **end || '-' == **end || ASCII_HYPH == **end) 230*260e9a87SYuri Pankov *start = ++*end; 23195c635efSGarrett D'Amore 232698f87a4SGarrett D'Amore switch (**end) { 233*260e9a87SYuri Pankov case '(': 234698f87a4SGarrett D'Amore *start = ++*end; 235698f87a4SGarrett D'Amore *sz = 2; 23695c635efSGarrett D'Amore break; 237*260e9a87SYuri Pankov case '[': 238698f87a4SGarrett D'Amore *start = ++*end; 239698f87a4SGarrett D'Amore term = ']'; 24095c635efSGarrett D'Amore break; 241*260e9a87SYuri Pankov case '\'': 242698f87a4SGarrett D'Amore *start = ++*end; 243698f87a4SGarrett D'Amore term = '\''; 24495c635efSGarrett D'Amore break; 245*260e9a87SYuri Pankov case '3': 246*260e9a87SYuri Pankov /* FALLTHROUGH */ 247*260e9a87SYuri Pankov case '2': 248*260e9a87SYuri Pankov /* FALLTHROUGH */ 249*260e9a87SYuri Pankov case '1': 250*260e9a87SYuri Pankov *sz = (*end)[-1] == 's' && 251*260e9a87SYuri Pankov isdigit((unsigned char)(*end)[1]) ? 2 : 1; 252*260e9a87SYuri Pankov break; 25395c635efSGarrett D'Amore default: 254698f87a4SGarrett D'Amore *sz = 1; 25595c635efSGarrett D'Amore break; 25695c635efSGarrett D'Amore } 25795c635efSGarrett D'Amore 25895c635efSGarrett D'Amore break; 25995c635efSGarrett D'Amore 26095c635efSGarrett D'Amore /* 26195c635efSGarrett D'Amore * Anything else is assumed to be a glyph. 262698f87a4SGarrett D'Amore * In this case, pass back the character after the backslash. 26395c635efSGarrett D'Amore */ 26495c635efSGarrett D'Amore default: 26595c635efSGarrett D'Amore gly = ESCAPE_SPECIAL; 266698f87a4SGarrett D'Amore *start = --*end; 267698f87a4SGarrett D'Amore *sz = 1; 26895c635efSGarrett D'Amore break; 26995c635efSGarrett D'Amore } 27095c635efSGarrett D'Amore 27195c635efSGarrett D'Amore assert(ESCAPE_ERROR != gly); 27295c635efSGarrett D'Amore 27395c635efSGarrett D'Amore /* 274698f87a4SGarrett D'Amore * Read up to the terminating character, 275698f87a4SGarrett D'Amore * paying attention to nested escapes. 27695c635efSGarrett D'Amore */ 27795c635efSGarrett D'Amore 27895c635efSGarrett D'Amore if ('\0' != term) { 279698f87a4SGarrett D'Amore while (**end != term) { 280698f87a4SGarrett D'Amore switch (**end) { 281*260e9a87SYuri Pankov case '\0': 28295c635efSGarrett D'Amore return(ESCAPE_ERROR); 283*260e9a87SYuri Pankov case '\\': 28495c635efSGarrett D'Amore (*end)++; 285698f87a4SGarrett D'Amore if (ESCAPE_ERROR == 286698f87a4SGarrett D'Amore mandoc_escape(end, NULL, NULL)) 28795c635efSGarrett D'Amore return(ESCAPE_ERROR); 288698f87a4SGarrett D'Amore break; 289698f87a4SGarrett D'Amore default: 290698f87a4SGarrett D'Amore (*end)++; 291698f87a4SGarrett D'Amore break; 292698f87a4SGarrett D'Amore } 293698f87a4SGarrett D'Amore } 294698f87a4SGarrett D'Amore *sz = (*end)++ - *start; 295698f87a4SGarrett D'Amore } else { 296698f87a4SGarrett D'Amore assert(*sz > 0); 297698f87a4SGarrett D'Amore if ((size_t)*sz > strlen(*start)) 298698f87a4SGarrett D'Amore return(ESCAPE_ERROR); 299698f87a4SGarrett D'Amore *end += *sz; 300698f87a4SGarrett D'Amore } 30195c635efSGarrett D'Amore 30295c635efSGarrett D'Amore /* Run post-processors. */ 30395c635efSGarrett D'Amore 30495c635efSGarrett D'Amore switch (gly) { 305*260e9a87SYuri Pankov case ESCAPE_FONT: 306698f87a4SGarrett D'Amore if (2 == *sz) { 307698f87a4SGarrett D'Amore if ('C' == **start) { 30895c635efSGarrett D'Amore /* 309698f87a4SGarrett D'Amore * Treat constant-width font modes 310698f87a4SGarrett D'Amore * just like regular font modes. 31195c635efSGarrett D'Amore */ 312698f87a4SGarrett D'Amore (*start)++; 313698f87a4SGarrett D'Amore (*sz)--; 314698f87a4SGarrett D'Amore } else { 315698f87a4SGarrett D'Amore if ('B' == (*start)[0] && 'I' == (*start)[1]) 316698f87a4SGarrett D'Amore gly = ESCAPE_FONTBI; 317698f87a4SGarrett D'Amore break; 318698f87a4SGarrett D'Amore } 319698f87a4SGarrett D'Amore } else if (1 != *sz) 32095c635efSGarrett D'Amore break; 32195c635efSGarrett D'Amore 322698f87a4SGarrett D'Amore switch (**start) { 323*260e9a87SYuri Pankov case '3': 32495c635efSGarrett D'Amore /* FALLTHROUGH */ 325*260e9a87SYuri Pankov case 'B': 32695c635efSGarrett D'Amore gly = ESCAPE_FONTBOLD; 32795c635efSGarrett D'Amore break; 328*260e9a87SYuri Pankov case '2': 32995c635efSGarrett D'Amore /* FALLTHROUGH */ 330*260e9a87SYuri Pankov case 'I': 33195c635efSGarrett D'Amore gly = ESCAPE_FONTITALIC; 33295c635efSGarrett D'Amore break; 333*260e9a87SYuri Pankov case 'P': 33495c635efSGarrett D'Amore gly = ESCAPE_FONTPREV; 33595c635efSGarrett D'Amore break; 336*260e9a87SYuri Pankov case '1': 33795c635efSGarrett D'Amore /* FALLTHROUGH */ 338*260e9a87SYuri Pankov case 'R': 33995c635efSGarrett D'Amore gly = ESCAPE_FONTROMAN; 34095c635efSGarrett D'Amore break; 34195c635efSGarrett D'Amore } 34295c635efSGarrett D'Amore break; 343*260e9a87SYuri Pankov case ESCAPE_SPECIAL: 344698f87a4SGarrett D'Amore if (1 == *sz && 'c' == **start) 34595c635efSGarrett D'Amore gly = ESCAPE_NOSPACE; 346*260e9a87SYuri Pankov /* 347*260e9a87SYuri Pankov * Unicode escapes are defined in groff as \[u0000] 348*260e9a87SYuri Pankov * to \[u10FFFF], where the contained value must be 349*260e9a87SYuri Pankov * a valid Unicode codepoint. Here, however, only 350*260e9a87SYuri Pankov * check the length and range. 351*260e9a87SYuri Pankov */ 352*260e9a87SYuri Pankov if (**start != 'u' || *sz < 5 || *sz > 7) 353*260e9a87SYuri Pankov break; 354*260e9a87SYuri Pankov if (*sz == 7 && ((*start)[1] != '1' || (*start)[2] != '0')) 355*260e9a87SYuri Pankov break; 356*260e9a87SYuri Pankov if (*sz == 6 && (*start)[1] == '0') 357*260e9a87SYuri Pankov break; 358*260e9a87SYuri Pankov if ((int)strspn(*start + 1, "0123456789ABCDEFabcdef") 359*260e9a87SYuri Pankov + 1 == *sz) 360*260e9a87SYuri Pankov gly = ESCAPE_UNICODE; 36195c635efSGarrett D'Amore break; 36295c635efSGarrett D'Amore default: 36395c635efSGarrett D'Amore break; 36495c635efSGarrett D'Amore } 36595c635efSGarrett D'Amore 36695c635efSGarrett D'Amore return(gly); 36795c635efSGarrett D'Amore } 36895c635efSGarrett D'Amore 36995c635efSGarrett D'Amore /* 37095c635efSGarrett D'Amore * Parse a quoted or unquoted roff-style request or macro argument. 37195c635efSGarrett D'Amore * Return a pointer to the parsed argument, which is either the original 37295c635efSGarrett D'Amore * pointer or advanced by one byte in case the argument is quoted. 373698f87a4SGarrett D'Amore * NUL-terminate the argument in place. 37495c635efSGarrett D'Amore * Collapse pairs of quotes inside quoted arguments. 37595c635efSGarrett D'Amore * Advance the argument pointer to the next argument, 376698f87a4SGarrett D'Amore * or to the NUL byte terminating the argument line. 37795c635efSGarrett D'Amore */ 37895c635efSGarrett D'Amore char * 37995c635efSGarrett D'Amore mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos) 38095c635efSGarrett D'Amore { 38195c635efSGarrett D'Amore char *start, *cp; 38295c635efSGarrett D'Amore int quoted, pairs, white; 38395c635efSGarrett D'Amore 38495c635efSGarrett D'Amore /* Quoting can only start with a new word. */ 38595c635efSGarrett D'Amore start = *cpp; 38695c635efSGarrett D'Amore quoted = 0; 38795c635efSGarrett D'Amore if ('"' == *start) { 38895c635efSGarrett D'Amore quoted = 1; 38995c635efSGarrett D'Amore start++; 39095c635efSGarrett D'Amore } 39195c635efSGarrett D'Amore 39295c635efSGarrett D'Amore pairs = 0; 39395c635efSGarrett D'Amore white = 0; 39495c635efSGarrett D'Amore for (cp = start; '\0' != *cp; cp++) { 395698f87a4SGarrett D'Amore 396698f87a4SGarrett D'Amore /* 397698f87a4SGarrett D'Amore * Move the following text left 398698f87a4SGarrett D'Amore * after quoted quotes and after "\\" and "\t". 399698f87a4SGarrett D'Amore */ 40095c635efSGarrett D'Amore if (pairs) 40195c635efSGarrett D'Amore cp[-pairs] = cp[0]; 402698f87a4SGarrett D'Amore 40395c635efSGarrett D'Amore if ('\\' == cp[0]) { 404698f87a4SGarrett D'Amore /* 405698f87a4SGarrett D'Amore * In copy mode, translate double to single 406698f87a4SGarrett D'Amore * backslashes and backslash-t to literal tabs. 407698f87a4SGarrett D'Amore */ 408698f87a4SGarrett D'Amore switch (cp[1]) { 409*260e9a87SYuri Pankov case 't': 410698f87a4SGarrett D'Amore cp[0] = '\t'; 411698f87a4SGarrett D'Amore /* FALLTHROUGH */ 412*260e9a87SYuri Pankov case '\\': 41395c635efSGarrett D'Amore pairs++; 41495c635efSGarrett D'Amore cp++; 415698f87a4SGarrett D'Amore break; 416*260e9a87SYuri Pankov case ' ': 41795c635efSGarrett D'Amore /* Skip escaped blanks. */ 418698f87a4SGarrett D'Amore if (0 == quoted) 41995c635efSGarrett D'Amore cp++; 420698f87a4SGarrett D'Amore break; 421698f87a4SGarrett D'Amore default: 422698f87a4SGarrett D'Amore break; 423698f87a4SGarrett D'Amore } 42495c635efSGarrett D'Amore } else if (0 == quoted) { 42595c635efSGarrett D'Amore if (' ' == cp[0]) { 42695c635efSGarrett D'Amore /* Unescaped blanks end unquoted args. */ 42795c635efSGarrett D'Amore white = 1; 42895c635efSGarrett D'Amore break; 42995c635efSGarrett D'Amore } 43095c635efSGarrett D'Amore } else if ('"' == cp[0]) { 43195c635efSGarrett D'Amore if ('"' == cp[1]) { 43295c635efSGarrett D'Amore /* Quoted quotes collapse. */ 43395c635efSGarrett D'Amore pairs++; 43495c635efSGarrett D'Amore cp++; 43595c635efSGarrett D'Amore } else { 43695c635efSGarrett D'Amore /* Unquoted quotes end quoted args. */ 43795c635efSGarrett D'Amore quoted = 2; 43895c635efSGarrett D'Amore break; 43995c635efSGarrett D'Amore } 44095c635efSGarrett D'Amore } 44195c635efSGarrett D'Amore } 44295c635efSGarrett D'Amore 44395c635efSGarrett D'Amore /* Quoted argument without a closing quote. */ 44495c635efSGarrett D'Amore if (1 == quoted) 445*260e9a87SYuri Pankov mandoc_msg(MANDOCERR_ARG_QUOTE, parse, ln, *pos, NULL); 44695c635efSGarrett D'Amore 447698f87a4SGarrett D'Amore /* NUL-terminate this argument and move to the next one. */ 44895c635efSGarrett D'Amore if (pairs) 44995c635efSGarrett D'Amore cp[-pairs] = '\0'; 45095c635efSGarrett D'Amore if ('\0' != *cp) { 45195c635efSGarrett D'Amore *cp++ = '\0'; 45295c635efSGarrett D'Amore while (' ' == *cp) 45395c635efSGarrett D'Amore cp++; 45495c635efSGarrett D'Amore } 45595c635efSGarrett D'Amore *pos += (int)(cp - start) + (quoted ? 1 : 0); 45695c635efSGarrett D'Amore *cpp = cp; 45795c635efSGarrett D'Amore 45895c635efSGarrett D'Amore if ('\0' == *cp && (white || ' ' == cp[-1])) 459*260e9a87SYuri Pankov mandoc_msg(MANDOCERR_SPACE_EOL, parse, ln, *pos, NULL); 46095c635efSGarrett D'Amore 46195c635efSGarrett D'Amore return(start); 46295c635efSGarrett D'Amore } 46395c635efSGarrett D'Amore 46495c635efSGarrett D'Amore static int 46595c635efSGarrett D'Amore a2time(time_t *t, const char *fmt, const char *p) 46695c635efSGarrett D'Amore { 46795c635efSGarrett D'Amore struct tm tm; 46895c635efSGarrett D'Amore char *pp; 46995c635efSGarrett D'Amore 47095c635efSGarrett D'Amore memset(&tm, 0, sizeof(struct tm)); 47195c635efSGarrett D'Amore 47295c635efSGarrett D'Amore pp = NULL; 473*260e9a87SYuri Pankov #if HAVE_STRPTIME 47495c635efSGarrett D'Amore pp = strptime(p, fmt, &tm); 47595c635efSGarrett D'Amore #endif 47695c635efSGarrett D'Amore if (NULL != pp && '\0' == *pp) { 47795c635efSGarrett D'Amore *t = mktime(&tm); 47895c635efSGarrett D'Amore return(1); 47995c635efSGarrett D'Amore } 48095c635efSGarrett D'Amore 48195c635efSGarrett D'Amore return(0); 48295c635efSGarrett D'Amore } 48395c635efSGarrett D'Amore 48495c635efSGarrett D'Amore static char * 48595c635efSGarrett D'Amore time2a(time_t t) 48695c635efSGarrett D'Amore { 48795c635efSGarrett D'Amore struct tm *tm; 48895c635efSGarrett D'Amore char *buf, *p; 48995c635efSGarrett D'Amore size_t ssz; 49095c635efSGarrett D'Amore int isz; 49195c635efSGarrett D'Amore 49295c635efSGarrett D'Amore tm = localtime(&t); 493*260e9a87SYuri Pankov if (tm == NULL) 494*260e9a87SYuri Pankov return(NULL); 49595c635efSGarrett D'Amore 49695c635efSGarrett D'Amore /* 49795c635efSGarrett D'Amore * Reserve space: 49895c635efSGarrett D'Amore * up to 9 characters for the month (September) + blank 49995c635efSGarrett D'Amore * up to 2 characters for the day + comma + blank 50095c635efSGarrett D'Amore * 4 characters for the year and a terminating '\0' 50195c635efSGarrett D'Amore */ 50295c635efSGarrett D'Amore p = buf = mandoc_malloc(10 + 4 + 4 + 1); 50395c635efSGarrett D'Amore 50495c635efSGarrett D'Amore if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm))) 50595c635efSGarrett D'Amore goto fail; 50695c635efSGarrett D'Amore p += (int)ssz; 50795c635efSGarrett D'Amore 50895c635efSGarrett D'Amore if (-1 == (isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday))) 50995c635efSGarrett D'Amore goto fail; 51095c635efSGarrett D'Amore p += isz; 51195c635efSGarrett D'Amore 51295c635efSGarrett D'Amore if (0 == strftime(p, 4 + 1, "%Y", tm)) 51395c635efSGarrett D'Amore goto fail; 51495c635efSGarrett D'Amore return(buf); 51595c635efSGarrett D'Amore 51695c635efSGarrett D'Amore fail: 51795c635efSGarrett D'Amore free(buf); 51895c635efSGarrett D'Amore return(NULL); 51995c635efSGarrett D'Amore } 52095c635efSGarrett D'Amore 52195c635efSGarrett D'Amore char * 52295c635efSGarrett D'Amore mandoc_normdate(struct mparse *parse, char *in, int ln, int pos) 52395c635efSGarrett D'Amore { 52495c635efSGarrett D'Amore char *out; 52595c635efSGarrett D'Amore time_t t; 52695c635efSGarrett D'Amore 52795c635efSGarrett D'Amore if (NULL == in || '\0' == *in || 52895c635efSGarrett D'Amore 0 == strcmp(in, "$" "Mdocdate$")) { 529*260e9a87SYuri Pankov mandoc_msg(MANDOCERR_DATE_MISSING, parse, ln, pos, NULL); 53095c635efSGarrett D'Amore time(&t); 53195c635efSGarrett D'Amore } 53295c635efSGarrett D'Amore else if (a2time(&t, "%Y-%m-%d", in)) 53395c635efSGarrett D'Amore t = 0; 53495c635efSGarrett D'Amore else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) && 53595c635efSGarrett D'Amore !a2time(&t, "%b %d, %Y", in)) { 536*260e9a87SYuri Pankov mandoc_msg(MANDOCERR_DATE_BAD, parse, ln, pos, in); 53795c635efSGarrett D'Amore t = 0; 53895c635efSGarrett D'Amore } 53995c635efSGarrett D'Amore out = t ? time2a(t) : NULL; 54095c635efSGarrett D'Amore return(out ? out : mandoc_strdup(in)); 54195c635efSGarrett D'Amore } 54295c635efSGarrett D'Amore 54395c635efSGarrett D'Amore int 544*260e9a87SYuri Pankov mandoc_eos(const char *p, size_t sz) 54595c635efSGarrett D'Amore { 54695c635efSGarrett D'Amore const char *q; 547*260e9a87SYuri Pankov int enclosed, found; 54895c635efSGarrett D'Amore 54995c635efSGarrett D'Amore if (0 == sz) 55095c635efSGarrett D'Amore return(0); 55195c635efSGarrett D'Amore 55295c635efSGarrett D'Amore /* 55395c635efSGarrett D'Amore * End-of-sentence recognition must include situations where 55495c635efSGarrett D'Amore * some symbols, such as `)', allow prior EOS punctuation to 55595c635efSGarrett D'Amore * propagate outward. 55695c635efSGarrett D'Amore */ 55795c635efSGarrett D'Amore 558*260e9a87SYuri Pankov enclosed = found = 0; 55995c635efSGarrett D'Amore for (q = p + (int)sz - 1; q >= p; q--) { 56095c635efSGarrett D'Amore switch (*q) { 561*260e9a87SYuri Pankov case '\"': 56295c635efSGarrett D'Amore /* FALLTHROUGH */ 563*260e9a87SYuri Pankov case '\'': 56495c635efSGarrett D'Amore /* FALLTHROUGH */ 565*260e9a87SYuri Pankov case ']': 56695c635efSGarrett D'Amore /* FALLTHROUGH */ 567*260e9a87SYuri Pankov case ')': 56895c635efSGarrett D'Amore if (0 == found) 56995c635efSGarrett D'Amore enclosed = 1; 57095c635efSGarrett D'Amore break; 571*260e9a87SYuri Pankov case '.': 57295c635efSGarrett D'Amore /* FALLTHROUGH */ 573*260e9a87SYuri Pankov case '!': 57495c635efSGarrett D'Amore /* FALLTHROUGH */ 575*260e9a87SYuri Pankov case '?': 57695c635efSGarrett D'Amore found = 1; 57795c635efSGarrett D'Amore break; 57895c635efSGarrett D'Amore default: 57995c635efSGarrett D'Amore return(found && (!enclosed || isalnum((unsigned char)*q))); 58095c635efSGarrett D'Amore } 58195c635efSGarrett D'Amore } 58295c635efSGarrett D'Amore 58395c635efSGarrett D'Amore return(found && !enclosed); 58495c635efSGarrett D'Amore } 58595c635efSGarrett D'Amore 58695c635efSGarrett D'Amore /* 58795c635efSGarrett D'Amore * Convert a string to a long that may not be <0. 58895c635efSGarrett D'Amore * If the string is invalid, or is less than 0, return -1. 58995c635efSGarrett D'Amore */ 59095c635efSGarrett D'Amore int 59195c635efSGarrett D'Amore mandoc_strntoi(const char *p, size_t sz, int base) 59295c635efSGarrett D'Amore { 59395c635efSGarrett D'Amore char buf[32]; 59495c635efSGarrett D'Amore char *ep; 59595c635efSGarrett D'Amore long v; 59695c635efSGarrett D'Amore 59795c635efSGarrett D'Amore if (sz > 31) 59895c635efSGarrett D'Amore return(-1); 59995c635efSGarrett D'Amore 60095c635efSGarrett D'Amore memcpy(buf, p, sz); 60195c635efSGarrett D'Amore buf[(int)sz] = '\0'; 60295c635efSGarrett D'Amore 60395c635efSGarrett D'Amore errno = 0; 60495c635efSGarrett D'Amore v = strtol(buf, &ep, base); 60595c635efSGarrett D'Amore 60695c635efSGarrett D'Amore if (buf[0] == '\0' || *ep != '\0') 60795c635efSGarrett D'Amore return(-1); 60895c635efSGarrett D'Amore 60995c635efSGarrett D'Amore if (v > INT_MAX) 61095c635efSGarrett D'Amore v = INT_MAX; 61195c635efSGarrett D'Amore if (v < INT_MIN) 61295c635efSGarrett D'Amore v = INT_MIN; 61395c635efSGarrett D'Amore 61495c635efSGarrett D'Amore return((int)v); 61595c635efSGarrett D'Amore } 616