1*698f87a4SGarrett D'Amore /* $Id: mandoc.c,v 1.74 2013/12/30 18:30:32 schwarze Exp $ */ 295c635efSGarrett D'Amore /* 395c635efSGarrett D'Amore * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4*698f87a4SGarrett D'Amore * Copyright (c) 2011, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org> 595c635efSGarrett D'Amore * 695c635efSGarrett D'Amore * Permission to use, copy, modify, and distribute this software for any 795c635efSGarrett D'Amore * purpose with or without fee is hereby granted, provided that the above 895c635efSGarrett D'Amore * copyright notice and this permission notice appear in all copies. 995c635efSGarrett D'Amore * 1095c635efSGarrett D'Amore * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 1195c635efSGarrett D'Amore * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 1295c635efSGarrett D'Amore * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 1395c635efSGarrett D'Amore * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 1495c635efSGarrett D'Amore * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 1595c635efSGarrett D'Amore * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 1695c635efSGarrett D'Amore * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
1795c635efSGarrett D'Amore */ 1895c635efSGarrett D'Amore #ifdef HAVE_CONFIG_H 1995c635efSGarrett D'Amore #include "config.h" 2095c635efSGarrett D'Amore #endif 2195c635efSGarrett D'Amore 2295c635efSGarrett D'Amore #include <sys/types.h> 2395c635efSGarrett D'Amore 2495c635efSGarrett D'Amore #include <assert.h> 2595c635efSGarrett D'Amore #include <ctype.h> 2695c635efSGarrett D'Amore #include <errno.h> 2795c635efSGarrett D'Amore #include <limits.h> 2895c635efSGarrett D'Amore #include <stdlib.h> 2995c635efSGarrett D'Amore #include <stdio.h> 3095c635efSGarrett D'Amore #include <string.h> 3195c635efSGarrett D'Amore #include <time.h> 3295c635efSGarrett D'Amore 3395c635efSGarrett D'Amore #include "mandoc.h" 3495c635efSGarrett D'Amore #include "libmandoc.h" 3595c635efSGarrett D'Amore 3695c635efSGarrett D'Amore #define DATESIZE 32 3795c635efSGarrett D'Amore 3895c635efSGarrett D'Amore static int a2time(time_t *, const char *, const char *); 3995c635efSGarrett D'Amore static char *time2a(time_t); 4095c635efSGarrett D'Amore 4195c635efSGarrett D'Amore 4295c635efSGarrett D'Amore enum mandoc_esc 4395c635efSGarrett D'Amore mandoc_escape(const char **end, const char **start, int *sz) 4495c635efSGarrett D'Amore { 45*698f87a4SGarrett D'Amore const char *local_start; 46*698f87a4SGarrett D'Amore int local_sz; 47*698f87a4SGarrett D'Amore char term; 4895c635efSGarrett D'Amore enum mandoc_esc gly; 4995c635efSGarrett D'Amore 50*698f87a4SGarrett D'Amore /* 51*698f87a4SGarrett D'Amore * When the caller doesn't provide return storage, 52*698f87a4SGarrett D'Amore * use local storage. 
53*698f87a4SGarrett D'Amore */ 5495c635efSGarrett D'Amore 55*698f87a4SGarrett D'Amore if (NULL == start) 56*698f87a4SGarrett D'Amore start = &local_start; 57*698f87a4SGarrett D'Amore if (NULL == sz) 58*698f87a4SGarrett D'Amore sz = &local_sz; 59*698f87a4SGarrett D'Amore 60*698f87a4SGarrett D'Amore /* 61*698f87a4SGarrett D'Amore * Beyond the backslash, at least one input character 62*698f87a4SGarrett D'Amore * is part of the escape sequence. With one exception 63*698f87a4SGarrett D'Amore * (see below), that character won't be returned. 64*698f87a4SGarrett D'Amore */ 65*698f87a4SGarrett D'Amore 66*698f87a4SGarrett D'Amore gly = ESCAPE_ERROR; 67*698f87a4SGarrett D'Amore *start = ++*end; 68*698f87a4SGarrett D'Amore *sz = 0; 69*698f87a4SGarrett D'Amore term = '\0'; 70*698f87a4SGarrett D'Amore 71*698f87a4SGarrett D'Amore switch ((*start)[-1]) { 7295c635efSGarrett D'Amore /* 7395c635efSGarrett D'Amore * First the glyphs. There are several different forms of 7495c635efSGarrett D'Amore * these, but each eventually returns a substring of the glyph 7595c635efSGarrett D'Amore * name. 7695c635efSGarrett D'Amore */ 7795c635efSGarrett D'Amore case ('('): 7895c635efSGarrett D'Amore gly = ESCAPE_SPECIAL; 79*698f87a4SGarrett D'Amore *sz = 2; 8095c635efSGarrett D'Amore break; 8195c635efSGarrett D'Amore case ('['): 8295c635efSGarrett D'Amore gly = ESCAPE_SPECIAL; 8395c635efSGarrett D'Amore /* 8495c635efSGarrett D'Amore * Unicode escapes are defined in groff as \[uXXXX] to 8595c635efSGarrett D'Amore * \[u10FFFF], where the contained value must be a valid 8695c635efSGarrett D'Amore * Unicode codepoint. Here, however, only check whether 8795c635efSGarrett D'Amore * it's not a zero-width escape. 
8895c635efSGarrett D'Amore */ 89*698f87a4SGarrett D'Amore if ('u' == (*start)[0] && ']' != (*start)[1]) 9095c635efSGarrett D'Amore gly = ESCAPE_UNICODE; 9195c635efSGarrett D'Amore term = ']'; 9295c635efSGarrett D'Amore break; 9395c635efSGarrett D'Amore case ('C'): 94*698f87a4SGarrett D'Amore if ('\'' != **start) 9595c635efSGarrett D'Amore return(ESCAPE_ERROR); 96*698f87a4SGarrett D'Amore *start = ++*end; 97*698f87a4SGarrett D'Amore if ('u' == (*start)[0] && '\'' != (*start)[1]) 98*698f87a4SGarrett D'Amore gly = ESCAPE_UNICODE; 99*698f87a4SGarrett D'Amore else 10095c635efSGarrett D'Amore gly = ESCAPE_SPECIAL; 10195c635efSGarrett D'Amore term = '\''; 10295c635efSGarrett D'Amore break; 10395c635efSGarrett D'Amore 10495c635efSGarrett D'Amore /* 105*698f87a4SGarrett D'Amore * Escapes taking no arguments at all. 106*698f87a4SGarrett D'Amore */ 107*698f87a4SGarrett D'Amore case ('d'): 108*698f87a4SGarrett D'Amore /* FALLTHROUGH */ 109*698f87a4SGarrett D'Amore case ('u'): 110*698f87a4SGarrett D'Amore return(ESCAPE_IGNORE); 111*698f87a4SGarrett D'Amore 112*698f87a4SGarrett D'Amore /* 113*698f87a4SGarrett D'Amore * The \z escape is supposed to output the following 114*698f87a4SGarrett D'Amore * character without advancing the cursor position. 115*698f87a4SGarrett D'Amore * Since we are mostly dealing with terminal mode, 116*698f87a4SGarrett D'Amore * let us just skip the next character. 117*698f87a4SGarrett D'Amore */ 118*698f87a4SGarrett D'Amore case ('z'): 119*698f87a4SGarrett D'Amore return(ESCAPE_SKIPCHAR); 120*698f87a4SGarrett D'Amore 121*698f87a4SGarrett D'Amore /* 12295c635efSGarrett D'Amore * Handle all triggers matching \X(xy, \Xx, and \X[xxxx], where 12395c635efSGarrett D'Amore * 'X' is the trigger. These have opaque sub-strings. 
12495c635efSGarrett D'Amore */ 12595c635efSGarrett D'Amore case ('F'): 12695c635efSGarrett D'Amore /* FALLTHROUGH */ 12795c635efSGarrett D'Amore case ('g'): 12895c635efSGarrett D'Amore /* FALLTHROUGH */ 12995c635efSGarrett D'Amore case ('k'): 13095c635efSGarrett D'Amore /* FALLTHROUGH */ 13195c635efSGarrett D'Amore case ('M'): 13295c635efSGarrett D'Amore /* FALLTHROUGH */ 13395c635efSGarrett D'Amore case ('m'): 13495c635efSGarrett D'Amore /* FALLTHROUGH */ 13595c635efSGarrett D'Amore case ('n'): 13695c635efSGarrett D'Amore /* FALLTHROUGH */ 13795c635efSGarrett D'Amore case ('V'): 13895c635efSGarrett D'Amore /* FALLTHROUGH */ 13995c635efSGarrett D'Amore case ('Y'): 14095c635efSGarrett D'Amore gly = ESCAPE_IGNORE; 14195c635efSGarrett D'Amore /* FALLTHROUGH */ 14295c635efSGarrett D'Amore case ('f'): 14395c635efSGarrett D'Amore if (ESCAPE_ERROR == gly) 14495c635efSGarrett D'Amore gly = ESCAPE_FONT; 145*698f87a4SGarrett D'Amore switch (**start) { 14695c635efSGarrett D'Amore case ('('): 147*698f87a4SGarrett D'Amore *start = ++*end; 148*698f87a4SGarrett D'Amore *sz = 2; 14995c635efSGarrett D'Amore break; 15095c635efSGarrett D'Amore case ('['): 151*698f87a4SGarrett D'Amore *start = ++*end; 15295c635efSGarrett D'Amore term = ']'; 15395c635efSGarrett D'Amore break; 15495c635efSGarrett D'Amore default: 155*698f87a4SGarrett D'Amore *sz = 1; 15695c635efSGarrett D'Amore break; 15795c635efSGarrett D'Amore } 15895c635efSGarrett D'Amore break; 15995c635efSGarrett D'Amore 16095c635efSGarrett D'Amore /* 16195c635efSGarrett D'Amore * These escapes are of the form \X'Y', where 'X' is the trigger 16295c635efSGarrett D'Amore * and 'Y' is any string. These have opaque sub-strings. 
16395c635efSGarrett D'Amore */ 16495c635efSGarrett D'Amore case ('A'): 16595c635efSGarrett D'Amore /* FALLTHROUGH */ 16695c635efSGarrett D'Amore case ('b'): 16795c635efSGarrett D'Amore /* FALLTHROUGH */ 168*698f87a4SGarrett D'Amore case ('B'): 169*698f87a4SGarrett D'Amore /* FALLTHROUGH */ 17095c635efSGarrett D'Amore case ('D'): 17195c635efSGarrett D'Amore /* FALLTHROUGH */ 17295c635efSGarrett D'Amore case ('o'): 17395c635efSGarrett D'Amore /* FALLTHROUGH */ 17495c635efSGarrett D'Amore case ('R'): 17595c635efSGarrett D'Amore /* FALLTHROUGH */ 176*698f87a4SGarrett D'Amore case ('w'): 177*698f87a4SGarrett D'Amore /* FALLTHROUGH */ 17895c635efSGarrett D'Amore case ('X'): 17995c635efSGarrett D'Amore /* FALLTHROUGH */ 18095c635efSGarrett D'Amore case ('Z'): 181*698f87a4SGarrett D'Amore if ('\'' != **start) 18295c635efSGarrett D'Amore return(ESCAPE_ERROR); 18395c635efSGarrett D'Amore gly = ESCAPE_IGNORE; 184*698f87a4SGarrett D'Amore *start = ++*end; 18595c635efSGarrett D'Amore term = '\''; 18695c635efSGarrett D'Amore break; 18795c635efSGarrett D'Amore 18895c635efSGarrett D'Amore /* 18995c635efSGarrett D'Amore * These escapes are of the form \X'N', where 'X' is the trigger 19095c635efSGarrett D'Amore * and 'N' resolves to a numerical expression. 
19195c635efSGarrett D'Amore */ 19295c635efSGarrett D'Amore case ('h'): 19395c635efSGarrett D'Amore /* FALLTHROUGH */ 19495c635efSGarrett D'Amore case ('H'): 19595c635efSGarrett D'Amore /* FALLTHROUGH */ 19695c635efSGarrett D'Amore case ('L'): 19795c635efSGarrett D'Amore /* FALLTHROUGH */ 19895c635efSGarrett D'Amore case ('l'): 19995c635efSGarrett D'Amore /* FALLTHROUGH */ 20095c635efSGarrett D'Amore case ('S'): 20195c635efSGarrett D'Amore /* FALLTHROUGH */ 20295c635efSGarrett D'Amore case ('v'): 20395c635efSGarrett D'Amore /* FALLTHROUGH */ 20495c635efSGarrett D'Amore case ('x'): 205*698f87a4SGarrett D'Amore if ('\'' != **start) 20695c635efSGarrett D'Amore return(ESCAPE_ERROR); 207*698f87a4SGarrett D'Amore gly = ESCAPE_IGNORE; 208*698f87a4SGarrett D'Amore *start = ++*end; 209*698f87a4SGarrett D'Amore term = '\''; 21095c635efSGarrett D'Amore break; 21195c635efSGarrett D'Amore 21295c635efSGarrett D'Amore /* 21395c635efSGarrett D'Amore * Special handling for the numbered character escape. 21495c635efSGarrett D'Amore * XXX Do any other escapes need similar handling? 21595c635efSGarrett D'Amore */ 21695c635efSGarrett D'Amore case ('N'): 217*698f87a4SGarrett D'Amore if ('\0' == **start) 21895c635efSGarrett D'Amore return(ESCAPE_ERROR); 219*698f87a4SGarrett D'Amore (*end)++; 220*698f87a4SGarrett D'Amore if (isdigit((unsigned char)**start)) { 221*698f87a4SGarrett D'Amore *sz = 1; 22295c635efSGarrett D'Amore return(ESCAPE_IGNORE); 223*698f87a4SGarrett D'Amore } 224*698f87a4SGarrett D'Amore (*start)++; 22595c635efSGarrett D'Amore while (isdigit((unsigned char)**end)) 22695c635efSGarrett D'Amore (*end)++; 227*698f87a4SGarrett D'Amore *sz = *end - *start; 22895c635efSGarrett D'Amore if ('\0' != **end) 22995c635efSGarrett D'Amore (*end)++; 23095c635efSGarrett D'Amore return(ESCAPE_NUMBERED); 23195c635efSGarrett D'Amore 23295c635efSGarrett D'Amore /* 23395c635efSGarrett D'Amore * Sizes get a special category of their own. 
23495c635efSGarrett D'Amore */ 23595c635efSGarrett D'Amore case ('s'): 23695c635efSGarrett D'Amore gly = ESCAPE_IGNORE; 23795c635efSGarrett D'Amore 23895c635efSGarrett D'Amore /* See +/- counts as a sign. */ 239*698f87a4SGarrett D'Amore if ('+' == **end || '-' == **end || ASCII_HYPH == **end) 240*698f87a4SGarrett D'Amore (*end)++; 24195c635efSGarrett D'Amore 242*698f87a4SGarrett D'Amore switch (**end) { 24395c635efSGarrett D'Amore case ('('): 244*698f87a4SGarrett D'Amore *start = ++*end; 245*698f87a4SGarrett D'Amore *sz = 2; 24695c635efSGarrett D'Amore break; 24795c635efSGarrett D'Amore case ('['): 248*698f87a4SGarrett D'Amore *start = ++*end; 249*698f87a4SGarrett D'Amore term = ']'; 25095c635efSGarrett D'Amore break; 25195c635efSGarrett D'Amore case ('\''): 252*698f87a4SGarrett D'Amore *start = ++*end; 253*698f87a4SGarrett D'Amore term = '\''; 25495c635efSGarrett D'Amore break; 25595c635efSGarrett D'Amore default: 256*698f87a4SGarrett D'Amore *sz = 1; 25795c635efSGarrett D'Amore break; 25895c635efSGarrett D'Amore } 25995c635efSGarrett D'Amore 26095c635efSGarrett D'Amore break; 26195c635efSGarrett D'Amore 26295c635efSGarrett D'Amore /* 26395c635efSGarrett D'Amore * Anything else is assumed to be a glyph. 264*698f87a4SGarrett D'Amore * In this case, pass back the character after the backslash. 26595c635efSGarrett D'Amore */ 26695c635efSGarrett D'Amore default: 26795c635efSGarrett D'Amore gly = ESCAPE_SPECIAL; 268*698f87a4SGarrett D'Amore *start = --*end; 269*698f87a4SGarrett D'Amore *sz = 1; 27095c635efSGarrett D'Amore break; 27195c635efSGarrett D'Amore } 27295c635efSGarrett D'Amore 27395c635efSGarrett D'Amore assert(ESCAPE_ERROR != gly); 27495c635efSGarrett D'Amore 27595c635efSGarrett D'Amore /* 276*698f87a4SGarrett D'Amore * Read up to the terminating character, 277*698f87a4SGarrett D'Amore * paying attention to nested escapes. 
27895c635efSGarrett D'Amore */ 27995c635efSGarrett D'Amore 28095c635efSGarrett D'Amore if ('\0' != term) { 281*698f87a4SGarrett D'Amore while (**end != term) { 282*698f87a4SGarrett D'Amore switch (**end) { 283*698f87a4SGarrett D'Amore case ('\0'): 28495c635efSGarrett D'Amore return(ESCAPE_ERROR); 285*698f87a4SGarrett D'Amore case ('\\'): 28695c635efSGarrett D'Amore (*end)++; 287*698f87a4SGarrett D'Amore if (ESCAPE_ERROR == 288*698f87a4SGarrett D'Amore mandoc_escape(end, NULL, NULL)) 28995c635efSGarrett D'Amore return(ESCAPE_ERROR); 290*698f87a4SGarrett D'Amore break; 291*698f87a4SGarrett D'Amore default: 292*698f87a4SGarrett D'Amore (*end)++; 293*698f87a4SGarrett D'Amore break; 294*698f87a4SGarrett D'Amore } 295*698f87a4SGarrett D'Amore } 296*698f87a4SGarrett D'Amore *sz = (*end)++ - *start; 297*698f87a4SGarrett D'Amore } else { 298*698f87a4SGarrett D'Amore assert(*sz > 0); 299*698f87a4SGarrett D'Amore if ((size_t)*sz > strlen(*start)) 300*698f87a4SGarrett D'Amore return(ESCAPE_ERROR); 301*698f87a4SGarrett D'Amore *end += *sz; 302*698f87a4SGarrett D'Amore } 30395c635efSGarrett D'Amore 30495c635efSGarrett D'Amore /* Run post-processors. */ 30595c635efSGarrett D'Amore 30695c635efSGarrett D'Amore switch (gly) { 30795c635efSGarrett D'Amore case (ESCAPE_FONT): 308*698f87a4SGarrett D'Amore if (2 == *sz) { 309*698f87a4SGarrett D'Amore if ('C' == **start) { 31095c635efSGarrett D'Amore /* 311*698f87a4SGarrett D'Amore * Treat constant-width font modes 312*698f87a4SGarrett D'Amore * just like regular font modes. 
31395c635efSGarrett D'Amore */ 314*698f87a4SGarrett D'Amore (*start)++; 315*698f87a4SGarrett D'Amore (*sz)--; 316*698f87a4SGarrett D'Amore } else { 317*698f87a4SGarrett D'Amore if ('B' == (*start)[0] && 'I' == (*start)[1]) 318*698f87a4SGarrett D'Amore gly = ESCAPE_FONTBI; 319*698f87a4SGarrett D'Amore break; 320*698f87a4SGarrett D'Amore } 321*698f87a4SGarrett D'Amore } else if (1 != *sz) 32295c635efSGarrett D'Amore break; 32395c635efSGarrett D'Amore 324*698f87a4SGarrett D'Amore switch (**start) { 32595c635efSGarrett D'Amore case ('3'): 32695c635efSGarrett D'Amore /* FALLTHROUGH */ 32795c635efSGarrett D'Amore case ('B'): 32895c635efSGarrett D'Amore gly = ESCAPE_FONTBOLD; 32995c635efSGarrett D'Amore break; 33095c635efSGarrett D'Amore case ('2'): 33195c635efSGarrett D'Amore /* FALLTHROUGH */ 33295c635efSGarrett D'Amore case ('I'): 33395c635efSGarrett D'Amore gly = ESCAPE_FONTITALIC; 33495c635efSGarrett D'Amore break; 33595c635efSGarrett D'Amore case ('P'): 33695c635efSGarrett D'Amore gly = ESCAPE_FONTPREV; 33795c635efSGarrett D'Amore break; 33895c635efSGarrett D'Amore case ('1'): 33995c635efSGarrett D'Amore /* FALLTHROUGH */ 34095c635efSGarrett D'Amore case ('R'): 34195c635efSGarrett D'Amore gly = ESCAPE_FONTROMAN; 34295c635efSGarrett D'Amore break; 34395c635efSGarrett D'Amore } 34495c635efSGarrett D'Amore break; 34595c635efSGarrett D'Amore case (ESCAPE_SPECIAL): 346*698f87a4SGarrett D'Amore if (1 == *sz && 'c' == **start) 34795c635efSGarrett D'Amore gly = ESCAPE_NOSPACE; 34895c635efSGarrett D'Amore break; 34995c635efSGarrett D'Amore default: 35095c635efSGarrett D'Amore break; 35195c635efSGarrett D'Amore } 35295c635efSGarrett D'Amore 35395c635efSGarrett D'Amore return(gly); 35495c635efSGarrett D'Amore } 35595c635efSGarrett D'Amore 35695c635efSGarrett D'Amore void * 35795c635efSGarrett D'Amore mandoc_calloc(size_t num, size_t size) 35895c635efSGarrett D'Amore { 35995c635efSGarrett D'Amore void *ptr; 36095c635efSGarrett D'Amore 36195c635efSGarrett D'Amore ptr = 
calloc(num, size); 36295c635efSGarrett D'Amore if (NULL == ptr) { 36395c635efSGarrett D'Amore perror(NULL); 36495c635efSGarrett D'Amore exit((int)MANDOCLEVEL_SYSERR); 36595c635efSGarrett D'Amore } 36695c635efSGarrett D'Amore 36795c635efSGarrett D'Amore return(ptr); 36895c635efSGarrett D'Amore } 36995c635efSGarrett D'Amore 37095c635efSGarrett D'Amore 37195c635efSGarrett D'Amore void * 37295c635efSGarrett D'Amore mandoc_malloc(size_t size) 37395c635efSGarrett D'Amore { 37495c635efSGarrett D'Amore void *ptr; 37595c635efSGarrett D'Amore 37695c635efSGarrett D'Amore ptr = malloc(size); 37795c635efSGarrett D'Amore if (NULL == ptr) { 37895c635efSGarrett D'Amore perror(NULL); 37995c635efSGarrett D'Amore exit((int)MANDOCLEVEL_SYSERR); 38095c635efSGarrett D'Amore } 38195c635efSGarrett D'Amore 38295c635efSGarrett D'Amore return(ptr); 38395c635efSGarrett D'Amore } 38495c635efSGarrett D'Amore 38595c635efSGarrett D'Amore 38695c635efSGarrett D'Amore void * 38795c635efSGarrett D'Amore mandoc_realloc(void *ptr, size_t size) 38895c635efSGarrett D'Amore { 38995c635efSGarrett D'Amore 39095c635efSGarrett D'Amore ptr = realloc(ptr, size); 39195c635efSGarrett D'Amore if (NULL == ptr) { 39295c635efSGarrett D'Amore perror(NULL); 39395c635efSGarrett D'Amore exit((int)MANDOCLEVEL_SYSERR); 39495c635efSGarrett D'Amore } 39595c635efSGarrett D'Amore 39695c635efSGarrett D'Amore return(ptr); 39795c635efSGarrett D'Amore } 39895c635efSGarrett D'Amore 39995c635efSGarrett D'Amore char * 40095c635efSGarrett D'Amore mandoc_strndup(const char *ptr, size_t sz) 40195c635efSGarrett D'Amore { 40295c635efSGarrett D'Amore char *p; 40395c635efSGarrett D'Amore 40495c635efSGarrett D'Amore p = mandoc_malloc(sz + 1); 40595c635efSGarrett D'Amore memcpy(p, ptr, sz); 40695c635efSGarrett D'Amore p[(int)sz] = '\0'; 40795c635efSGarrett D'Amore return(p); 40895c635efSGarrett D'Amore } 40995c635efSGarrett D'Amore 41095c635efSGarrett D'Amore char * 41195c635efSGarrett D'Amore mandoc_strdup(const char *ptr) 
41295c635efSGarrett D'Amore { 41395c635efSGarrett D'Amore char *p; 41495c635efSGarrett D'Amore 41595c635efSGarrett D'Amore p = strdup(ptr); 41695c635efSGarrett D'Amore if (NULL == p) { 41795c635efSGarrett D'Amore perror(NULL); 41895c635efSGarrett D'Amore exit((int)MANDOCLEVEL_SYSERR); 41995c635efSGarrett D'Amore } 42095c635efSGarrett D'Amore 42195c635efSGarrett D'Amore return(p); 42295c635efSGarrett D'Amore } 42395c635efSGarrett D'Amore 42495c635efSGarrett D'Amore /* 42595c635efSGarrett D'Amore * Parse a quoted or unquoted roff-style request or macro argument. 42695c635efSGarrett D'Amore * Return a pointer to the parsed argument, which is either the original 42795c635efSGarrett D'Amore * pointer or advanced by one byte in case the argument is quoted. 428*698f87a4SGarrett D'Amore * NUL-terminate the argument in place. 42995c635efSGarrett D'Amore * Collapse pairs of quotes inside quoted arguments. 43095c635efSGarrett D'Amore * Advance the argument pointer to the next argument, 431*698f87a4SGarrett D'Amore * or to the NUL byte terminating the argument line. 43295c635efSGarrett D'Amore */ 43395c635efSGarrett D'Amore char * 43495c635efSGarrett D'Amore mandoc_getarg(struct mparse *parse, char **cpp, int ln, int *pos) 43595c635efSGarrett D'Amore { 43695c635efSGarrett D'Amore char *start, *cp; 43795c635efSGarrett D'Amore int quoted, pairs, white; 43895c635efSGarrett D'Amore 43995c635efSGarrett D'Amore /* Quoting can only start with a new word. 
*/ 44095c635efSGarrett D'Amore start = *cpp; 44195c635efSGarrett D'Amore quoted = 0; 44295c635efSGarrett D'Amore if ('"' == *start) { 44395c635efSGarrett D'Amore quoted = 1; 44495c635efSGarrett D'Amore start++; 44595c635efSGarrett D'Amore } 44695c635efSGarrett D'Amore 44795c635efSGarrett D'Amore pairs = 0; 44895c635efSGarrett D'Amore white = 0; 44995c635efSGarrett D'Amore for (cp = start; '\0' != *cp; cp++) { 450*698f87a4SGarrett D'Amore 451*698f87a4SGarrett D'Amore /* 452*698f87a4SGarrett D'Amore * Move the following text left 453*698f87a4SGarrett D'Amore * after quoted quotes and after "\\" and "\t". 454*698f87a4SGarrett D'Amore */ 45595c635efSGarrett D'Amore if (pairs) 45695c635efSGarrett D'Amore cp[-pairs] = cp[0]; 457*698f87a4SGarrett D'Amore 45895c635efSGarrett D'Amore if ('\\' == cp[0]) { 459*698f87a4SGarrett D'Amore /* 460*698f87a4SGarrett D'Amore * In copy mode, translate double to single 461*698f87a4SGarrett D'Amore * backslashes and backslash-t to literal tabs. 462*698f87a4SGarrett D'Amore */ 463*698f87a4SGarrett D'Amore switch (cp[1]) { 464*698f87a4SGarrett D'Amore case ('t'): 465*698f87a4SGarrett D'Amore cp[0] = '\t'; 466*698f87a4SGarrett D'Amore /* FALLTHROUGH */ 467*698f87a4SGarrett D'Amore case ('\\'): 46895c635efSGarrett D'Amore pairs++; 46995c635efSGarrett D'Amore cp++; 470*698f87a4SGarrett D'Amore break; 471*698f87a4SGarrett D'Amore case (' '): 47295c635efSGarrett D'Amore /* Skip escaped blanks. */ 473*698f87a4SGarrett D'Amore if (0 == quoted) 47495c635efSGarrett D'Amore cp++; 475*698f87a4SGarrett D'Amore break; 476*698f87a4SGarrett D'Amore default: 477*698f87a4SGarrett D'Amore break; 478*698f87a4SGarrett D'Amore } 47995c635efSGarrett D'Amore } else if (0 == quoted) { 48095c635efSGarrett D'Amore if (' ' == cp[0]) { 48195c635efSGarrett D'Amore /* Unescaped blanks end unquoted args. 
*/ 48295c635efSGarrett D'Amore white = 1; 48395c635efSGarrett D'Amore break; 48495c635efSGarrett D'Amore } 48595c635efSGarrett D'Amore } else if ('"' == cp[0]) { 48695c635efSGarrett D'Amore if ('"' == cp[1]) { 48795c635efSGarrett D'Amore /* Quoted quotes collapse. */ 48895c635efSGarrett D'Amore pairs++; 48995c635efSGarrett D'Amore cp++; 49095c635efSGarrett D'Amore } else { 49195c635efSGarrett D'Amore /* Unquoted quotes end quoted args. */ 49295c635efSGarrett D'Amore quoted = 2; 49395c635efSGarrett D'Amore break; 49495c635efSGarrett D'Amore } 49595c635efSGarrett D'Amore } 49695c635efSGarrett D'Amore } 49795c635efSGarrett D'Amore 49895c635efSGarrett D'Amore /* Quoted argument without a closing quote. */ 49995c635efSGarrett D'Amore if (1 == quoted) 50095c635efSGarrett D'Amore mandoc_msg(MANDOCERR_BADQUOTE, parse, ln, *pos, NULL); 50195c635efSGarrett D'Amore 502*698f87a4SGarrett D'Amore /* NUL-terminate this argument and move to the next one. */ 50395c635efSGarrett D'Amore if (pairs) 50495c635efSGarrett D'Amore cp[-pairs] = '\0'; 50595c635efSGarrett D'Amore if ('\0' != *cp) { 50695c635efSGarrett D'Amore *cp++ = '\0'; 50795c635efSGarrett D'Amore while (' ' == *cp) 50895c635efSGarrett D'Amore cp++; 50995c635efSGarrett D'Amore } 51095c635efSGarrett D'Amore *pos += (int)(cp - start) + (quoted ? 
1 : 0); 51195c635efSGarrett D'Amore *cpp = cp; 51295c635efSGarrett D'Amore 51395c635efSGarrett D'Amore if ('\0' == *cp && (white || ' ' == cp[-1])) 51495c635efSGarrett D'Amore mandoc_msg(MANDOCERR_EOLNSPACE, parse, ln, *pos, NULL); 51595c635efSGarrett D'Amore 51695c635efSGarrett D'Amore return(start); 51795c635efSGarrett D'Amore } 51895c635efSGarrett D'Amore 51995c635efSGarrett D'Amore static int 52095c635efSGarrett D'Amore a2time(time_t *t, const char *fmt, const char *p) 52195c635efSGarrett D'Amore { 52295c635efSGarrett D'Amore struct tm tm; 52395c635efSGarrett D'Amore char *pp; 52495c635efSGarrett D'Amore 52595c635efSGarrett D'Amore memset(&tm, 0, sizeof(struct tm)); 52695c635efSGarrett D'Amore 52795c635efSGarrett D'Amore pp = NULL; 52895c635efSGarrett D'Amore #ifdef HAVE_STRPTIME 52995c635efSGarrett D'Amore pp = strptime(p, fmt, &tm); 53095c635efSGarrett D'Amore #endif 53195c635efSGarrett D'Amore if (NULL != pp && '\0' == *pp) { 53295c635efSGarrett D'Amore *t = mktime(&tm); 53395c635efSGarrett D'Amore return(1); 53495c635efSGarrett D'Amore } 53595c635efSGarrett D'Amore 53695c635efSGarrett D'Amore return(0); 53795c635efSGarrett D'Amore } 53895c635efSGarrett D'Amore 53995c635efSGarrett D'Amore static char * 54095c635efSGarrett D'Amore time2a(time_t t) 54195c635efSGarrett D'Amore { 54295c635efSGarrett D'Amore struct tm *tm; 54395c635efSGarrett D'Amore char *buf, *p; 54495c635efSGarrett D'Amore size_t ssz; 54595c635efSGarrett D'Amore int isz; 54695c635efSGarrett D'Amore 54795c635efSGarrett D'Amore tm = localtime(&t); 54895c635efSGarrett D'Amore 54995c635efSGarrett D'Amore /* 55095c635efSGarrett D'Amore * Reserve space: 55195c635efSGarrett D'Amore * up to 9 characters for the month (September) + blank 55295c635efSGarrett D'Amore * up to 2 characters for the day + comma + blank 55395c635efSGarrett D'Amore * 4 characters for the year and a terminating '\0' 55495c635efSGarrett D'Amore */ 55595c635efSGarrett D'Amore p = buf = mandoc_malloc(10 + 4 + 4 + 1); 
55695c635efSGarrett D'Amore 55795c635efSGarrett D'Amore if (0 == (ssz = strftime(p, 10 + 1, "%B ", tm))) 55895c635efSGarrett D'Amore goto fail; 55995c635efSGarrett D'Amore p += (int)ssz; 56095c635efSGarrett D'Amore 56195c635efSGarrett D'Amore if (-1 == (isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday))) 56295c635efSGarrett D'Amore goto fail; 56395c635efSGarrett D'Amore p += isz; 56495c635efSGarrett D'Amore 56595c635efSGarrett D'Amore if (0 == strftime(p, 4 + 1, "%Y", tm)) 56695c635efSGarrett D'Amore goto fail; 56795c635efSGarrett D'Amore return(buf); 56895c635efSGarrett D'Amore 56995c635efSGarrett D'Amore fail: 57095c635efSGarrett D'Amore free(buf); 57195c635efSGarrett D'Amore return(NULL); 57295c635efSGarrett D'Amore } 57395c635efSGarrett D'Amore 57495c635efSGarrett D'Amore char * 57595c635efSGarrett D'Amore mandoc_normdate(struct mparse *parse, char *in, int ln, int pos) 57695c635efSGarrett D'Amore { 57795c635efSGarrett D'Amore char *out; 57895c635efSGarrett D'Amore time_t t; 57995c635efSGarrett D'Amore 58095c635efSGarrett D'Amore if (NULL == in || '\0' == *in || 58195c635efSGarrett D'Amore 0 == strcmp(in, "$" "Mdocdate$")) { 58295c635efSGarrett D'Amore mandoc_msg(MANDOCERR_NODATE, parse, ln, pos, NULL); 58395c635efSGarrett D'Amore time(&t); 58495c635efSGarrett D'Amore } 58595c635efSGarrett D'Amore else if (a2time(&t, "%Y-%m-%d", in)) 58695c635efSGarrett D'Amore t = 0; 58795c635efSGarrett D'Amore else if (!a2time(&t, "$" "Mdocdate: %b %d %Y $", in) && 58895c635efSGarrett D'Amore !a2time(&t, "%b %d, %Y", in)) { 58995c635efSGarrett D'Amore mandoc_msg(MANDOCERR_BADDATE, parse, ln, pos, NULL); 59095c635efSGarrett D'Amore t = 0; 59195c635efSGarrett D'Amore } 59295c635efSGarrett D'Amore out = t ? time2a(t) : NULL; 59395c635efSGarrett D'Amore return(out ? 
out : mandoc_strdup(in)); 59495c635efSGarrett D'Amore } 59595c635efSGarrett D'Amore 59695c635efSGarrett D'Amore int 59795c635efSGarrett D'Amore mandoc_eos(const char *p, size_t sz, int enclosed) 59895c635efSGarrett D'Amore { 59995c635efSGarrett D'Amore const char *q; 60095c635efSGarrett D'Amore int found; 60195c635efSGarrett D'Amore 60295c635efSGarrett D'Amore if (0 == sz) 60395c635efSGarrett D'Amore return(0); 60495c635efSGarrett D'Amore 60595c635efSGarrett D'Amore /* 60695c635efSGarrett D'Amore * End-of-sentence recognition must include situations where 60795c635efSGarrett D'Amore * some symbols, such as `)', allow prior EOS punctuation to 60895c635efSGarrett D'Amore * propagate outward. 60995c635efSGarrett D'Amore */ 61095c635efSGarrett D'Amore 61195c635efSGarrett D'Amore found = 0; 61295c635efSGarrett D'Amore for (q = p + (int)sz - 1; q >= p; q--) { 61395c635efSGarrett D'Amore switch (*q) { 61495c635efSGarrett D'Amore case ('\"'): 61595c635efSGarrett D'Amore /* FALLTHROUGH */ 61695c635efSGarrett D'Amore case ('\''): 61795c635efSGarrett D'Amore /* FALLTHROUGH */ 61895c635efSGarrett D'Amore case (']'): 61995c635efSGarrett D'Amore /* FALLTHROUGH */ 62095c635efSGarrett D'Amore case (')'): 62195c635efSGarrett D'Amore if (0 == found) 62295c635efSGarrett D'Amore enclosed = 1; 62395c635efSGarrett D'Amore break; 62495c635efSGarrett D'Amore case ('.'): 62595c635efSGarrett D'Amore /* FALLTHROUGH */ 62695c635efSGarrett D'Amore case ('!'): 62795c635efSGarrett D'Amore /* FALLTHROUGH */ 62895c635efSGarrett D'Amore case ('?'): 62995c635efSGarrett D'Amore found = 1; 63095c635efSGarrett D'Amore break; 63195c635efSGarrett D'Amore default: 63295c635efSGarrett D'Amore return(found && (!enclosed || isalnum((unsigned char)*q))); 63395c635efSGarrett D'Amore } 63495c635efSGarrett D'Amore } 63595c635efSGarrett D'Amore 63695c635efSGarrett D'Amore return(found && !enclosed); 63795c635efSGarrett D'Amore } 63895c635efSGarrett D'Amore 63995c635efSGarrett D'Amore /* 64095c635efSGarrett 
/*
 * Convert the first sz bytes of p to an int with strtol(3),
 * interpreting them in the given base.
 * Return -1 if the input is empty, longer than 31 bytes, contains
 * trailing garbage, or if the base is not supported by strtol(3).
 * Values outside the range of int are clamped to INT_MIN or INT_MAX.
 * Negative numbers are NOT mapped to -1 but returned as they are,
 * so callers that need a non-negative number have to treat
 * every negative return value as a failure.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char		 buf[32];
	char		*ep;
	long		 v;

	if (sz > sizeof(buf) - 1)
		return(-1);

	memcpy(buf, p, sz);
	buf[sz] = '\0';

	/*
	 * Preset the end pointer: some strtol(3) implementations
	 * do not store it when they reject the base.
	 */
	ep = buf;
	errno = 0;
	v = strtol(buf, &ep, base);

	if (EINVAL == errno)
		return(-1);
	if (buf[0] == '\0' || *ep != '\0')
		return(-1);

	if (v > INT_MAX)
		v = INT_MAX;
	if (v < INT_MIN)
		v = INT_MIN;

	return((int)v);
}