1 /* $Id: mandoc.c,v 1.121 2022/05/19 15:37:47 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2010, 2011, 2015, 2017, 2018, 2019, 2020, 2021 4 * Ingo Schwarze <schwarze@openbsd.org> 5 * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 * 19 * Utility functions to handle end of sentence punctuation 20 * and dates and times, for use by mdoc(7) and man(7) parsers. 21 * Utility functions to handle fonts and numbers, 22 * for use by mandoc(1) parsers and formatters. 23 */ 24 #include "config.h" 25 26 #include <sys/types.h> 27 28 #include <assert.h> 29 #include <ctype.h> 30 #include <errno.h> 31 #include <limits.h> 32 #include <stdlib.h> 33 #include <stdio.h> 34 #include <string.h> 35 #include <time.h> 36 37 #include "mandoc_aux.h" 38 #include "mandoc.h" 39 #include "roff.h" 40 #include "libmandoc.h" 41 #include "roff_int.h" 42 43 static int a2time(time_t *, const char *, const char *); 44 static char *time2a(time_t); 45 46 47 enum mandoc_esc 48 mandoc_font(const char *cp, int sz) 49 { 50 switch (sz) { 51 case 0: 52 return ESCAPE_FONTPREV; 53 case 1: 54 switch (cp[0]) { 55 case 'B': 56 case '3': 57 return ESCAPE_FONTBOLD; 58 case 'I': 59 case '2': 60 return ESCAPE_FONTITALIC; 61 case 'P': 62 return ESCAPE_FONTPREV; 63 case 'R': 64 case '1': 65 return ESCAPE_FONTROMAN; 66 case '4': 67 return ESCAPE_FONTBI; 68 default: 69 return ESCAPE_ERROR; 70 } 71 case 2: 72 switch (cp[0]) { 73 case 'B': 74 switch (cp[1]) { 75 case 'I': 76 return ESCAPE_FONTBI; 77 default: 78 return ESCAPE_ERROR; 79 } 80 case 'C': 81 switch (cp[1]) { 82 case 'B': 83 return ESCAPE_FONTCB; 84 case 'I': 85 return ESCAPE_FONTCI; 86 case 'R': 87 case 'W': 88 return ESCAPE_FONTCR; 89 default: 90 return ESCAPE_ERROR; 91 } 92 default: 93 return ESCAPE_ERROR; 94 } 95 default: 96 return ESCAPE_ERROR; 97 } 98 } 99 100 static int 101 a2time(time_t *t, const char *fmt, const char *p) 102 { 103 struct tm tm; 104 char *pp; 105 106 memset(&tm, 0, sizeof(struct tm)); 107 108 pp = NULL; 109 #if HAVE_STRPTIME 110 pp = strptime(p, fmt, &tm); 111 #endif 112 if (NULL != pp && '\0' == *pp) { 113 *t = mktime(&tm); 114 return 1; 115 } 116 117 return 0; 118 } 119 120 static char * 121 time2a(time_t t) 122 { 123 struct tm *tm; 124 char *buf, *p; 125 size_t ssz; 126 int isz; 127 128 buf = NULL; 129 tm = localtime(&t); 130 if (tm == NULL) 131 goto fail; 132 133 /* 134 * Reserve space: 135 * up to 9 characters for the month (September) + blank 136 * up to 2 characters for the day + comma + blank 137 * 4 characters for the year and a terminating '\0' 138 */ 139 140 p = buf = mandoc_malloc(10 + 4 + 4 + 1); 141 142 if ((ssz = strftime(p, 10 + 1, "%B ", tm)) == 0) 143 goto fail; 144 p += (int)ssz; 145 146 /* 147 * The output format is just "%d" here, not "%2d" or "%02d". 148 * That's also the reason why we can't just format the 149 * date as a whole with "%B %e, %Y" or "%B %d, %Y". 150 * Besides, the present approach is less prone to buffer 151 * overflows, in case anybody should ever introduce the bug 152 * of looking at LC_TIME. 153 */ 154 155 isz = snprintf(p, 4 + 1, "%d, ", tm->tm_mday); 156 if (isz < 0 || isz > 4) 157 goto fail; 158 p += isz; 159 160 if (strftime(p, 4 + 1, "%Y", tm) == 0) 161 goto fail; 162 return buf; 163 164 fail: 165 free(buf); 166 return mandoc_strdup(""); 167 } 168 169 char * 170 mandoc_normdate(struct roff_node *nch, struct roff_node *nbl) 171 { 172 char *cp; 173 time_t t; 174 175 /* No date specified. */ 176 177 if (nch == NULL) { 178 if (nbl == NULL) 179 mandoc_msg(MANDOCERR_DATE_MISSING, 0, 0, NULL); 180 else 181 mandoc_msg(MANDOCERR_DATE_MISSING, nbl->line, 182 nbl->pos, "%s", roff_name[nbl->tok]); 183 return mandoc_strdup(""); 184 } 185 if (*nch->string == '\0') { 186 mandoc_msg(MANDOCERR_DATE_MISSING, nch->line, 187 nch->pos, "%s", roff_name[nbl->tok]); 188 return mandoc_strdup(""); 189 } 190 if (strcmp(nch->string, "$" "Mdocdate$") == 0) 191 return time2a(time(NULL)); 192 193 /* Valid mdoc(7) date format. */ 194 195 if (a2time(&t, "$" "Mdocdate: %b %d %Y $", nch->string) || 196 a2time(&t, "%b %d, %Y", nch->string)) { 197 cp = time2a(t); 198 if (t > time(NULL) + 86400) 199 mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line, 200 nch->pos, "%s %s", roff_name[nbl->tok], cp); 201 else if (*nch->string != '$' && 202 strcmp(nch->string, cp) != 0) 203 mandoc_msg(MANDOCERR_DATE_NORM, nch->line, 204 nch->pos, "%s %s", roff_name[nbl->tok], cp); 205 return cp; 206 } 207 208 /* In man(7), do not warn about the legacy format. */ 209 210 if (a2time(&t, "%Y-%m-%d", nch->string) == 0) 211 mandoc_msg(MANDOCERR_DATE_BAD, nch->line, nch->pos, 212 "%s %s", roff_name[nbl->tok], nch->string); 213 else if (t > time(NULL) + 86400) 214 mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line, nch->pos, 215 "%s %s", roff_name[nbl->tok], nch->string); 216 else if (nbl->tok == MDOC_Dd) 217 mandoc_msg(MANDOCERR_DATE_LEGACY, nch->line, nch->pos, 218 "Dd %s", nch->string); 219 220 /* Use any non-mdoc(7) date verbatim. */ 221 222 return mandoc_strdup(nch->string); 223 } 224 225 int 226 mandoc_eos(const char *p, size_t sz) 227 { 228 const char *q; 229 int enclosed, found; 230 231 if (0 == sz) 232 return 0; 233 234 /* 235 * End-of-sentence recognition must include situations where 236 * some symbols, such as `)', allow prior EOS punctuation to 237 * propagate outward. 238 */ 239 240 enclosed = found = 0; 241 for (q = p + (int)sz - 1; q >= p; q--) { 242 switch (*q) { 243 case '\"': 244 case '\'': 245 case ']': 246 case ')': 247 if (0 == found) 248 enclosed = 1; 249 break; 250 case '.': 251 case '!': 252 case '?': 253 found = 1; 254 break; 255 default: 256 return found && 257 (!enclosed || isalnum((unsigned char)*q)); 258 } 259 } 260 261 return found && !enclosed; 262 } 263 264 /* 265 * Convert a string to a long that may not be <0. 266 * If the string is invalid, or is less than 0, return -1. 267 */ 268 int 269 mandoc_strntoi(const char *p, size_t sz, int base) 270 { 271 char buf[32]; 272 char *ep; 273 long v; 274 275 if (sz > 31) 276 return -1; 277 278 memcpy(buf, p, sz); 279 buf[(int)sz] = '\0'; 280 281 errno = 0; 282 v = strtol(buf, &ep, base); 283 284 if (buf[0] == '\0' || *ep != '\0') 285 return -1; 286 287 if (v > INT_MAX) 288 v = INT_MAX; 289 if (v < INT_MIN) 290 v = INT_MIN; 291 292 return (int)v; 293 } 294