xref: /freebsd/contrib/mandoc/mandoc.c (revision 6c05f3a74f30934ee60919cc97e16ec69b542b06)
1 /* $Id: mandoc.c,v 1.121 2022/05/19 15:37:47 schwarze Exp $ */
2 /*
3  * Copyright (c) 2010, 2011, 2015, 2017, 2018, 2019, 2020, 2021
4  *               Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2009, 2010 Kristaps Dzonsons <kristaps@bsd.lv>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  *
19  * Utility functions to handle end of sentence punctuation
20  * and dates and times, for use by mdoc(7) and man(7) parsers.
21  * Utility functions to handle fonts and numbers,
22  * for use by mandoc(1) parsers and formatters.
23  */
24 #include "config.h"
25 
26 #include <sys/types.h>
27 
28 #include <assert.h>
29 #include <ctype.h>
30 #include <errno.h>
31 #include <limits.h>
32 #include <stdlib.h>
33 #include <stdio.h>
34 #include <string.h>
35 #include <time.h>
36 
37 #include "mandoc_aux.h"
38 #include "mandoc.h"
39 #include "roff.h"
40 #include "libmandoc.h"
41 #include "roff_int.h"
42 
43 static	int	 a2time(time_t *, const char *, const char *);
44 static	char	*time2a(time_t);
45 
46 
47 enum mandoc_esc
48 mandoc_font(const char *cp, int sz)
49 {
50 	switch (sz) {
51 	case 0:
52 		return ESCAPE_FONTPREV;
53 	case 1:
54 		switch (cp[0]) {
55 		case 'B':
56 		case '3':
57 			return ESCAPE_FONTBOLD;
58 		case 'I':
59 		case '2':
60 			return ESCAPE_FONTITALIC;
61 		case 'P':
62 			return ESCAPE_FONTPREV;
63 		case 'R':
64 		case '1':
65 			return ESCAPE_FONTROMAN;
66 		case '4':
67 			return ESCAPE_FONTBI;
68 		default:
69 			return ESCAPE_ERROR;
70 		}
71 	case 2:
72 		switch (cp[0]) {
73 		case 'B':
74 			switch (cp[1]) {
75 			case 'I':
76 				return ESCAPE_FONTBI;
77 			default:
78 				return ESCAPE_ERROR;
79 			}
80 		case 'C':
81 			switch (cp[1]) {
82 			case 'B':
83 				return ESCAPE_FONTCB;
84 			case 'I':
85 				return ESCAPE_FONTCI;
86 			case 'R':
87 			case 'W':
88 				return ESCAPE_FONTCR;
89 			default:
90 				return ESCAPE_ERROR;
91 			}
92 		default:
93 			return ESCAPE_ERROR;
94 		}
95 	default:
96 		return ESCAPE_ERROR;
97 	}
98 }
99 
/*
 * Parse the string p according to the strptime(3) format fmt.
 * If the format consumes all of p, store the mktime(3) result
 * in *t and return 1.  Otherwise return 0 and leave *t alone.
 * When HAVE_STRPTIME is not set, parsing always fails.
 */
static int
a2time(time_t *t, const char *fmt, const char *p)
{
	struct tm	 parsed;
	char		*rest = NULL;

	memset(&parsed, 0, sizeof(parsed));

#if HAVE_STRPTIME
	rest = strptime(p, fmt, &parsed);
#endif

	/* Reject parse failures and trailing garbage alike. */
	if (rest == NULL || *rest != '\0')
		return 0;

	*t = mktime(&parsed);
	return 1;
}
119 
/*
 * Format the time t in the local time zone as "Month D, YYYY",
 * with the day of the month printed without padding.
 * The result comes from mandoc_malloc(); if the time cannot be
 * converted or formatted, the result is mandoc_strdup("").
 */
static char *
time2a(time_t t)
{
	struct tm	*tm;
	char		*buf, *cur;
	size_t		 mlen;
	int		 dlen;

	if ((tm = localtime(&t)) == NULL)
		return mandoc_strdup("");

	/*
	 * Buffer layout:
	 * 10 bytes: month name of up to 9 characters (September) + blank
	 *  4 bytes: day of up to 2 characters + comma + blank
	 *  5 bytes: year of 4 characters + terminating '\0'
	 */

	buf = mandoc_malloc(10 + 4 + 4 + 1);
	cur = buf;

	mlen = strftime(cur, 10 + 1, "%B ", tm);
	if (mlen == 0)
		goto fail;
	cur += mlen;

	/*
	 * The day is printed with plain "%d" rather than "%2d" or
	 * "%02d", which is why the date cannot be formatted in one
	 * go with "%B %e, %Y" or "%B %d, %Y".  Formatting piece by
	 * piece with individual size limits is also less prone to
	 * buffer overflows, in case anybody should ever introduce
	 * the bug of looking at LC_TIME.
	 */

	dlen = snprintf(cur, 4 + 1, "%d, ", tm->tm_mday);
	if (dlen < 0 || dlen > 4)
		goto fail;
	cur += dlen;

	if (strftime(cur, 4 + 1, "%Y", tm) != 0)
		return buf;

fail:
	free(buf);
	return mandoc_strdup("");
}
168 
169 char *
170 mandoc_normdate(struct roff_node *nch, struct roff_node *nbl)
171 {
172 	char		*cp;
173 	time_t		 t;
174 
175 	/* No date specified. */
176 
177 	if (nch == NULL) {
178 		if (nbl == NULL)
179 			mandoc_msg(MANDOCERR_DATE_MISSING, 0, 0, NULL);
180 		else
181 			mandoc_msg(MANDOCERR_DATE_MISSING, nbl->line,
182 			    nbl->pos, "%s", roff_name[nbl->tok]);
183 		return mandoc_strdup("");
184 	}
185 	if (*nch->string == '\0') {
186 		mandoc_msg(MANDOCERR_DATE_MISSING, nch->line,
187 		    nch->pos, "%s", roff_name[nbl->tok]);
188 		return mandoc_strdup("");
189 	}
190 	if (strcmp(nch->string, "$" "Mdocdate$") == 0)
191 		return time2a(time(NULL));
192 
193 	/* Valid mdoc(7) date format. */
194 
195 	if (a2time(&t, "$" "Mdocdate: %b %d %Y $", nch->string) ||
196 	    a2time(&t, "%b %d, %Y", nch->string)) {
197 		cp = time2a(t);
198 		if (t > time(NULL) + 86400)
199 			mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line,
200 			    nch->pos, "%s %s", roff_name[nbl->tok], cp);
201 		else if (*nch->string != '$' &&
202 		    strcmp(nch->string, cp) != 0)
203 			mandoc_msg(MANDOCERR_DATE_NORM, nch->line,
204 			    nch->pos, "%s %s", roff_name[nbl->tok], cp);
205 		return cp;
206 	}
207 
208 	/* In man(7), do not warn about the legacy format. */
209 
210 	if (a2time(&t, "%Y-%m-%d", nch->string) == 0)
211 		mandoc_msg(MANDOCERR_DATE_BAD, nch->line, nch->pos,
212 		    "%s %s", roff_name[nbl->tok], nch->string);
213 	else if (t > time(NULL) + 86400)
214 		mandoc_msg(MANDOCERR_DATE_FUTURE, nch->line, nch->pos,
215 		    "%s %s", roff_name[nbl->tok], nch->string);
216 	else if (nbl->tok == MDOC_Dd)
217 		mandoc_msg(MANDOCERR_DATE_LEGACY, nch->line, nch->pos,
218 		    "Dd %s", nch->string);
219 
220 	/* Use any non-mdoc(7) date verbatim. */
221 
222 	return mandoc_strdup(nch->string);
223 }
224 
/*
 * Decide whether the sz bytes starting at p end a sentence.
 *
 * The buffer is scanned backwards from its last byte:
 *  - closing delimiters (double quote, single quote, ']', ')')
 *    seen before any sentence punctuation mark the punctuation
 *    as enclosed;
 *  - '.', '!' and '?' count as end-of-sentence punctuation;
 *  - the first byte of any other kind ends the scan.
 *
 * Return 1 if punctuation was found and it is either not enclosed
 * or directly preceded (ignoring further punctuation and closing
 * delimiters) by an alphanumeric byte; return 0 otherwise,
 * in particular for an empty buffer.
 */
int
mandoc_eos(const char *p, size_t sz)
{
	size_t		 i;
	int		 enclosed, found;

	/*
	 * End-of-sentence recognition must include situations where
	 * some symbols, such as `)', allow prior EOS punctuation to
	 * propagate outward.
	 */

	enclosed = found = 0;

	/*
	 * Walk backwards by index rather than by decrementing a
	 * pointer, such that no pointer before the beginning of
	 * the buffer is ever formed and sz is never truncated.
	 */

	for (i = sz; i > 0; i--) {
		switch (p[i - 1]) {
		case '\"':
		case '\'':
		case ']':
		case ')':
			if (found == 0)
				enclosed = 1;
			break;
		case '.':
		case '!':
		case '?':
			found = 1;
			break;
		default:
			return found && (!enclosed ||
			    isalnum((unsigned char)p[i - 1]));
		}
	}

	/* Only punctuation and closing delimiters, or nothing at all. */

	return found && !enclosed;
}
263 
/*
 * Convert the first sz bytes of p to an int with strtol(3),
 * using the given base.
 * Return -1 if sz exceeds 31 bytes, if the string is empty,
 * or if any part of it is not consumed by the conversion.
 * Otherwise, return the value, clamped to the range from
 * INT_MIN to INT_MAX; negative numbers are passed through.
 * Note that errno is reset to 0 before the conversion.
 */
int
mandoc_strntoi(const char *p, size_t sz, int base)
{
	char		 numbuf[32];
	char		*endp;
	long		 val;

	/* Leave room for the terminating '\0'. */
	if (sz >= sizeof(numbuf))
		return -1;

	/* strtol(3) needs a NUL-terminated copy. */
	memcpy(numbuf, p, sz);
	numbuf[sz] = '\0';

	errno = 0;
	val = strtol(numbuf, &endp, base);

	if (numbuf[0] == '\0' || *endp != '\0')
		return -1;

	if (val > INT_MAX)
		return INT_MAX;
	if (val < INT_MIN)
		return INT_MIN;
	return (int)val;
}
294