xref: /illumos-gate/usr/src/cmd/mandoc/man_validate.c (revision 371584c2eae4cf827fd406ba26c14f021adaaa70)
1 /*	$OpenBSD$ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2012-2016 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <errno.h>
25 #include <limits.h>
26 #include <stdarg.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <time.h>
30 
31 #include "mandoc_aux.h"
32 #include "mandoc.h"
33 #include "roff.h"
34 #include "man.h"
35 #include "libmandoc.h"
36 #include "roff_int.h"
37 #include "libman.h"
38 
/* Parameter list shared by every validation handler declared below. */
#define	CHKARGS	  struct roff_man *man, struct roff_node *n

/* Pointer to a validation handler; man_valids holds one per macro. */
typedef	void	(*v_check)(CHKARGS);

/* Checks called directly by man_node_validate() or shared by handlers. */
static	void	  check_par(CHKARGS);
static	void	  check_part(CHKARGS);
static	void	  check_root(CHKARGS);
static	void	  check_text(CHKARGS);

/* Macro-specific handlers, reached through the man_valids table. */
static	void	  post_AT(CHKARGS);
static	void	  post_IP(CHKARGS);
static	void	  post_vs(CHKARGS);
static	void	  post_ft(CHKARGS);
static	void	  post_OP(CHKARGS);
static	void	  post_TH(CHKARGS);
static	void	  post_UC(CHKARGS);
static	void	  post_UR(CHKARGS);
56 
/*
 * Validation handlers, one slot per macro.  man_node_validate() indexes
 * this table with the node's token (n->tok); a NULL slot means the macro
 * has no handler.  The trailing comment on each entry names the macro the
 * slot belongs to, so the entry order must follow the token numbering
 * (defined outside this file).
 */
static	v_check man_valids[MAN_MAX] = {
	post_vs,    /* br */
	post_TH,    /* TH */
	NULL,       /* SH */
	NULL,       /* SS */
	NULL,       /* TP */
	check_par,  /* LP */
	check_par,  /* PP */
	check_par,  /* P */
	post_IP,    /* IP */
	NULL,       /* HP */
	NULL,       /* SM */
	NULL,       /* SB */
	NULL,       /* BI */
	NULL,       /* IB */
	NULL,       /* BR */
	NULL,       /* RB */
	NULL,       /* R */
	NULL,       /* B */
	NULL,       /* I */
	NULL,       /* IR */
	NULL,       /* RI */
	post_vs,    /* sp */
	NULL,       /* nf */
	NULL,       /* fi */
	NULL,       /* RE */
	check_part, /* RS */
	NULL,       /* DT */
	post_UC,    /* UC */
	NULL,       /* PD */
	post_AT,    /* AT */
	NULL,       /* in */
	post_ft,    /* ft */
	post_OP,    /* OP */
	NULL,       /* EX */
	NULL,       /* EE */
	post_UR,    /* UR */
	NULL,       /* UE */
	NULL,       /* ll */
};
97 
98 
99 void
100 man_node_validate(struct roff_man *man)
101 {
102 	struct roff_node *n;
103 	v_check		*cp;
104 
105 	n = man->last;
106 	man->last = man->last->child;
107 	while (man->last != NULL) {
108 		man_node_validate(man);
109 		if (man->last == n)
110 			man->last = man->last->child;
111 		else
112 			man->last = man->last->next;
113 	}
114 
115 	man->last = n;
116 	man->next = ROFF_NEXT_SIBLING;
117 	switch (n->type) {
118 	case ROFFT_TEXT:
119 		check_text(man, n);
120 		break;
121 	case ROFFT_ROOT:
122 		check_root(man, n);
123 		break;
124 	case ROFFT_EQN:
125 	case ROFFT_TBL:
126 		break;
127 	default:
128 		cp = man_valids + n->tok;
129 		if (*cp)
130 			(*cp)(man, n);
131 		if (man->last == n)
132 			man_state(man, n);
133 		break;
134 	}
135 }
136 
137 static void
138 check_root(CHKARGS)
139 {
140 
141 	assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0);
142 
143 	if (NULL == man->first->child)
144 		mandoc_msg(MANDOCERR_DOC_EMPTY, man->parse,
145 		    n->line, n->pos, NULL);
146 	else
147 		man->meta.hasbody = 1;
148 
149 	if (NULL == man->meta.title) {
150 		mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
151 		    n->line, n->pos, NULL);
152 
153 		/*
154 		 * If a title hasn't been set, do so now (by
155 		 * implication, date and section also aren't set).
156 		 */
157 
158 		man->meta.title = mandoc_strdup("");
159 		man->meta.msec = mandoc_strdup("");
160 		man->meta.date = man->quick ? mandoc_strdup("") :
161 		    mandoc_normdate(man->parse, NULL, n->line, n->pos);
162 	}
163 }
164 
165 static void
166 check_text(CHKARGS)
167 {
168 	char		*cp, *p;
169 
170 	if (MAN_LITERAL & man->flags)
171 		return;
172 
173 	cp = n->string;
174 	for (p = cp; NULL != (p = strchr(p, '\t')); p++)
175 		mandoc_msg(MANDOCERR_FI_TAB, man->parse,
176 		    n->line, n->pos + (p - cp), NULL);
177 }
178 
179 static void
180 post_OP(CHKARGS)
181 {
182 
183 	if (n->child == NULL)
184 		mandoc_msg(MANDOCERR_OP_EMPTY, man->parse,
185 		    n->line, n->pos, "OP");
186 	else if (n->child->next != NULL && n->child->next->next != NULL) {
187 		n = n->child->next->next;
188 		mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse,
189 		    n->line, n->pos, "OP ... %s", n->string);
190 	}
191 }
192 
193 static void
194 post_UR(CHKARGS)
195 {
196 
197 	if (n->type == ROFFT_HEAD && n->child == NULL)
198 		mandoc_vmsg(MANDOCERR_UR_NOHEAD, man->parse,
199 		    n->line, n->pos, "UR");
200 	check_part(man, n);
201 }
202 
203 static void
204 post_ft(CHKARGS)
205 {
206 	char	*cp;
207 	int	 ok;
208 
209 	if (n->child == NULL)
210 		return;
211 
212 	ok = 0;
213 	cp = n->child->string;
214 	switch (*cp) {
215 	case '1':
216 	case '2':
217 	case '3':
218 	case '4':
219 	case 'I':
220 	case 'P':
221 	case 'R':
222 		if ('\0' == cp[1])
223 			ok = 1;
224 		break;
225 	case 'B':
226 		if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2]))
227 			ok = 1;
228 		break;
229 	case 'C':
230 		if ('W' == cp[1] && '\0' == cp[2])
231 			ok = 1;
232 		break;
233 	default:
234 		break;
235 	}
236 
237 	if (0 == ok) {
238 		mandoc_vmsg(MANDOCERR_FT_BAD, man->parse,
239 		    n->line, n->pos, "ft %s", cp);
240 		*cp = '\0';
241 	}
242 }
243 
244 static void
245 check_part(CHKARGS)
246 {
247 
248 	if (n->type == ROFFT_BODY && n->child == NULL)
249 		mandoc_msg(MANDOCERR_BLK_EMPTY, man->parse,
250 		    n->line, n->pos, man_macronames[n->tok]);
251 }
252 
253 static void
254 check_par(CHKARGS)
255 {
256 
257 	switch (n->type) {
258 	case ROFFT_BLOCK:
259 		if (n->body->child == NULL)
260 			roff_node_delete(man, n);
261 		break;
262 	case ROFFT_BODY:
263 		if (n->child == NULL)
264 			mandoc_vmsg(MANDOCERR_PAR_SKIP,
265 			    man->parse, n->line, n->pos,
266 			    "%s empty", man_macronames[n->tok]);
267 		break;
268 	case ROFFT_HEAD:
269 		if (n->child != NULL)
270 			mandoc_vmsg(MANDOCERR_ARG_SKIP,
271 			    man->parse, n->line, n->pos,
272 			    "%s %s%s", man_macronames[n->tok],
273 			    n->child->string,
274 			    n->child->next != NULL ? " ..." : "");
275 		break;
276 	default:
277 		break;
278 	}
279 }
280 
281 static void
282 post_IP(CHKARGS)
283 {
284 
285 	switch (n->type) {
286 	case ROFFT_BLOCK:
287 		if (n->head->child == NULL && n->body->child == NULL)
288 			roff_node_delete(man, n);
289 		break;
290 	case ROFFT_BODY:
291 		if (n->parent->head->child == NULL && n->child == NULL)
292 			mandoc_vmsg(MANDOCERR_PAR_SKIP,
293 			    man->parse, n->line, n->pos,
294 			    "%s empty", man_macronames[n->tok]);
295 		break;
296 	default:
297 		break;
298 	}
299 }
300 
301 static void
302 post_TH(CHKARGS)
303 {
304 	struct roff_node *nb;
305 	const char	*p;
306 
307 	free(man->meta.title);
308 	free(man->meta.vol);
309 	free(man->meta.os);
310 	free(man->meta.msec);
311 	free(man->meta.date);
312 
313 	man->meta.title = man->meta.vol = man->meta.date =
314 	    man->meta.msec = man->meta.os = NULL;
315 
316 	nb = n;
317 
318 	/* ->TITLE<- MSEC DATE OS VOL */
319 
320 	n = n->child;
321 	if (n && n->string) {
322 		for (p = n->string; '\0' != *p; p++) {
323 			/* Only warn about this once... */
324 			if (isalpha((unsigned char)*p) &&
325 			    ! isupper((unsigned char)*p)) {
326 				mandoc_vmsg(MANDOCERR_TITLE_CASE,
327 				    man->parse, n->line,
328 				    n->pos + (p - n->string),
329 				    "TH %s", n->string);
330 				break;
331 			}
332 		}
333 		man->meta.title = mandoc_strdup(n->string);
334 	} else {
335 		man->meta.title = mandoc_strdup("");
336 		mandoc_msg(MANDOCERR_TH_NOTITLE, man->parse,
337 		    nb->line, nb->pos, "TH");
338 	}
339 
340 	/* TITLE ->MSEC<- DATE OS VOL */
341 
342 	if (n)
343 		n = n->next;
344 	if (n && n->string)
345 		man->meta.msec = mandoc_strdup(n->string);
346 	else {
347 		man->meta.msec = mandoc_strdup("");
348 		mandoc_vmsg(MANDOCERR_MSEC_MISSING, man->parse,
349 		    nb->line, nb->pos, "TH %s", man->meta.title);
350 	}
351 
352 	/* TITLE MSEC ->DATE<- OS VOL */
353 
354 	if (n)
355 		n = n->next;
356 	if (n && n->string && '\0' != n->string[0]) {
357 		man->meta.date = man->quick ?
358 		    mandoc_strdup(n->string) :
359 		    mandoc_normdate(man->parse, n->string,
360 			n->line, n->pos);
361 	} else {
362 		man->meta.date = mandoc_strdup("");
363 		mandoc_msg(MANDOCERR_DATE_MISSING, man->parse,
364 		    n ? n->line : nb->line,
365 		    n ? n->pos : nb->pos, "TH");
366 	}
367 
368 	/* TITLE MSEC DATE ->OS<- VOL */
369 
370 	if (n && (n = n->next))
371 		man->meta.os = mandoc_strdup(n->string);
372 	else if (man->defos != NULL)
373 		man->meta.os = mandoc_strdup(man->defos);
374 
375 	/* TITLE MSEC DATE OS ->VOL<- */
376 	/* If missing, use the default VOL name for MSEC. */
377 
378 	if (n && (n = n->next))
379 		man->meta.vol = mandoc_strdup(n->string);
380 	else if ('\0' != man->meta.msec[0] &&
381 	    (NULL != (p = mandoc_a2msec(man->meta.msec))))
382 		man->meta.vol = mandoc_strdup(p);
383 
384 	if (n != NULL && (n = n->next) != NULL)
385 		mandoc_vmsg(MANDOCERR_ARG_EXCESS, man->parse,
386 		    n->line, n->pos, "TH ... %s", n->string);
387 
388 	/*
389 	 * Remove the `TH' node after we've processed it for our
390 	 * meta-data.
391 	 */
392 	roff_node_delete(man, man->last);
393 }
394 
395 static void
396 post_UC(CHKARGS)
397 {
398 	static const char * const bsd_versions[] = {
399 	    "3rd Berkeley Distribution",
400 	    "4th Berkeley Distribution",
401 	    "4.2 Berkeley Distribution",
402 	    "4.3 Berkeley Distribution",
403 	    "4.4 Berkeley Distribution",
404 	};
405 
406 	const char	*p, *s;
407 
408 	n = n->child;
409 
410 	if (n == NULL || n->type != ROFFT_TEXT)
411 		p = bsd_versions[0];
412 	else {
413 		s = n->string;
414 		if (0 == strcmp(s, "3"))
415 			p = bsd_versions[0];
416 		else if (0 == strcmp(s, "4"))
417 			p = bsd_versions[1];
418 		else if (0 == strcmp(s, "5"))
419 			p = bsd_versions[2];
420 		else if (0 == strcmp(s, "6"))
421 			p = bsd_versions[3];
422 		else if (0 == strcmp(s, "7"))
423 			p = bsd_versions[4];
424 		else
425 			p = bsd_versions[0];
426 	}
427 
428 	free(man->meta.os);
429 	man->meta.os = mandoc_strdup(p);
430 }
431 
432 static void
433 post_AT(CHKARGS)
434 {
435 	static const char * const unix_versions[] = {
436 	    "7th Edition",
437 	    "System III",
438 	    "System V",
439 	    "System V Release 2",
440 	};
441 
442 	struct roff_node *nn;
443 	const char	*p, *s;
444 
445 	n = n->child;
446 
447 	if (n == NULL || n->type != ROFFT_TEXT)
448 		p = unix_versions[0];
449 	else {
450 		s = n->string;
451 		if (0 == strcmp(s, "3"))
452 			p = unix_versions[0];
453 		else if (0 == strcmp(s, "4"))
454 			p = unix_versions[1];
455 		else if (0 == strcmp(s, "5")) {
456 			nn = n->next;
457 			if (nn != NULL &&
458 			    nn->type == ROFFT_TEXT &&
459 			    nn->string[0] != '\0')
460 				p = unix_versions[3];
461 			else
462 				p = unix_versions[2];
463 		} else
464 			p = unix_versions[0];
465 	}
466 
467 	free(man->meta.os);
468 	man->meta.os = mandoc_strdup(p);
469 }
470 
471 static void
472 post_vs(CHKARGS)
473 {
474 
475 	if (NULL != n->prev)
476 		return;
477 
478 	switch (n->parent->tok) {
479 	case MAN_SH:
480 	case MAN_SS:
481 		mandoc_vmsg(MANDOCERR_PAR_SKIP, man->parse, n->line, n->pos,
482 		    "%s after %s", man_macronames[n->tok],
483 		    man_macronames[n->parent->tok]);
484 		/* FALLTHROUGH */
485 	case TOKEN_NONE:
486 		/*
487 		 * Don't warn about this because it occurs in pod2man
488 		 * and would cause considerable (unfixable) warnage.
489 		 */
490 		roff_node_delete(man, n);
491 		break;
492 	default:
493 		break;
494 	}
495 }
496