1 /* $Id: man_validate.c,v 1.149 2019/06/27 15:07:30 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2018 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <errno.h> 25 #include <limits.h> 26 #include <stdarg.h> 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <string.h> 30 #include <time.h> 31 32 #include "mandoc_aux.h" 33 #include "mandoc.h" 34 #include "roff.h" 35 #include "man.h" 36 #include "libmandoc.h" 37 #include "roff_int.h" 38 #include "libman.h" 39 40 #define CHKARGS struct roff_man *man, struct roff_node *n 41 42 typedef void (*v_check)(CHKARGS); 43 44 static void check_abort(CHKARGS) __attribute__((__noreturn__)); 45 static void check_par(CHKARGS); 46 static void check_part(CHKARGS); 47 static void check_root(CHKARGS); 48 static void check_text(CHKARGS); 49 50 static void post_AT(CHKARGS); 51 static void post_EE(CHKARGS); 52 static void post_EX(CHKARGS); 53 static void post_IP(CHKARGS); 54 static void post_OP(CHKARGS); 55 static void post_SH(CHKARGS); 56 static void post_TH(CHKARGS); 57 static void post_UC(CHKARGS); 58 static void post_UR(CHKARGS); 59 static void post_in(CHKARGS); 60 61 static const v_check man_valids[MAN_MAX - MAN_TH] = { 62 post_TH, /* TH */ 63 post_SH, /* SH */ 64 post_SH, /* SS */ 65 NULL, /* TP */ 66 NULL, /* TQ */ 67 check_abort,/* LP */ 68 check_par, /* PP */ 69 check_abort,/* P */ 70 post_IP, /* IP */ 71 NULL, /* HP */ 72 NULL, /* SM */ 73 NULL, /* SB */ 74 NULL, /* BI */ 75 NULL, /* IB */ 76 NULL, /* BR */ 77 NULL, /* RB */ 78 NULL, /* R */ 79 NULL, /* B */ 80 NULL, /* I */ 81 NULL, /* IR */ 82 NULL, /* RI */ 83 NULL, /* RE */ 84 check_part, /* RS */ 85 NULL, /* DT */ 86 post_UC, /* UC */ 87 NULL, /* PD */ 88 post_AT, /* AT */ 89 post_in, /* in */ 90 NULL, /* SY */ 91 NULL, /* YS */ 92 post_OP, /* OP */ 93 post_EX, /* EX */ 94 post_EE, /* EE */ 95 post_UR, /* UR */ 96 NULL, /* UE */ 97 post_UR, /* MT */ 98 NULL, /* ME */ 99 }; 100 101 102 /* Validate the subtree rooted at man->last. */ 103 void 104 man_validate(struct roff_man *man) 105 { 106 struct roff_node *n; 107 const v_check *cp; 108 109 /* 110 * Translate obsolete macros such that later code 111 * does not need to look for them. 112 */ 113 114 n = man->last; 115 switch (n->tok) { 116 case MAN_LP: 117 case MAN_P: 118 n->tok = MAN_PP; 119 break; 120 default: 121 break; 122 } 123 124 /* 125 * Iterate over all children, recursing into each one 126 * in turn, depth-first. 127 */ 128 129 man->last = man->last->child; 130 while (man->last != NULL) { 131 man_validate(man); 132 if (man->last == n) 133 man->last = man->last->child; 134 else 135 man->last = man->last->next; 136 } 137 138 /* Finally validate the macro itself. */ 139 140 man->last = n; 141 man->next = ROFF_NEXT_SIBLING; 142 switch (n->type) { 143 case ROFFT_TEXT: 144 check_text(man, n); 145 break; 146 case ROFFT_ROOT: 147 check_root(man, n); 148 break; 149 case ROFFT_COMMENT: 150 case ROFFT_EQN: 151 case ROFFT_TBL: 152 break; 153 default: 154 if (n->tok < ROFF_MAX) { 155 roff_validate(man); 156 break; 157 } 158 assert(n->tok >= MAN_TH && n->tok < MAN_MAX); 159 cp = man_valids + (n->tok - MAN_TH); 160 if (*cp) 161 (*cp)(man, n); 162 if (man->last == n) 163 n->flags |= NODE_VALID; 164 break; 165 } 166 } 167 168 static void 169 check_root(CHKARGS) 170 { 171 assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0); 172 173 if (n->last == NULL || n->last->type == ROFFT_COMMENT) 174 mandoc_msg(MANDOCERR_DOC_EMPTY, n->line, n->pos, NULL); 175 else 176 man->meta.hasbody = 1; 177 178 if (NULL == man->meta.title) { 179 mandoc_msg(MANDOCERR_TH_NOTITLE, n->line, n->pos, NULL); 180 181 /* 182 * If a title hasn't been set, do so now (by 183 * implication, date and section also aren't set). 184 */ 185 186 man->meta.title = mandoc_strdup(""); 187 man->meta.msec = mandoc_strdup(""); 188 man->meta.date = mandoc_normdate(man, NULL, n->line, n->pos); 189 } 190 191 if (man->meta.os_e && 192 (man->meta.rcsids & (1 << man->meta.os_e)) == 0) 193 mandoc_msg(MANDOCERR_RCS_MISSING, 0, 0, 194 man->meta.os_e == MANDOC_OS_OPENBSD ? 195 "(OpenBSD)" : "(NetBSD)"); 196 } 197 198 static void 199 check_abort(CHKARGS) 200 { 201 abort(); 202 } 203 204 static void 205 check_text(CHKARGS) 206 { 207 char *cp, *p; 208 209 if (n->flags & NODE_NOFILL) 210 return; 211 212 cp = n->string; 213 for (p = cp; NULL != (p = strchr(p, '\t')); p++) 214 mandoc_msg(MANDOCERR_FI_TAB, 215 n->line, n->pos + (int)(p - cp), NULL); 216 } 217 218 static void 219 post_EE(CHKARGS) 220 { 221 if ((n->flags & NODE_NOFILL) == 0) 222 mandoc_msg(MANDOCERR_FI_SKIP, n->line, n->pos, "EE"); 223 } 224 225 static void 226 post_EX(CHKARGS) 227 { 228 if (n->flags & NODE_NOFILL) 229 mandoc_msg(MANDOCERR_NF_SKIP, n->line, n->pos, "EX"); 230 } 231 232 static void 233 post_OP(CHKARGS) 234 { 235 236 if (n->child == NULL) 237 mandoc_msg(MANDOCERR_OP_EMPTY, n->line, n->pos, "OP"); 238 else if (n->child->next != NULL && n->child->next->next != NULL) { 239 n = n->child->next->next; 240 mandoc_msg(MANDOCERR_ARG_EXCESS, 241 n->line, n->pos, "OP ... %s", n->string); 242 } 243 } 244 245 static void 246 post_SH(CHKARGS) 247 { 248 struct roff_node *nc; 249 250 if (n->type != ROFFT_BODY || (nc = n->child) == NULL) 251 return; 252 253 if (nc->tok == MAN_PP && nc->body->child != NULL) { 254 while (nc->body->last != NULL) { 255 man->next = ROFF_NEXT_CHILD; 256 roff_node_relink(man, nc->body->last); 257 man->last = n; 258 } 259 } 260 261 if (nc->tok == MAN_PP || nc->tok == ROFF_sp || nc->tok == ROFF_br) { 262 mandoc_msg(MANDOCERR_PAR_SKIP, nc->line, nc->pos, 263 "%s after %s", roff_name[nc->tok], roff_name[n->tok]); 264 roff_node_delete(man, nc); 265 } 266 267 /* 268 * Trailing PP is empty, so it is deleted by check_par(). 269 * Trailing sp is significant. 270 */ 271 272 if ((nc = n->last) != NULL && nc->tok == ROFF_br) { 273 mandoc_msg(MANDOCERR_PAR_SKIP, 274 nc->line, nc->pos, "%s at the end of %s", 275 roff_name[nc->tok], roff_name[n->tok]); 276 roff_node_delete(man, nc); 277 } 278 } 279 280 static void 281 post_UR(CHKARGS) 282 { 283 if (n->type == ROFFT_HEAD && n->child == NULL) 284 mandoc_msg(MANDOCERR_UR_NOHEAD, n->line, n->pos, 285 "%s", roff_name[n->tok]); 286 check_part(man, n); 287 } 288 289 static void 290 check_part(CHKARGS) 291 { 292 293 if (n->type == ROFFT_BODY && n->child == NULL) 294 mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos, 295 "%s", roff_name[n->tok]); 296 } 297 298 static void 299 check_par(CHKARGS) 300 { 301 302 switch (n->type) { 303 case ROFFT_BLOCK: 304 if (n->body->child == NULL) 305 roff_node_delete(man, n); 306 break; 307 case ROFFT_BODY: 308 if (n->child != NULL && 309 (n->child->tok == ROFF_sp || n->child->tok == ROFF_br)) { 310 mandoc_msg(MANDOCERR_PAR_SKIP, 311 n->child->line, n->child->pos, 312 "%s after %s", roff_name[n->child->tok], 313 roff_name[n->tok]); 314 roff_node_delete(man, n->child); 315 } 316 if (n->child == NULL) 317 mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, 318 "%s empty", roff_name[n->tok]); 319 break; 320 case ROFFT_HEAD: 321 if (n->child != NULL) 322 mandoc_msg(MANDOCERR_ARG_SKIP, 323 n->line, n->pos, "%s %s%s", 324 roff_name[n->tok], n->child->string, 325 n->child->next != NULL ? " ..." : ""); 326 break; 327 default: 328 break; 329 } 330 } 331 332 static void 333 post_IP(CHKARGS) 334 { 335 336 switch (n->type) { 337 case ROFFT_BLOCK: 338 if (n->head->child == NULL && n->body->child == NULL) 339 roff_node_delete(man, n); 340 break; 341 case ROFFT_BODY: 342 if (n->parent->head->child == NULL && n->child == NULL) 343 mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, 344 "%s empty", roff_name[n->tok]); 345 break; 346 default: 347 break; 348 } 349 } 350 351 static void 352 post_TH(CHKARGS) 353 { 354 struct roff_node *nb; 355 const char *p; 356 357 free(man->meta.title); 358 free(man->meta.vol); 359 free(man->meta.os); 360 free(man->meta.msec); 361 free(man->meta.date); 362 363 man->meta.title = man->meta.vol = man->meta.date = 364 man->meta.msec = man->meta.os = NULL; 365 366 nb = n; 367 368 /* ->TITLE<- MSEC DATE OS VOL */ 369 370 n = n->child; 371 if (n != NULL && n->string != NULL) { 372 for (p = n->string; *p != '\0'; p++) { 373 /* Only warn about this once... */ 374 if (isalpha((unsigned char)*p) && 375 ! isupper((unsigned char)*p)) { 376 mandoc_msg(MANDOCERR_TITLE_CASE, n->line, 377 n->pos + (int)(p - n->string), 378 "TH %s", n->string); 379 break; 380 } 381 } 382 man->meta.title = mandoc_strdup(n->string); 383 } else { 384 man->meta.title = mandoc_strdup(""); 385 mandoc_msg(MANDOCERR_TH_NOTITLE, nb->line, nb->pos, "TH"); 386 } 387 388 /* TITLE ->MSEC<- DATE OS VOL */ 389 390 if (n != NULL) 391 n = n->next; 392 if (n != NULL && n->string != NULL) 393 man->meta.msec = mandoc_strdup(n->string); 394 else { 395 man->meta.msec = mandoc_strdup(""); 396 mandoc_msg(MANDOCERR_MSEC_MISSING, 397 nb->line, nb->pos, "TH %s", man->meta.title); 398 } 399 400 /* TITLE MSEC ->DATE<- OS VOL */ 401 402 if (n != NULL) 403 n = n->next; 404 if (n != NULL && n->string != NULL && n->string[0] != '\0') 405 man->meta.date = mandoc_normdate(man, 406 n->string, n->line, n->pos); 407 else { 408 man->meta.date = mandoc_strdup(""); 409 mandoc_msg(MANDOCERR_DATE_MISSING, 410 n == NULL ? nb->line : n->line, 411 n == NULL ? nb->pos : n->pos, "TH"); 412 } 413 414 /* TITLE MSEC DATE ->OS<- VOL */ 415 416 if (n && (n = n->next)) 417 man->meta.os = mandoc_strdup(n->string); 418 else if (man->os_s != NULL) 419 man->meta.os = mandoc_strdup(man->os_s); 420 if (man->meta.os_e == MANDOC_OS_OTHER && man->meta.os != NULL) { 421 if (strstr(man->meta.os, "OpenBSD") != NULL) 422 man->meta.os_e = MANDOC_OS_OPENBSD; 423 else if (strstr(man->meta.os, "NetBSD") != NULL) 424 man->meta.os_e = MANDOC_OS_NETBSD; 425 } 426 427 /* TITLE MSEC DATE OS ->VOL<- */ 428 /* If missing, use the default VOL name for MSEC. */ 429 430 if (n && (n = n->next)) 431 man->meta.vol = mandoc_strdup(n->string); 432 else if ('\0' != man->meta.msec[0] && 433 (NULL != (p = mandoc_a2msec(man->meta.msec)))) 434 man->meta.vol = mandoc_strdup(p); 435 436 if (n != NULL && (n = n->next) != NULL) 437 mandoc_msg(MANDOCERR_ARG_EXCESS, 438 n->line, n->pos, "TH ... %s", n->string); 439 440 /* 441 * Remove the `TH' node after we've processed it for our 442 * meta-data. 443 */ 444 roff_node_delete(man, man->last); 445 } 446 447 static void 448 post_UC(CHKARGS) 449 { 450 static const char * const bsd_versions[] = { 451 "3rd Berkeley Distribution", 452 "4th Berkeley Distribution", 453 "4.2 Berkeley Distribution", 454 "4.3 Berkeley Distribution", 455 "4.4 Berkeley Distribution", 456 }; 457 458 const char *p, *s; 459 460 n = n->child; 461 462 if (n == NULL || n->type != ROFFT_TEXT) 463 p = bsd_versions[0]; 464 else { 465 s = n->string; 466 if (0 == strcmp(s, "3")) 467 p = bsd_versions[0]; 468 else if (0 == strcmp(s, "4")) 469 p = bsd_versions[1]; 470 else if (0 == strcmp(s, "5")) 471 p = bsd_versions[2]; 472 else if (0 == strcmp(s, "6")) 473 p = bsd_versions[3]; 474 else if (0 == strcmp(s, "7")) 475 p = bsd_versions[4]; 476 else 477 p = bsd_versions[0]; 478 } 479 480 free(man->meta.os); 481 man->meta.os = mandoc_strdup(p); 482 } 483 484 static void 485 post_AT(CHKARGS) 486 { 487 static const char * const unix_versions[] = { 488 "7th Edition", 489 "System III", 490 "System V", 491 "System V Release 2", 492 }; 493 494 struct roff_node *nn; 495 const char *p, *s; 496 497 n = n->child; 498 499 if (n == NULL || n->type != ROFFT_TEXT) 500 p = unix_versions[0]; 501 else { 502 s = n->string; 503 if (0 == strcmp(s, "3")) 504 p = unix_versions[0]; 505 else if (0 == strcmp(s, "4")) 506 p = unix_versions[1]; 507 else if (0 == strcmp(s, "5")) { 508 nn = n->next; 509 if (nn != NULL && 510 nn->type == ROFFT_TEXT && 511 nn->string[0] != '\0') 512 p = unix_versions[3]; 513 else 514 p = unix_versions[2]; 515 } else 516 p = unix_versions[0]; 517 } 518 519 free(man->meta.os); 520 man->meta.os = mandoc_strdup(p); 521 } 522 523 static void 524 post_in(CHKARGS) 525 { 526 char *s; 527 528 if (n->parent->tok != MAN_TP || 529 n->parent->type != ROFFT_HEAD || 530 n->child == NULL || 531 *n->child->string == '+' || 532 *n->child->string == '-') 533 return; 534 mandoc_asprintf(&s, "+%s", n->child->string); 535 free(n->child->string); 536 n->child->string = s; 537 } 538