/* $Id: man_validate.c,v 1.156 2021/08/10 12:55:03 schwarze Exp $ */
/*
 * Copyright (c) 2010, 2012-2020 Ingo Schwarze <schwarze@openbsd.org>
 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Validation module for man(7) syntax trees used by mandoc(1).
 */
#include "config.h"

#include <sys/types.h>

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "mandoc_aux.h"
#include "mandoc.h"
#include "roff.h"
#include "man.h"
#include "libmandoc.h"
#include "roff_int.h"
#include "libman.h"
#include "tag.h"

/*
 * Parameter list shared by all validation handlers in this file:
 * the struct roff_man handle and the node being validated.
 */
#define	CHKARGS	  struct roff_man *man, struct roff_node *n

/* Signature of a per-macro validation handler stored in man_valids[]. */
typedef	void	(*v_check)(CHKARGS);

/* Generic checks, some of which are shared by several macros. */
static	void	  check_abort(CHKARGS) __attribute__((__noreturn__));
static	void	  check_par(CHKARGS);
static	void	  check_part(CHKARGS);
static	void	  check_root(CHKARGS);
static	void	  check_tag(struct roff_node *, struct roff_node *);
static	void	  check_text(CHKARGS);

/* Macro-specific handlers, named after the macro they validate. */
static	void	  post_AT(CHKARGS);
static	void	  post_EE(CHKARGS);
static	void	  post_EX(CHKARGS);
static	void	  post_IP(CHKARGS);
static	void	  post_OP(CHKARGS);
static	void	  post_SH(CHKARGS);
static	void	  post_TH(CHKARGS);
static	void	  post_TP(CHKARGS);
static	void	  post_UC(CHKARGS);
static	void	  post_UR(CHKARGS);
static	void	  post_in(CHKARGS);

/*
 * Dispatch table used by man_validate(), indexed by (tok - MAN_TH).
 * The entries are positional, so their order must not be changed.
 * A NULL entry means the macro has no macro-specific check.
 * LP and P point to check_abort() because man_validate() rewrites
 * those tokens to PP before dispatching, so these two slots are
 * not expected to be reached.
 */
static	const v_check man_valids[MAN_MAX - MAN_TH] = {
	post_TH,    /* TH */
	post_SH,    /* SH */
	post_SH,    /* SS */
	post_TP,    /* TP */
	post_TP,    /* TQ */
	check_abort,/* LP */
	check_par,  /* PP */
	check_abort,/* P */
	post_IP,    /* IP */
	NULL,       /* HP */
	NULL,       /* SM */
	NULL,       /* SB */
	NULL,       /* BI */
	NULL,       /* IB */
	NULL,       /* BR */
	NULL,       /* RB */
	NULL,       /* R */
	NULL,       /* B */
	NULL,       /* I */
	NULL,       /* IR */
	NULL,       /* RI */
	NULL,       /* RE */
	check_part, /* RS */
	NULL,       /* DT */
	post_UC,    /* UC */
	NULL,       /* PD */
	post_AT,    /* AT */
	post_in,    /* in */
	NULL,       /* SY */
	NULL,       /* YS */
	post_OP,    /* OP */
	post_EX,    /* EX */
	post_EE,    /* EE */
	post_UR,    /* UR */
	NULL,       /* UE */
	post_UR,    /* MT */
	NULL,       /* ME */
};
105 106 107 /* Validate the subtree rooted at man->last. */ 108 void 109 man_validate(struct roff_man *man) 110 { 111 struct roff_node *n; 112 const v_check *cp; 113 114 /* 115 * Translate obsolete macros such that later code 116 * does not need to look for them. 117 */ 118 119 n = man->last; 120 switch (n->tok) { 121 case MAN_LP: 122 case MAN_P: 123 n->tok = MAN_PP; 124 break; 125 default: 126 break; 127 } 128 129 /* 130 * Iterate over all children, recursing into each one 131 * in turn, depth-first. 132 */ 133 134 man->last = man->last->child; 135 while (man->last != NULL) { 136 man_validate(man); 137 if (man->last == n) 138 man->last = man->last->child; 139 else 140 man->last = man->last->next; 141 } 142 143 /* Finally validate the macro itself. */ 144 145 man->last = n; 146 man->next = ROFF_NEXT_SIBLING; 147 switch (n->type) { 148 case ROFFT_TEXT: 149 check_text(man, n); 150 break; 151 case ROFFT_ROOT: 152 check_root(man, n); 153 break; 154 case ROFFT_COMMENT: 155 case ROFFT_EQN: 156 case ROFFT_TBL: 157 break; 158 default: 159 if (n->tok < ROFF_MAX) { 160 roff_validate(man); 161 break; 162 } 163 assert(n->tok >= MAN_TH && n->tok < MAN_MAX); 164 cp = man_valids + (n->tok - MAN_TH); 165 if (*cp) 166 (*cp)(man, n); 167 if (man->last == n) 168 n->flags |= NODE_VALID; 169 break; 170 } 171 } 172 173 static void 174 check_root(CHKARGS) 175 { 176 assert((man->flags & (MAN_BLINE | MAN_ELINE)) == 0); 177 178 if (n->last == NULL || n->last->type == ROFFT_COMMENT) 179 mandoc_msg(MANDOCERR_DOC_EMPTY, n->line, n->pos, NULL); 180 else 181 man->meta.hasbody = 1; 182 183 if (NULL == man->meta.title) { 184 mandoc_msg(MANDOCERR_TH_NOTITLE, n->line, n->pos, NULL); 185 186 /* 187 * If a title hasn't been set, do so now (by 188 * implication, date and section also aren't set). 
189 */ 190 191 man->meta.title = mandoc_strdup(""); 192 man->meta.msec = mandoc_strdup(""); 193 man->meta.date = mandoc_normdate(NULL, NULL); 194 } 195 196 if (man->meta.os_e && 197 (man->meta.rcsids & (1 << man->meta.os_e)) == 0) 198 mandoc_msg(MANDOCERR_RCS_MISSING, 0, 0, 199 man->meta.os_e == MANDOC_OS_OPENBSD ? 200 "(OpenBSD)" : "(NetBSD)"); 201 } 202 203 static void 204 check_abort(CHKARGS) 205 { 206 abort(); 207 } 208 209 /* 210 * Skip leading whitespace, dashes, backslashes, and font escapes, 211 * then create a tag if the first following byte is a letter. 212 * Priority is high unless whitespace is present. 213 */ 214 static void 215 check_tag(struct roff_node *n, struct roff_node *nt) 216 { 217 const char *cp, *arg; 218 int prio, sz; 219 220 if (nt == NULL || nt->type != ROFFT_TEXT) 221 return; 222 223 cp = nt->string; 224 prio = TAG_STRONG; 225 for (;;) { 226 switch (*cp) { 227 case ' ': 228 case '\t': 229 prio = TAG_WEAK; 230 /* FALLTHROUGH */ 231 case '-': 232 cp++; 233 break; 234 case '\\': 235 cp++; 236 switch (mandoc_escape(&cp, &arg, &sz)) { 237 case ESCAPE_FONT: 238 case ESCAPE_FONTBOLD: 239 case ESCAPE_FONTITALIC: 240 case ESCAPE_FONTBI: 241 case ESCAPE_FONTROMAN: 242 case ESCAPE_FONTCR: 243 case ESCAPE_FONTCB: 244 case ESCAPE_FONTCI: 245 case ESCAPE_FONTPREV: 246 case ESCAPE_IGNORE: 247 break; 248 case ESCAPE_SPECIAL: 249 if (sz != 1) 250 return; 251 switch (*arg) { 252 case '-': 253 case 'e': 254 break; 255 default: 256 return; 257 } 258 break; 259 default: 260 return; 261 } 262 break; 263 default: 264 if (isalpha((unsigned char)*cp)) 265 tag_put(cp, prio, n); 266 return; 267 } 268 } 269 } 270 271 static void 272 check_text(CHKARGS) 273 { 274 char *cp, *p; 275 276 if (n->flags & NODE_NOFILL) 277 return; 278 279 cp = n->string; 280 for (p = cp; NULL != (p = strchr(p, '\t')); p++) 281 mandoc_msg(MANDOCERR_FI_TAB, 282 n->line, n->pos + (int)(p - cp), NULL); 283 } 284 285 static void 286 post_EE(CHKARGS) 287 { 288 if ((n->flags & NODE_NOFILL) == 0) 
289 mandoc_msg(MANDOCERR_FI_SKIP, n->line, n->pos, "EE"); 290 } 291 292 static void 293 post_EX(CHKARGS) 294 { 295 if (n->flags & NODE_NOFILL) 296 mandoc_msg(MANDOCERR_NF_SKIP, n->line, n->pos, "EX"); 297 } 298 299 static void 300 post_OP(CHKARGS) 301 { 302 303 if (n->child == NULL) 304 mandoc_msg(MANDOCERR_OP_EMPTY, n->line, n->pos, "OP"); 305 else if (n->child->next != NULL && n->child->next->next != NULL) { 306 n = n->child->next->next; 307 mandoc_msg(MANDOCERR_ARG_EXCESS, 308 n->line, n->pos, "OP ... %s", n->string); 309 } 310 } 311 312 static void 313 post_SH(CHKARGS) 314 { 315 struct roff_node *nc; 316 char *cp, *tag; 317 318 nc = n->child; 319 switch (n->type) { 320 case ROFFT_HEAD: 321 tag = NULL; 322 deroff(&tag, n); 323 if (tag != NULL) { 324 for (cp = tag; *cp != '\0'; cp++) 325 if (*cp == ' ') 326 *cp = '_'; 327 if (nc != NULL && nc->type == ROFFT_TEXT && 328 strcmp(nc->string, tag) == 0) 329 tag_put(NULL, TAG_STRONG, n); 330 else 331 tag_put(tag, TAG_FALLBACK, n); 332 free(tag); 333 } 334 return; 335 case ROFFT_BODY: 336 if (nc != NULL) 337 break; 338 return; 339 default: 340 return; 341 } 342 343 if (nc->tok == MAN_PP && nc->body->child != NULL) { 344 while (nc->body->last != NULL) { 345 man->next = ROFF_NEXT_CHILD; 346 roff_node_relink(man, nc->body->last); 347 man->last = n; 348 } 349 } 350 351 if (nc->tok == MAN_PP || nc->tok == ROFF_sp || nc->tok == ROFF_br) { 352 mandoc_msg(MANDOCERR_PAR_SKIP, nc->line, nc->pos, 353 "%s after %s", roff_name[nc->tok], roff_name[n->tok]); 354 roff_node_delete(man, nc); 355 } 356 357 /* 358 * Trailing PP is empty, so it is deleted by check_par(). 359 * Trailing sp is significant. 
360 */ 361 362 if ((nc = n->last) != NULL && nc->tok == ROFF_br) { 363 mandoc_msg(MANDOCERR_PAR_SKIP, 364 nc->line, nc->pos, "%s at the end of %s", 365 roff_name[nc->tok], roff_name[n->tok]); 366 roff_node_delete(man, nc); 367 } 368 } 369 370 static void 371 post_UR(CHKARGS) 372 { 373 if (n->type == ROFFT_HEAD && n->child == NULL) 374 mandoc_msg(MANDOCERR_UR_NOHEAD, n->line, n->pos, 375 "%s", roff_name[n->tok]); 376 check_part(man, n); 377 } 378 379 static void 380 check_part(CHKARGS) 381 { 382 383 if (n->type == ROFFT_BODY && n->child == NULL) 384 mandoc_msg(MANDOCERR_BLK_EMPTY, n->line, n->pos, 385 "%s", roff_name[n->tok]); 386 } 387 388 static void 389 check_par(CHKARGS) 390 { 391 392 switch (n->type) { 393 case ROFFT_BLOCK: 394 if (n->body->child == NULL) 395 roff_node_delete(man, n); 396 break; 397 case ROFFT_BODY: 398 if (n->child != NULL && 399 (n->child->tok == ROFF_sp || n->child->tok == ROFF_br)) { 400 mandoc_msg(MANDOCERR_PAR_SKIP, 401 n->child->line, n->child->pos, 402 "%s after %s", roff_name[n->child->tok], 403 roff_name[n->tok]); 404 roff_node_delete(man, n->child); 405 } 406 if (n->child == NULL) 407 mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, 408 "%s empty", roff_name[n->tok]); 409 break; 410 case ROFFT_HEAD: 411 if (n->child != NULL) 412 mandoc_msg(MANDOCERR_ARG_SKIP, 413 n->line, n->pos, "%s %s%s", 414 roff_name[n->tok], n->child->string, 415 n->child->next != NULL ? " ..." 
: ""); 416 break; 417 default: 418 break; 419 } 420 } 421 422 static void 423 post_IP(CHKARGS) 424 { 425 switch (n->type) { 426 case ROFFT_BLOCK: 427 if (n->head->child == NULL && n->body->child == NULL) 428 roff_node_delete(man, n); 429 break; 430 case ROFFT_HEAD: 431 check_tag(n, n->child); 432 break; 433 case ROFFT_BODY: 434 if (n->parent->head->child == NULL && n->child == NULL) 435 mandoc_msg(MANDOCERR_PAR_SKIP, n->line, n->pos, 436 "%s empty", roff_name[n->tok]); 437 break; 438 default: 439 break; 440 } 441 } 442 443 /* 444 * The first next-line element in the head is the tag. 445 * If that's a font macro, use its first child instead. 446 */ 447 static void 448 post_TP(CHKARGS) 449 { 450 struct roff_node *nt; 451 452 if (n->type != ROFFT_HEAD || (nt = n->child) == NULL) 453 return; 454 455 while ((nt->flags & NODE_LINE) == 0) 456 if ((nt = nt->next) == NULL) 457 return; 458 459 switch (nt->tok) { 460 case MAN_B: 461 case MAN_BI: 462 case MAN_BR: 463 case MAN_I: 464 case MAN_IB: 465 case MAN_IR: 466 nt = nt->child; 467 break; 468 default: 469 break; 470 } 471 check_tag(n, nt); 472 } 473 474 static void 475 post_TH(CHKARGS) 476 { 477 struct roff_node *nb; 478 const char *p; 479 480 free(man->meta.title); 481 free(man->meta.vol); 482 free(man->meta.os); 483 free(man->meta.msec); 484 free(man->meta.date); 485 486 man->meta.title = man->meta.vol = man->meta.date = 487 man->meta.msec = man->meta.os = NULL; 488 489 nb = n; 490 491 /* ->TITLE<- MSEC DATE OS VOL */ 492 493 n = n->child; 494 if (n != NULL && n->string != NULL) { 495 for (p = n->string; *p != '\0'; p++) { 496 /* Only warn about this once... */ 497 if (isalpha((unsigned char)*p) && 498 ! 
isupper((unsigned char)*p)) { 499 mandoc_msg(MANDOCERR_TITLE_CASE, n->line, 500 n->pos + (int)(p - n->string), 501 "TH %s", n->string); 502 break; 503 } 504 } 505 man->meta.title = mandoc_strdup(n->string); 506 } else { 507 man->meta.title = mandoc_strdup(""); 508 mandoc_msg(MANDOCERR_TH_NOTITLE, nb->line, nb->pos, "TH"); 509 } 510 511 /* TITLE ->MSEC<- DATE OS VOL */ 512 513 if (n != NULL) 514 n = n->next; 515 if (n != NULL && n->string != NULL) { 516 man->meta.msec = mandoc_strdup(n->string); 517 if (man->filesec != '\0' && 518 man->filesec != *n->string && 519 *n->string >= '1' && *n->string <= '9') 520 mandoc_msg(MANDOCERR_MSEC_FILE, n->line, n->pos, 521 "*.%c vs TH ... %c", man->filesec, *n->string); 522 } else { 523 man->meta.msec = mandoc_strdup(""); 524 mandoc_msg(MANDOCERR_MSEC_MISSING, 525 nb->line, nb->pos, "TH %s", man->meta.title); 526 } 527 528 /* TITLE MSEC ->DATE<- OS VOL */ 529 530 if (n != NULL) 531 n = n->next; 532 if (man->quick && n != NULL) 533 man->meta.date = mandoc_strdup(""); 534 else 535 man->meta.date = mandoc_normdate(n, nb); 536 537 /* TITLE MSEC DATE ->OS<- VOL */ 538 539 if (n && (n = n->next)) 540 man->meta.os = mandoc_strdup(n->string); 541 else if (man->os_s != NULL) 542 man->meta.os = mandoc_strdup(man->os_s); 543 if (man->meta.os_e == MANDOC_OS_OTHER && man->meta.os != NULL) { 544 if (strstr(man->meta.os, "OpenBSD") != NULL) 545 man->meta.os_e = MANDOC_OS_OPENBSD; 546 else if (strstr(man->meta.os, "NetBSD") != NULL) 547 man->meta.os_e = MANDOC_OS_NETBSD; 548 } 549 550 /* TITLE MSEC DATE OS ->VOL<- */ 551 /* If missing, use the default VOL name for MSEC. */ 552 553 if (n && (n = n->next)) 554 man->meta.vol = mandoc_strdup(n->string); 555 else if ('\0' != man->meta.msec[0] && 556 (NULL != (p = mandoc_a2msec(man->meta.msec)))) 557 man->meta.vol = mandoc_strdup(p); 558 559 if (n != NULL && (n = n->next) != NULL) 560 mandoc_msg(MANDOCERR_ARG_EXCESS, 561 n->line, n->pos, "TH ... 
%s", n->string); 562 563 /* 564 * Remove the `TH' node after we've processed it for our 565 * meta-data. 566 */ 567 roff_node_delete(man, man->last); 568 } 569 570 static void 571 post_UC(CHKARGS) 572 { 573 static const char * const bsd_versions[] = { 574 "3rd Berkeley Distribution", 575 "4th Berkeley Distribution", 576 "4.2 Berkeley Distribution", 577 "4.3 Berkeley Distribution", 578 "4.4 Berkeley Distribution", 579 }; 580 581 const char *p, *s; 582 583 n = n->child; 584 585 if (n == NULL || n->type != ROFFT_TEXT) 586 p = bsd_versions[0]; 587 else { 588 s = n->string; 589 if (0 == strcmp(s, "3")) 590 p = bsd_versions[0]; 591 else if (0 == strcmp(s, "4")) 592 p = bsd_versions[1]; 593 else if (0 == strcmp(s, "5")) 594 p = bsd_versions[2]; 595 else if (0 == strcmp(s, "6")) 596 p = bsd_versions[3]; 597 else if (0 == strcmp(s, "7")) 598 p = bsd_versions[4]; 599 else 600 p = bsd_versions[0]; 601 } 602 603 free(man->meta.os); 604 man->meta.os = mandoc_strdup(p); 605 } 606 607 static void 608 post_AT(CHKARGS) 609 { 610 static const char * const unix_versions[] = { 611 "7th Edition", 612 "System III", 613 "System V", 614 "System V Release 2", 615 }; 616 617 struct roff_node *nn; 618 const char *p, *s; 619 620 n = n->child; 621 622 if (n == NULL || n->type != ROFFT_TEXT) 623 p = unix_versions[0]; 624 else { 625 s = n->string; 626 if (0 == strcmp(s, "3")) 627 p = unix_versions[0]; 628 else if (0 == strcmp(s, "4")) 629 p = unix_versions[1]; 630 else if (0 == strcmp(s, "5")) { 631 nn = n->next; 632 if (nn != NULL && 633 nn->type == ROFFT_TEXT && 634 nn->string[0] != '\0') 635 p = unix_versions[3]; 636 else 637 p = unix_versions[2]; 638 } else 639 p = unix_versions[0]; 640 } 641 642 free(man->meta.os); 643 man->meta.os = mandoc_strdup(p); 644 } 645 646 static void 647 post_in(CHKARGS) 648 { 649 char *s; 650 651 if (n->parent->tok != MAN_TP || 652 n->parent->type != ROFFT_HEAD || 653 n->child == NULL || 654 *n->child->string == '+' || 655 *n->child->string == '-') 656 return; 
657 mandoc_asprintf(&s, "+%s", n->child->string); 658 free(n->child->string); 659 n->child->string = s; 660 } 661