1 /* $Id: man_validate.c,v 1.86 2013/10/17 20:54:58 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #ifdef HAVE_CONFIG_H 19 #include "config.h" 20 #endif 21 22 #include <sys/types.h> 23 24 #include <assert.h> 25 #include <ctype.h> 26 #include <errno.h> 27 #include <limits.h> 28 #include <stdarg.h> 29 #include <stdlib.h> 30 #include <string.h> 31 #include <time.h> 32 33 #include "man.h" 34 #include "mandoc.h" 35 #include "libman.h" 36 #include "libmandoc.h" 37 38 #define CHKARGS struct man *man, struct man_node *n 39 40 typedef int (*v_check)(CHKARGS); 41 42 struct man_valid { 43 v_check *pres; 44 v_check *posts; 45 }; 46 47 static int check_eq0(CHKARGS); 48 static int check_eq2(CHKARGS); 49 static int check_le1(CHKARGS); 50 static int check_ge2(CHKARGS); 51 static int check_le5(CHKARGS); 52 static int check_head1(CHKARGS); 53 static int check_par(CHKARGS); 54 static int check_part(CHKARGS); 55 static int check_root(CHKARGS); 56 static void check_text(CHKARGS); 57 58 static int post_AT(CHKARGS); 59 static int post_IP(CHKARGS); 60 static int post_vs(CHKARGS); 61 static int post_fi(CHKARGS); 62 static int post_ft(CHKARGS); 63 static int post_nf(CHKARGS); 64 static int post_sec(CHKARGS); 65 static int post_TH(CHKARGS); 66 static int post_UC(CHKARGS); 67 static int pre_sec(CHKARGS); 68 69 static v_check posts_at[] = { post_AT, NULL }; 70 static v_check posts_br[] = { post_vs, check_eq0, NULL }; 71 static v_check posts_eq0[] = { check_eq0, NULL }; 72 static v_check posts_eq2[] = { check_eq2, NULL }; 73 static v_check posts_fi[] = { check_eq0, post_fi, NULL }; 74 static v_check posts_ft[] = { post_ft, NULL }; 75 static v_check posts_ip[] = { post_IP, NULL }; 76 static v_check posts_le1[] = { check_le1, NULL }; 77 static v_check posts_nf[] = { check_eq0, post_nf, NULL }; 78 static v_check posts_par[] = { check_par, NULL }; 79 static v_check posts_part[] = { check_part, NULL }; 80 static v_check posts_sec[] = { post_sec, NULL }; 81 static v_check posts_sp[] = { post_vs, check_le1, NULL }; 82 static v_check posts_th[] = { check_ge2, check_le5, post_TH, NULL }; 83 static v_check posts_uc[] = { post_UC, NULL }; 84 static v_check posts_ur[] = { check_head1, check_part, NULL }; 85 static v_check pres_sec[] = { pre_sec, NULL }; 86 87 static const struct man_valid man_valids[MAN_MAX] = { 88 { NULL, posts_br }, /* br */ 89 { NULL, posts_th }, /* TH */ 90 { pres_sec, posts_sec }, /* SH */ 91 { pres_sec, posts_sec }, /* SS */ 92 { NULL, NULL }, /* TP */ 93 { NULL, posts_par }, /* LP */ 94 { NULL, posts_par }, /* PP */ 95 { NULL, posts_par }, /* P */ 96 { NULL, posts_ip }, /* IP */ 97 { NULL, NULL }, /* HP */ 98 { NULL, NULL }, /* SM */ 99 { NULL, NULL }, /* SB */ 100 { NULL, NULL }, /* BI */ 101 { NULL, NULL }, /* IB */ 102 { NULL, NULL }, /* BR */ 103 { NULL, NULL }, /* RB */ 104 { NULL, NULL }, /* R */ 105 { NULL, NULL }, /* B */ 106 { NULL, NULL }, /* I */ 107 { NULL, NULL }, /* IR */ 108 { NULL, NULL }, /* RI */ 109 { NULL, posts_eq0 }, /* na */ 110 { NULL, posts_sp }, /* sp */ 111 { NULL, posts_nf }, /* nf */ 112 { NULL, posts_fi }, /* fi */ 113 { NULL, NULL }, /* RE */ 114 { NULL, posts_part }, /* RS */ 115 { NULL, NULL }, /* DT */ 116 { NULL, posts_uc }, /* UC */ 117 { NULL, posts_le1 }, /* PD */ 118 { NULL, posts_at }, /* AT */ 119 { NULL, NULL }, /* in */ 120 { NULL, posts_ft }, /* ft */ 121 { NULL, posts_eq2 }, /* OP */ 122 { NULL, posts_nf }, /* EX */ 123 { NULL, posts_fi }, /* EE */ 124 { NULL, posts_ur }, /* UR */ 125 { NULL, NULL }, /* UE */ 126 }; 127 128 129 int 130 man_valid_pre(struct man *man, struct man_node *n) 131 { 132 v_check *cp; 133 134 switch (n->type) { 135 case (MAN_TEXT): 136 /* FALLTHROUGH */ 137 case (MAN_ROOT): 138 /* FALLTHROUGH */ 139 case (MAN_EQN): 140 /* FALLTHROUGH */ 141 case (MAN_TBL): 142 return(1); 143 default: 144 break; 145 } 146 147 if (NULL == (cp = man_valids[n->tok].pres)) 148 return(1); 149 for ( ; *cp; cp++) 150 if ( ! (*cp)(man, n)) 151 return(0); 152 return(1); 153 } 154 155 156 int 157 man_valid_post(struct man *man) 158 { 159 v_check *cp; 160 161 if (MAN_VALID & man->last->flags) 162 return(1); 163 man->last->flags |= MAN_VALID; 164 165 switch (man->last->type) { 166 case (MAN_TEXT): 167 check_text(man, man->last); 168 return(1); 169 case (MAN_ROOT): 170 return(check_root(man, man->last)); 171 case (MAN_EQN): 172 /* FALLTHROUGH */ 173 case (MAN_TBL): 174 return(1); 175 default: 176 break; 177 } 178 179 if (NULL == (cp = man_valids[man->last->tok].posts)) 180 return(1); 181 for ( ; *cp; cp++) 182 if ( ! (*cp)(man, man->last)) 183 return(0); 184 185 return(1); 186 } 187 188 189 static int 190 check_root(CHKARGS) 191 { 192 193 if (MAN_BLINE & man->flags) 194 man_nmsg(man, n, MANDOCERR_SCOPEEXIT); 195 else if (MAN_ELINE & man->flags) 196 man_nmsg(man, n, MANDOCERR_SCOPEEXIT); 197 198 man->flags &= ~MAN_BLINE; 199 man->flags &= ~MAN_ELINE; 200 201 if (NULL == man->first->child) { 202 man_nmsg(man, n, MANDOCERR_NODOCBODY); 203 return(0); 204 } else if (NULL == man->meta.title) { 205 man_nmsg(man, n, MANDOCERR_NOTITLE); 206 207 /* 208 * If a title hasn't been set, do so now (by 209 * implication, date and section also aren't set). 210 */ 211 212 man->meta.title = mandoc_strdup("unknown"); 213 man->meta.msec = mandoc_strdup("1"); 214 man->meta.date = mandoc_normdate 215 (man->parse, NULL, n->line, n->pos); 216 } 217 218 return(1); 219 } 220 221 static void 222 check_text(CHKARGS) 223 { 224 char *cp, *p; 225 226 if (MAN_LITERAL & man->flags) 227 return; 228 229 cp = n->string; 230 for (p = cp; NULL != (p = strchr(p, '\t')); p++) 231 man_pmsg(man, n->line, (int)(p - cp), MANDOCERR_BADTAB); 232 } 233 234 #define INEQ_DEFINE(x, ineq, name) \ 235 static int \ 236 check_##name(CHKARGS) \ 237 { \ 238 if (n->nchild ineq (x)) \ 239 return(1); \ 240 mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line, n->pos, \ 241 "line arguments %s %d (have %d)", \ 242 #ineq, (x), n->nchild); \ 243 return(1); \ 244 } 245 246 INEQ_DEFINE(0, ==, eq0) 247 INEQ_DEFINE(2, ==, eq2) 248 INEQ_DEFINE(1, <=, le1) 249 INEQ_DEFINE(2, >=, ge2) 250 INEQ_DEFINE(5, <=, le5) 251 252 static int 253 check_head1(CHKARGS) 254 { 255 256 if (MAN_HEAD == n->type && 1 != n->nchild) 257 mandoc_vmsg(MANDOCERR_ARGCOUNT, man->parse, n->line, 258 n->pos, "line arguments eq 1 (have %d)", n->nchild); 259 260 return(1); 261 } 262 263 static int 264 post_ft(CHKARGS) 265 { 266 char *cp; 267 int ok; 268 269 if (0 == n->nchild) 270 return(1); 271 272 ok = 0; 273 cp = n->child->string; 274 switch (*cp) { 275 case ('1'): 276 /* FALLTHROUGH */ 277 case ('2'): 278 /* FALLTHROUGH */ 279 case ('3'): 280 /* FALLTHROUGH */ 281 case ('4'): 282 /* FALLTHROUGH */ 283 case ('I'): 284 /* FALLTHROUGH */ 285 case ('P'): 286 /* FALLTHROUGH */ 287 case ('R'): 288 if ('\0' == cp[1]) 289 ok = 1; 290 break; 291 case ('B'): 292 if ('\0' == cp[1] || ('I' == cp[1] && '\0' == cp[2])) 293 ok = 1; 294 break; 295 case ('C'): 296 if ('W' == cp[1] && '\0' == cp[2]) 297 ok = 1; 298 break; 299 default: 300 break; 301 } 302 303 if (0 == ok) { 304 mandoc_vmsg 305 (MANDOCERR_BADFONT, man->parse, 306 n->line, n->pos, "%s", cp); 307 *cp = '\0'; 308 } 309 310 if (1 < n->nchild) 311 mandoc_vmsg 312 (MANDOCERR_ARGCOUNT, man->parse, n->line, 313 n->pos, "want one child (have %d)", 314 n->nchild); 315 316 return(1); 317 } 318 319 static int 320 pre_sec(CHKARGS) 321 { 322 323 if (MAN_BLOCK == n->type) 324 man->flags &= ~MAN_LITERAL; 325 return(1); 326 } 327 328 static int 329 post_sec(CHKARGS) 330 { 331 332 if ( ! (MAN_HEAD == n->type && 0 == n->nchild)) 333 return(1); 334 335 man_nmsg(man, n, MANDOCERR_SYNTARGCOUNT); 336 return(0); 337 } 338 339 static int 340 check_part(CHKARGS) 341 { 342 343 if (MAN_BODY == n->type && 0 == n->nchild) 344 mandoc_msg(MANDOCERR_ARGCWARN, man->parse, n->line, 345 n->pos, "want children (have none)"); 346 347 return(1); 348 } 349 350 351 static int 352 check_par(CHKARGS) 353 { 354 355 switch (n->type) { 356 case (MAN_BLOCK): 357 if (0 == n->body->nchild) 358 man_node_delete(man, n); 359 break; 360 case (MAN_BODY): 361 if (0 == n->nchild) 362 man_nmsg(man, n, MANDOCERR_IGNPAR); 363 break; 364 case (MAN_HEAD): 365 if (n->nchild) 366 man_nmsg(man, n, MANDOCERR_ARGSLOST); 367 break; 368 default: 369 break; 370 } 371 372 return(1); 373 } 374 375 static int 376 post_IP(CHKARGS) 377 { 378 379 switch (n->type) { 380 case (MAN_BLOCK): 381 if (0 == n->head->nchild && 0 == n->body->nchild) 382 man_node_delete(man, n); 383 break; 384 case (MAN_BODY): 385 if (0 == n->parent->head->nchild && 0 == n->nchild) 386 man_nmsg(man, n, MANDOCERR_IGNPAR); 387 break; 388 default: 389 break; 390 } 391 return(1); 392 } 393 394 static int 395 post_TH(CHKARGS) 396 { 397 const char *p; 398 int line, pos; 399 400 free(man->meta.title); 401 free(man->meta.vol); 402 free(man->meta.source); 403 free(man->meta.msec); 404 free(man->meta.date); 405 406 line = n->line; 407 pos = n->pos; 408 man->meta.title = man->meta.vol = man->meta.date = 409 man->meta.msec = man->meta.source = NULL; 410 411 /* ->TITLE<- MSEC DATE SOURCE VOL */ 412 413 n = n->child; 414 if (n && n->string) { 415 for (p = n->string; '\0' != *p; p++) { 416 /* Only warn about this once... */ 417 if (isalpha((unsigned char)*p) && 418 ! isupper((unsigned char)*p)) { 419 man_nmsg(man, n, MANDOCERR_UPPERCASE); 420 break; 421 } 422 } 423 man->meta.title = mandoc_strdup(n->string); 424 } else 425 man->meta.title = mandoc_strdup(""); 426 427 /* TITLE ->MSEC<- DATE SOURCE VOL */ 428 429 if (n) 430 n = n->next; 431 if (n && n->string) 432 man->meta.msec = mandoc_strdup(n->string); 433 else 434 man->meta.msec = mandoc_strdup(""); 435 436 /* TITLE MSEC ->DATE<- SOURCE VOL */ 437 438 if (n) 439 n = n->next; 440 if (n && n->string && '\0' != n->string[0]) { 441 pos = n->pos; 442 man->meta.date = mandoc_normdate 443 (man->parse, n->string, line, pos); 444 } else 445 man->meta.date = mandoc_strdup(""); 446 447 /* TITLE MSEC DATE ->SOURCE<- VOL */ 448 449 if (n && (n = n->next)) 450 man->meta.source = mandoc_strdup(n->string); 451 452 /* TITLE MSEC DATE SOURCE ->VOL<- */ 453 /* If missing, use the default VOL name for MSEC. */ 454 455 if (n && (n = n->next)) 456 man->meta.vol = mandoc_strdup(n->string); 457 else if ('\0' != man->meta.msec[0] && 458 (NULL != (p = mandoc_a2msec(man->meta.msec)))) 459 man->meta.vol = mandoc_strdup(p); 460 461 /* 462 * Remove the `TH' node after we've processed it for our 463 * meta-data. 464 */ 465 man_node_delete(man, man->last); 466 return(1); 467 } 468 469 static int 470 post_nf(CHKARGS) 471 { 472 473 if (MAN_LITERAL & man->flags) 474 man_nmsg(man, n, MANDOCERR_SCOPEREP); 475 476 man->flags |= MAN_LITERAL; 477 return(1); 478 } 479 480 static int 481 post_fi(CHKARGS) 482 { 483 484 if ( ! (MAN_LITERAL & man->flags)) 485 man_nmsg(man, n, MANDOCERR_WNOSCOPE); 486 487 man->flags &= ~MAN_LITERAL; 488 return(1); 489 } 490 491 static int 492 post_UC(CHKARGS) 493 { 494 static const char * const bsd_versions[] = { 495 "3rd Berkeley Distribution", 496 "4th Berkeley Distribution", 497 "4.2 Berkeley Distribution", 498 "4.3 Berkeley Distribution", 499 "4.4 Berkeley Distribution", 500 }; 501 502 const char *p, *s; 503 504 n = n->child; 505 506 if (NULL == n || MAN_TEXT != n->type) 507 p = bsd_versions[0]; 508 else { 509 s = n->string; 510 if (0 == strcmp(s, "3")) 511 p = bsd_versions[0]; 512 else if (0 == strcmp(s, "4")) 513 p = bsd_versions[1]; 514 else if (0 == strcmp(s, "5")) 515 p = bsd_versions[2]; 516 else if (0 == strcmp(s, "6")) 517 p = bsd_versions[3]; 518 else if (0 == strcmp(s, "7")) 519 p = bsd_versions[4]; 520 else 521 p = bsd_versions[0]; 522 } 523 524 free(man->meta.source); 525 man->meta.source = mandoc_strdup(p); 526 return(1); 527 } 528 529 static int 530 post_AT(CHKARGS) 531 { 532 static const char * const unix_versions[] = { 533 "7th Edition", 534 "System III", 535 "System V", 536 "System V Release 2", 537 }; 538 539 const char *p, *s; 540 struct man_node *nn; 541 542 n = n->child; 543 544 if (NULL == n || MAN_TEXT != n->type) 545 p = unix_versions[0]; 546 else { 547 s = n->string; 548 if (0 == strcmp(s, "3")) 549 p = unix_versions[0]; 550 else if (0 == strcmp(s, "4")) 551 p = unix_versions[1]; 552 else if (0 == strcmp(s, "5")) { 553 nn = n->next; 554 if (nn && MAN_TEXT == nn->type && nn->string[0]) 555 p = unix_versions[3]; 556 else 557 p = unix_versions[2]; 558 } else 559 p = unix_versions[0]; 560 } 561 562 free(man->meta.source); 563 man->meta.source = mandoc_strdup(p); 564 return(1); 565 } 566 567 static int 568 post_vs(CHKARGS) 569 { 570 571 if (NULL != n->prev) 572 return(1); 573 574 switch (n->parent->tok) { 575 case (MAN_SH): 576 /* FALLTHROUGH */ 577 case (MAN_SS): 578 man_nmsg(man, n, MANDOCERR_IGNPAR); 579 /* FALLTHROUGH */ 580 case (MAN_MAX): 581 /* 582 * Don't warn about this because it occurs in pod2man 583 * and would cause considerable (unfixable) warnage. 584 */ 585 man_node_delete(man, n); 586 break; 587 default: 588 break; 589 } 590 591 return(1); 592 } 593