1 /* $Id: cgi.c,v 1.158 2018/05/29 20:32:45 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 #include <sys/time.h> 22 23 #include <ctype.h> 24 #if HAVE_ERR 25 #include <err.h> 26 #endif 27 #include <errno.h> 28 #include <fcntl.h> 29 #include <limits.h> 30 #include <stdint.h> 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <unistd.h> 35 36 #include "mandoc_aux.h" 37 #include "mandoc.h" 38 #include "roff.h" 39 #include "mdoc.h" 40 #include "man.h" 41 #include "main.h" 42 #include "manconf.h" 43 #include "mansearch.h" 44 #include "cgi.h" 45 46 /* 47 * A query as passed to the search function. 48 */ 49 struct query { 50 char *manpath; /* desired manual directory */ 51 char *arch; /* architecture */ 52 char *sec; /* manual section */ 53 char *query; /* unparsed query expression */ 54 int equal; /* match whole names, not substrings */ 55 }; 56 57 struct req { 58 struct query q; 59 char **p; /* array of available manpaths */ 60 size_t psz; /* number of available manpaths */ 61 int isquery; /* QUERY_STRING used, not PATH_INFO */ 62 }; 63 64 enum focus { 65 FOCUS_NONE = 0, 66 FOCUS_QUERY 67 }; 68 69 static void html_print(const char *); 70 static void html_putchar(char); 71 static int http_decode(char *); 72 static void parse_manpath_conf(struct req *); 73 static void parse_path_info(struct req *req, const char *path); 74 static void parse_query_string(struct req *, const char *); 75 static void pg_error_badrequest(const char *); 76 static void pg_error_internal(void); 77 static void pg_index(const struct req *); 78 static void pg_noresult(const struct req *, const char *); 79 static void pg_redirect(const struct req *, const char *); 80 static void pg_search(const struct req *); 81 static void pg_searchres(const struct req *, 82 struct manpage *, size_t); 83 static void pg_show(struct req *, const char *); 84 static void resp_begin_html(int, const char *, const char *); 85 static void resp_begin_http(int, const char *); 86 static void resp_catman(const struct req *, const char *); 87 static void resp_copy(const char *); 88 static void resp_end_html(void); 89 static void resp_format(const struct req *, const char *); 90 static void resp_searchform(const struct req *, enum focus); 91 static void resp_show(const struct req *, const char *); 92 static void set_query_attr(char **, char **); 93 static int validate_filename(const char *); 94 static int validate_manpath(const struct req *, const char *); 95 static int validate_urifrag(const char *); 96 97 static const char *scriptname = SCRIPT_NAME; 98 99 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 100 static const char *const sec_numbers[] = { 101 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 102 }; 103 static const char *const sec_names[] = { 104 "All Sections", 105 "1 - General Commands", 106 "2 - System Calls", 107 "3 - Library Functions", 108 "3p - Perl Library", 109 "4 - Device Drivers", 110 "5 - File Formats", 111 "6 - Games", 112 "7 - Miscellaneous Information", 113 "8 - System Manager\'s Manual", 114 "9 - Kernel Developer\'s Manual" 115 }; 116 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 117 118 static const char *const arch_names[] = { 119 "amd64", "alpha", "armv7", "arm64", 120 "hppa", "i386", "landisk", 121 "loongson", "luna88k", "macppc", "mips64", 122 "octeon", "sgi", "socppc", "sparc64", 123 "amiga", "arc", "armish", "arm32", 124 "atari", "aviion", "beagle", "cats", 125 "hppa64", "hp300", 126 "ia64", "mac68k", "mvme68k", "mvme88k", 127 "mvmeppc", "palm", "pc532", "pegasos", 128 "pmax", "powerpc", "solbourne", "sparc", 129 "sun3", "vax", "wgrisc", "x68k", 130 "zaurus" 131 }; 132 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 133 134 /* 135 * Print a character, escaping HTML along the way. 136 * This will pass non-ASCII straight to output: be warned! 137 */ 138 static void 139 html_putchar(char c) 140 { 141 142 switch (c) { 143 case '"': 144 printf("""); 145 break; 146 case '&': 147 printf("&"); 148 break; 149 case '>': 150 printf(">"); 151 break; 152 case '<': 153 printf("<"); 154 break; 155 default: 156 putchar((unsigned char)c); 157 break; 158 } 159 } 160 161 /* 162 * Call through to html_putchar(). 163 * Accepts NULL strings. 164 */ 165 static void 166 html_print(const char *p) 167 { 168 169 if (NULL == p) 170 return; 171 while ('\0' != *p) 172 html_putchar(*p++); 173 } 174 175 /* 176 * Transfer the responsibility for the allocated string *val 177 * to the query structure. 178 */ 179 static void 180 set_query_attr(char **attr, char **val) 181 { 182 183 free(*attr); 184 if (**val == '\0') { 185 *attr = NULL; 186 free(*val); 187 } else 188 *attr = *val; 189 *val = NULL; 190 } 191 192 /* 193 * Parse the QUERY_STRING for key-value pairs 194 * and store the values into the query structure. 195 */ 196 static void 197 parse_query_string(struct req *req, const char *qs) 198 { 199 char *key, *val; 200 size_t keysz, valsz; 201 202 req->isquery = 1; 203 req->q.manpath = NULL; 204 req->q.arch = NULL; 205 req->q.sec = NULL; 206 req->q.query = NULL; 207 req->q.equal = 1; 208 209 key = val = NULL; 210 while (*qs != '\0') { 211 212 /* Parse one key. */ 213 214 keysz = strcspn(qs, "=;&"); 215 key = mandoc_strndup(qs, keysz); 216 qs += keysz; 217 if (*qs != '=') 218 goto next; 219 220 /* Parse one value. */ 221 222 valsz = strcspn(++qs, ";&"); 223 val = mandoc_strndup(qs, valsz); 224 qs += valsz; 225 226 /* Decode and catch encoding errors. */ 227 228 if ( ! (http_decode(key) && http_decode(val))) 229 goto next; 230 231 /* Handle key-value pairs. */ 232 233 if ( ! strcmp(key, "query")) 234 set_query_attr(&req->q.query, &val); 235 236 else if ( ! strcmp(key, "apropos")) 237 req->q.equal = !strcmp(val, "0"); 238 239 else if ( ! strcmp(key, "manpath")) { 240 #ifdef COMPAT_OLDURI 241 if ( ! strncmp(val, "OpenBSD ", 8)) { 242 val[7] = '-'; 243 if ('C' == val[8]) 244 val[8] = 'c'; 245 } 246 #endif 247 set_query_attr(&req->q.manpath, &val); 248 } 249 250 else if ( ! (strcmp(key, "sec") 251 #ifdef COMPAT_OLDURI 252 && strcmp(key, "sektion") 253 #endif 254 )) { 255 if ( ! strcmp(val, "0")) 256 *val = '\0'; 257 set_query_attr(&req->q.sec, &val); 258 } 259 260 else if ( ! strcmp(key, "arch")) { 261 if ( ! strcmp(val, "default")) 262 *val = '\0'; 263 set_query_attr(&req->q.arch, &val); 264 } 265 266 /* 267 * The key must be freed in any case. 268 * The val may have been handed over to the query 269 * structure, in which case it is now NULL. 270 */ 271 next: 272 free(key); 273 key = NULL; 274 free(val); 275 val = NULL; 276 277 if (*qs != '\0') 278 qs++; 279 } 280 } 281 282 /* 283 * HTTP-decode a string. The standard explanation is that this turns 284 * "%4e+foo" into "n foo" in the regular way. This is done in-place 285 * over the allocated string. 286 */ 287 static int 288 http_decode(char *p) 289 { 290 char hex[3]; 291 char *q; 292 int c; 293 294 hex[2] = '\0'; 295 296 q = p; 297 for ( ; '\0' != *p; p++, q++) { 298 if ('%' == *p) { 299 if ('\0' == (hex[0] = *(p + 1))) 300 return 0; 301 if ('\0' == (hex[1] = *(p + 2))) 302 return 0; 303 if (1 != sscanf(hex, "%x", &c)) 304 return 0; 305 if ('\0' == c) 306 return 0; 307 308 *q = (char)c; 309 p += 2; 310 } else 311 *q = '+' == *p ? ' ' : *p; 312 } 313 314 *q = '\0'; 315 return 1; 316 } 317 318 static void 319 resp_begin_http(int code, const char *msg) 320 { 321 322 if (200 != code) 323 printf("Status: %d %s\r\n", code, msg); 324 325 printf("Content-Type: text/html; charset=utf-8\r\n" 326 "Cache-Control: no-cache\r\n" 327 "Pragma: no-cache\r\n" 328 "\r\n"); 329 330 fflush(stdout); 331 } 332 333 static void 334 resp_copy(const char *filename) 335 { 336 char buf[4096]; 337 ssize_t sz; 338 int fd; 339 340 if ((fd = open(filename, O_RDONLY)) != -1) { 341 fflush(stdout); 342 while ((sz = read(fd, buf, sizeof(buf))) > 0) 343 write(STDOUT_FILENO, buf, sz); 344 close(fd); 345 } 346 } 347 348 static void 349 resp_begin_html(int code, const char *msg, const char *file) 350 { 351 char *cp; 352 353 resp_begin_http(code, msg); 354 355 printf("<!DOCTYPE html>\n" 356 "<html>\n" 357 "<head>\n" 358 " <meta charset=\"UTF-8\"/>\n" 359 " <meta name=\"viewport\"" 360 " content=\"width=device-width, initial-scale=1.0\">\n" 361 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 362 " type=\"text/css\" media=\"all\">\n" 363 " <title>", 364 CSS_DIR); 365 if (file != NULL) { 366 if ((cp = strrchr(file, '/')) != NULL) 367 file = cp + 1; 368 if ((cp = strrchr(file, '.')) != NULL) { 369 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1); 370 } else 371 printf("%s - ", file); 372 } 373 printf("%s</title>\n" 374 "</head>\n" 375 "<body>\n", 376 CUSTOMIZE_TITLE); 377 378 resp_copy(MAN_DIR "/header.html"); 379 } 380 381 static void 382 resp_end_html(void) 383 { 384 385 resp_copy(MAN_DIR "/footer.html"); 386 387 puts("</body>\n" 388 "</html>"); 389 } 390 391 static void 392 resp_searchform(const struct req *req, enum focus focus) 393 { 394 int i; 395 396 printf("<form action=\"/%s\" method=\"get\">\n" 397 " <fieldset>\n" 398 " <legend>Manual Page Search Parameters</legend>\n", 399 scriptname); 400 401 /* Write query input box. */ 402 403 printf(" <input type=\"search\" name=\"query\" value=\""); 404 if (req->q.query != NULL) 405 html_print(req->q.query); 406 printf( "\" size=\"40\""); 407 if (focus == FOCUS_QUERY) 408 printf(" autofocus"); 409 puts(">"); 410 411 /* Write submission buttons. */ 412 413 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 414 "man</button>\n" 415 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 416 "apropos</button>\n" 417 " <br/>\n"); 418 419 /* Write section selector. */ 420 421 puts(" <select name=\"sec\">"); 422 for (i = 0; i < sec_MAX; i++) { 423 printf(" <option value=\"%s\"", sec_numbers[i]); 424 if (NULL != req->q.sec && 425 0 == strcmp(sec_numbers[i], req->q.sec)) 426 printf(" selected=\"selected\""); 427 printf(">%s</option>\n", sec_names[i]); 428 } 429 puts(" </select>"); 430 431 /* Write architecture selector. */ 432 433 printf( " <select name=\"arch\">\n" 434 " <option value=\"default\""); 435 if (NULL == req->q.arch) 436 printf(" selected=\"selected\""); 437 puts(">All Architectures</option>"); 438 for (i = 0; i < arch_MAX; i++) { 439 printf(" <option"); 440 if (NULL != req->q.arch && 441 0 == strcmp(arch_names[i], req->q.arch)) 442 printf(" selected=\"selected\""); 443 printf(">%s</option>\n", arch_names[i]); 444 } 445 puts(" </select>"); 446 447 /* Write manpath selector. */ 448 449 if (req->psz > 1) { 450 puts(" <select name=\"manpath\">"); 451 for (i = 0; i < (int)req->psz; i++) { 452 printf(" <option"); 453 if (strcmp(req->q.manpath, req->p[i]) == 0) 454 printf(" selected=\"selected\""); 455 printf(">"); 456 html_print(req->p[i]); 457 puts("</option>"); 458 } 459 puts(" </select>"); 460 } 461 462 puts(" </fieldset>\n" 463 "</form>"); 464 } 465 466 static int 467 validate_urifrag(const char *frag) 468 { 469 470 while ('\0' != *frag) { 471 if ( ! (isalnum((unsigned char)*frag) || 472 '-' == *frag || '.' == *frag || 473 '/' == *frag || '_' == *frag)) 474 return 0; 475 frag++; 476 } 477 return 1; 478 } 479 480 static int 481 validate_manpath(const struct req *req, const char* manpath) 482 { 483 size_t i; 484 485 for (i = 0; i < req->psz; i++) 486 if ( ! strcmp(manpath, req->p[i])) 487 return 1; 488 489 return 0; 490 } 491 492 static int 493 validate_filename(const char *file) 494 { 495 496 if ('.' == file[0] && '/' == file[1]) 497 file += 2; 498 499 return ! (strstr(file, "../") || strstr(file, "/..") || 500 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 501 } 502 503 static void 504 pg_index(const struct req *req) 505 { 506 507 resp_begin_html(200, NULL, NULL); 508 resp_searchform(req, FOCUS_QUERY); 509 printf("<p>\n" 510 "This web interface is documented in the\n" 511 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 512 "manual, and the\n" 513 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" 514 "manual explains the query syntax.\n" 515 "</p>\n", 516 scriptname, *scriptname == '\0' ? "" : "/", 517 scriptname, *scriptname == '\0' ? "" : "/"); 518 resp_end_html(); 519 } 520 521 static void 522 pg_noresult(const struct req *req, const char *msg) 523 { 524 resp_begin_html(200, NULL, NULL); 525 resp_searchform(req, FOCUS_QUERY); 526 puts("<p>"); 527 puts(msg); 528 puts("</p>"); 529 resp_end_html(); 530 } 531 532 static void 533 pg_error_badrequest(const char *msg) 534 { 535 536 resp_begin_html(400, "Bad Request", NULL); 537 puts("<h1>Bad Request</h1>\n" 538 "<p>\n"); 539 puts(msg); 540 printf("Try again from the\n" 541 "<a href=\"/%s\">main page</a>.\n" 542 "</p>", scriptname); 543 resp_end_html(); 544 } 545 546 static void 547 pg_error_internal(void) 548 { 549 resp_begin_html(500, "Internal Server Error", NULL); 550 puts("<p>Internal Server Error</p>"); 551 resp_end_html(); 552 } 553 554 static void 555 pg_redirect(const struct req *req, const char *name) 556 { 557 printf("Status: 303 See Other\r\n" 558 "Location: /"); 559 if (*scriptname != '\0') 560 printf("%s/", scriptname); 561 if (strcmp(req->q.manpath, req->p[0])) 562 printf("%s/", req->q.manpath); 563 if (req->q.arch != NULL) 564 printf("%s/", req->q.arch); 565 printf("%s", name); 566 if (req->q.sec != NULL) 567 printf(".%s", req->q.sec); 568 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"); 569 } 570 571 static void 572 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 573 { 574 char *arch, *archend; 575 const char *sec; 576 size_t i, iuse; 577 int archprio, archpriouse; 578 int prio, priouse; 579 580 for (i = 0; i < sz; i++) { 581 if (validate_filename(r[i].file)) 582 continue; 583 warnx("invalid filename %s in %s database", 584 r[i].file, req->q.manpath); 585 pg_error_internal(); 586 return; 587 } 588 589 if (req->isquery && sz == 1) { 590 /* 591 * If we have just one result, then jump there now 592 * without any delay. 593 */ 594 printf("Status: 303 See Other\r\n" 595 "Location: /"); 596 if (*scriptname != '\0') 597 printf("%s/", scriptname); 598 if (strcmp(req->q.manpath, req->p[0])) 599 printf("%s/", req->q.manpath); 600 printf("%s\r\n" 601 "Content-Type: text/html; charset=utf-8\r\n\r\n", 602 r[0].file); 603 return; 604 } 605 606 /* 607 * In man(1) mode, show one of the pages 608 * even if more than one is found. 609 */ 610 611 iuse = 0; 612 if (req->q.equal || sz == 1) { 613 priouse = 20; 614 archpriouse = 3; 615 for (i = 0; i < sz; i++) { 616 sec = r[i].file; 617 sec += strcspn(sec, "123456789"); 618 if (sec[0] == '\0') 619 continue; 620 prio = sec_prios[sec[0] - '1']; 621 if (sec[1] != '/') 622 prio += 10; 623 if (req->q.arch == NULL) { 624 archprio = 625 ((arch = strchr(sec + 1, '/')) 626 == NULL) ? 3 : 627 ((archend = strchr(arch + 1, '/')) 628 == NULL) ? 0 : 629 strncmp(arch, "amd64/", 630 archend - arch) ? 2 : 1; 631 if (archprio < archpriouse) { 632 archpriouse = archprio; 633 priouse = prio; 634 iuse = i; 635 continue; 636 } 637 if (archprio > archpriouse) 638 continue; 639 } 640 if (prio >= priouse) 641 continue; 642 priouse = prio; 643 iuse = i; 644 } 645 resp_begin_html(200, NULL, r[iuse].file); 646 } else 647 resp_begin_html(200, NULL, NULL); 648 649 resp_searchform(req, 650 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 651 652 if (sz > 1) { 653 puts("<table class=\"results\">"); 654 for (i = 0; i < sz; i++) { 655 printf(" <tr>\n" 656 " <td>" 657 "<a class=\"Xr\" href=\"/"); 658 if (*scriptname != '\0') 659 printf("%s/", scriptname); 660 if (strcmp(req->q.manpath, req->p[0])) 661 printf("%s/", req->q.manpath); 662 printf("%s\">", r[i].file); 663 html_print(r[i].names); 664 printf("</a></td>\n" 665 " <td><span class=\"Nd\">"); 666 html_print(r[i].output); 667 puts("</span></td>\n" 668 " </tr>"); 669 } 670 puts("</table>"); 671 } 672 673 if (req->q.equal || sz == 1) { 674 puts("<hr>"); 675 resp_show(req, r[iuse].file); 676 } 677 678 resp_end_html(); 679 } 680 681 static void 682 resp_catman(const struct req *req, const char *file) 683 { 684 FILE *f; 685 char *p; 686 size_t sz; 687 ssize_t len; 688 int i; 689 int italic, bold; 690 691 if ((f = fopen(file, "r")) == NULL) { 692 puts("<p>You specified an invalid manual file.</p>"); 693 return; 694 } 695 696 puts("<div class=\"catman\">\n" 697 "<pre>"); 698 699 p = NULL; 700 sz = 0; 701 702 while ((len = getline(&p, &sz, f)) != -1) { 703 bold = italic = 0; 704 for (i = 0; i < len - 1; i++) { 705 /* 706 * This means that the catpage is out of state. 707 * Ignore it and keep going (although the 708 * catpage is bogus). 709 */ 710 711 if ('\b' == p[i] || '\n' == p[i]) 712 continue; 713 714 /* 715 * Print a regular character. 716 * Close out any bold/italic scopes. 717 * If we're in back-space mode, make sure we'll 718 * have something to enter when we backspace. 719 */ 720 721 if ('\b' != p[i + 1]) { 722 if (italic) 723 printf("</i>"); 724 if (bold) 725 printf("</b>"); 726 italic = bold = 0; 727 html_putchar(p[i]); 728 continue; 729 } else if (i + 2 >= len) 730 continue; 731 732 /* Italic mode. */ 733 734 if ('_' == p[i]) { 735 if (bold) 736 printf("</b>"); 737 if ( ! italic) 738 printf("<i>"); 739 bold = 0; 740 italic = 1; 741 i += 2; 742 html_putchar(p[i]); 743 continue; 744 } 745 746 /* 747 * Handle funny behaviour troff-isms. 748 * These grok'd from the original man2html.c. 749 */ 750 751 if (('+' == p[i] && 'o' == p[i + 2]) || 752 ('o' == p[i] && '+' == p[i + 2]) || 753 ('|' == p[i] && '=' == p[i + 2]) || 754 ('=' == p[i] && '|' == p[i + 2]) || 755 ('*' == p[i] && '=' == p[i + 2]) || 756 ('=' == p[i] && '*' == p[i + 2]) || 757 ('*' == p[i] && '|' == p[i + 2]) || 758 ('|' == p[i] && '*' == p[i + 2])) { 759 if (italic) 760 printf("</i>"); 761 if (bold) 762 printf("</b>"); 763 italic = bold = 0; 764 putchar('*'); 765 i += 2; 766 continue; 767 } else if (('|' == p[i] && '-' == p[i + 2]) || 768 ('-' == p[i] && '|' == p[i + 1]) || 769 ('+' == p[i] && '-' == p[i + 1]) || 770 ('-' == p[i] && '+' == p[i + 1]) || 771 ('+' == p[i] && '|' == p[i + 1]) || 772 ('|' == p[i] && '+' == p[i + 1])) { 773 if (italic) 774 printf("</i>"); 775 if (bold) 776 printf("</b>"); 777 italic = bold = 0; 778 putchar('+'); 779 i += 2; 780 continue; 781 } 782 783 /* Bold mode. */ 784 785 if (italic) 786 printf("</i>"); 787 if ( ! bold) 788 printf("<b>"); 789 bold = 1; 790 italic = 0; 791 i += 2; 792 html_putchar(p[i]); 793 } 794 795 /* 796 * Clean up the last character. 797 * We can get to a newline; don't print that. 798 */ 799 800 if (italic) 801 printf("</i>"); 802 if (bold) 803 printf("</b>"); 804 805 if (i == len - 1 && p[i] != '\n') 806 html_putchar(p[i]); 807 808 putchar('\n'); 809 } 810 free(p); 811 812 puts("</pre>\n" 813 "</div>"); 814 815 fclose(f); 816 } 817 818 static void 819 resp_format(const struct req *req, const char *file) 820 { 821 struct manoutput conf; 822 struct mparse *mp; 823 struct roff_man *man; 824 void *vp; 825 int fd; 826 int usepath; 827 828 if (-1 == (fd = open(file, O_RDONLY, 0))) { 829 puts("<p>You specified an invalid manual file.</p>"); 830 return; 831 } 832 833 mchars_alloc(); 834 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1, 835 MANDOCERR_MAX, NULL, MANDOC_OS_OTHER, req->q.manpath); 836 mparse_readfd(mp, fd, file); 837 close(fd); 838 839 memset(&conf, 0, sizeof(conf)); 840 conf.fragment = 1; 841 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); 842 usepath = strcmp(req->q.manpath, req->p[0]); 843 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S", 844 scriptname, *scriptname == '\0' ? "" : "/", 845 usepath ? req->q.manpath : "", usepath ? "/" : ""); 846 847 mparse_result(mp, &man, NULL); 848 if (man == NULL) { 849 warnx("fatal mandoc error: %s/%s", req->q.manpath, file); 850 pg_error_internal(); 851 mparse_free(mp); 852 mchars_free(); 853 return; 854 } 855 856 vp = html_alloc(&conf); 857 858 if (man->macroset == MACROSET_MDOC) { 859 mdoc_validate(man); 860 html_mdoc(vp, man); 861 } else { 862 man_validate(man); 863 html_man(vp, man); 864 } 865 866 html_free(vp); 867 mparse_free(mp); 868 mchars_free(); 869 free(conf.man); 870 free(conf.style); 871 } 872 873 static void 874 resp_show(const struct req *req, const char *file) 875 { 876 877 if ('.' == file[0] && '/' == file[1]) 878 file += 2; 879 880 if ('c' == *file) 881 resp_catman(req, file); 882 else 883 resp_format(req, file); 884 } 885 886 static void 887 pg_show(struct req *req, const char *fullpath) 888 { 889 char *manpath; 890 const char *file; 891 892 if ((file = strchr(fullpath, '/')) == NULL) { 893 pg_error_badrequest( 894 "You did not specify a page to show."); 895 return; 896 } 897 manpath = mandoc_strndup(fullpath, file - fullpath); 898 file++; 899 900 if ( ! validate_manpath(req, manpath)) { 901 pg_error_badrequest( 902 "You specified an invalid manpath."); 903 free(manpath); 904 return; 905 } 906 907 /* 908 * Begin by chdir()ing into the manpath. 909 * This way we can pick up the database files, which are 910 * relative to the manpath root. 911 */ 912 913 if (chdir(manpath) == -1) { 914 warn("chdir %s", manpath); 915 pg_error_internal(); 916 free(manpath); 917 return; 918 } 919 free(manpath); 920 921 if ( ! validate_filename(file)) { 922 pg_error_badrequest( 923 "You specified an invalid manual file."); 924 return; 925 } 926 927 resp_begin_html(200, NULL, file); 928 resp_searchform(req, FOCUS_NONE); 929 resp_show(req, file); 930 resp_end_html(); 931 } 932 933 static void 934 pg_search(const struct req *req) 935 { 936 struct mansearch search; 937 struct manpaths paths; 938 struct manpage *res; 939 char **argv; 940 char *query, *rp, *wp; 941 size_t ressz; 942 int argc; 943 944 /* 945 * Begin by chdir()ing into the root of the manpath. 946 * This way we can pick up the database files, which are 947 * relative to the manpath root. 948 */ 949 950 if (chdir(req->q.manpath) == -1) { 951 warn("chdir %s", req->q.manpath); 952 pg_error_internal(); 953 return; 954 } 955 956 search.arch = req->q.arch; 957 search.sec = req->q.sec; 958 search.outkey = "Nd"; 959 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 960 search.firstmatch = 1; 961 962 paths.sz = 1; 963 paths.paths = mandoc_malloc(sizeof(char *)); 964 paths.paths[0] = mandoc_strdup("."); 965 966 /* 967 * Break apart at spaces with backslash-escaping. 968 */ 969 970 argc = 0; 971 argv = NULL; 972 rp = query = mandoc_strdup(req->q.query); 973 for (;;) { 974 while (isspace((unsigned char)*rp)) 975 rp++; 976 if (*rp == '\0') 977 break; 978 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 979 argv[argc++] = wp = rp; 980 for (;;) { 981 if (isspace((unsigned char)*rp)) { 982 *wp = '\0'; 983 rp++; 984 break; 985 } 986 if (rp[0] == '\\' && rp[1] != '\0') 987 rp++; 988 if (wp != rp) 989 *wp = *rp; 990 if (*rp == '\0') 991 break; 992 wp++; 993 rp++; 994 } 995 } 996 997 res = NULL; 998 ressz = 0; 999 if (req->isquery && req->q.equal && argc == 1) 1000 pg_redirect(req, argv[0]); 1001 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0) 1002 pg_noresult(req, "You entered an invalid query."); 1003 else if (ressz == 0) 1004 pg_noresult(req, "No results found."); 1005 else 1006 pg_searchres(req, res, ressz); 1007 1008 free(query); 1009 mansearch_free(res, ressz); 1010 free(paths.paths[0]); 1011 free(paths.paths); 1012 } 1013 1014 int 1015 main(void) 1016 { 1017 struct req req; 1018 struct itimerval itimer; 1019 const char *path; 1020 const char *querystring; 1021 int i; 1022 1023 #if HAVE_PLEDGE 1024 /* 1025 * The "rpath" pledge could be revoked after mparse_readfd() 1026 * if the file desciptor to "/footer.html" would be opened 1027 * up front, but it's probably not worth the complication 1028 * of the code it would cause: it would require scattering 1029 * pledge() calls in multiple low-level resp_*() functions. 1030 */ 1031 1032 if (pledge("stdio rpath", NULL) == -1) { 1033 warn("pledge"); 1034 pg_error_internal(); 1035 return EXIT_FAILURE; 1036 } 1037 #endif 1038 1039 /* Poor man's ReDoS mitigation. */ 1040 1041 itimer.it_value.tv_sec = 2; 1042 itimer.it_value.tv_usec = 0; 1043 itimer.it_interval.tv_sec = 2; 1044 itimer.it_interval.tv_usec = 0; 1045 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 1046 warn("setitimer"); 1047 pg_error_internal(); 1048 return EXIT_FAILURE; 1049 } 1050 1051 /* 1052 * First we change directory into the MAN_DIR so that 1053 * subsequent scanning for manpath directories is rooted 1054 * relative to the same position. 1055 */ 1056 1057 if (chdir(MAN_DIR) == -1) { 1058 warn("MAN_DIR: %s", MAN_DIR); 1059 pg_error_internal(); 1060 return EXIT_FAILURE; 1061 } 1062 1063 memset(&req, 0, sizeof(struct req)); 1064 req.q.equal = 1; 1065 parse_manpath_conf(&req); 1066 1067 /* Parse the path info and the query string. */ 1068 1069 if ((path = getenv("PATH_INFO")) == NULL) 1070 path = ""; 1071 else if (*path == '/') 1072 path++; 1073 1074 if (*path != '\0') { 1075 parse_path_info(&req, path); 1076 if (req.q.manpath == NULL || req.q.sec == NULL || 1077 *req.q.query == '\0' || access(path, F_OK) == -1) 1078 path = ""; 1079 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1080 parse_query_string(&req, querystring); 1081 1082 /* Validate parsed data and add defaults. */ 1083 1084 if (req.q.manpath == NULL) 1085 req.q.manpath = mandoc_strdup(req.p[0]); 1086 else if ( ! validate_manpath(&req, req.q.manpath)) { 1087 pg_error_badrequest( 1088 "You specified an invalid manpath."); 1089 return EXIT_FAILURE; 1090 } 1091 1092 if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) { 1093 pg_error_badrequest( 1094 "You specified an invalid architecture."); 1095 return EXIT_FAILURE; 1096 } 1097 1098 /* Dispatch to the three different pages. */ 1099 1100 if ('\0' != *path) 1101 pg_show(&req, path); 1102 else if (NULL != req.q.query) 1103 pg_search(&req); 1104 else 1105 pg_index(&req); 1106 1107 free(req.q.manpath); 1108 free(req.q.arch); 1109 free(req.q.sec); 1110 free(req.q.query); 1111 for (i = 0; i < (int)req.psz; i++) 1112 free(req.p[i]); 1113 free(req.p); 1114 return EXIT_SUCCESS; 1115 } 1116 1117 /* 1118 * If PATH_INFO is not a file name, translate it to a query. 1119 */ 1120 static void 1121 parse_path_info(struct req *req, const char *path) 1122 { 1123 char *dir[4]; 1124 int i; 1125 1126 req->isquery = 0; 1127 req->q.equal = 1; 1128 req->q.manpath = mandoc_strdup(path); 1129 req->q.arch = NULL; 1130 1131 /* Mandatory manual page name. */ 1132 if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) { 1133 req->q.query = req->q.manpath; 1134 req->q.manpath = NULL; 1135 } else 1136 *req->q.query++ = '\0'; 1137 1138 /* Optional trailing section. */ 1139 if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) { 1140 if(isdigit((unsigned char)req->q.sec[1])) { 1141 *req->q.sec++ = '\0'; 1142 req->q.sec = mandoc_strdup(req->q.sec); 1143 } else 1144 req->q.sec = NULL; 1145 } 1146 1147 /* Handle the case of name[.section] only. */ 1148 if (req->q.manpath == NULL) 1149 return; 1150 req->q.query = mandoc_strdup(req->q.query); 1151 1152 /* Split directory components. */ 1153 dir[i = 0] = req->q.manpath; 1154 while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) { 1155 if (++i == 3) { 1156 pg_error_badrequest( 1157 "You specified too many directory components."); 1158 exit(EXIT_FAILURE); 1159 } 1160 *dir[i]++ = '\0'; 1161 } 1162 1163 /* Optional manpath. */ 1164 if ((i = validate_manpath(req, req->q.manpath)) == 0) 1165 req->q.manpath = NULL; 1166 else if (dir[1] == NULL) 1167 return; 1168 1169 /* Optional section. */ 1170 if (strncmp(dir[i], "man", 3) == 0) { 1171 free(req->q.sec); 1172 req->q.sec = mandoc_strdup(dir[i++] + 3); 1173 } 1174 if (dir[i] == NULL) { 1175 if (req->q.manpath == NULL) 1176 free(dir[0]); 1177 return; 1178 } 1179 if (dir[i + 1] != NULL) { 1180 pg_error_badrequest( 1181 "You specified an invalid directory component."); 1182 exit(EXIT_FAILURE); 1183 } 1184 1185 /* Optional architecture. */ 1186 if (i) { 1187 req->q.arch = mandoc_strdup(dir[i]); 1188 if (req->q.manpath == NULL) 1189 free(dir[0]); 1190 } else 1191 req->q.arch = dir[0]; 1192 } 1193 1194 /* 1195 * Scan for indexable paths. 1196 */ 1197 static void 1198 parse_manpath_conf(struct req *req) 1199 { 1200 FILE *fp; 1201 char *dp; 1202 size_t dpsz; 1203 ssize_t len; 1204 1205 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1206 warn("%s/manpath.conf", MAN_DIR); 1207 pg_error_internal(); 1208 exit(EXIT_FAILURE); 1209 } 1210 1211 dp = NULL; 1212 dpsz = 0; 1213 1214 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1215 if (dp[len - 1] == '\n') 1216 dp[--len] = '\0'; 1217 req->p = mandoc_realloc(req->p, 1218 (req->psz + 1) * sizeof(char *)); 1219 if ( ! validate_urifrag(dp)) { 1220 warnx("%s/manpath.conf contains " 1221 "unsafe path \"%s\"", MAN_DIR, dp); 1222 pg_error_internal(); 1223 exit(EXIT_FAILURE); 1224 } 1225 if (strchr(dp, '/') != NULL) { 1226 warnx("%s/manpath.conf contains " 1227 "path with slash \"%s\"", MAN_DIR, dp); 1228 pg_error_internal(); 1229 exit(EXIT_FAILURE); 1230 } 1231 req->p[req->psz++] = dp; 1232 dp = NULL; 1233 dpsz = 0; 1234 } 1235 free(dp); 1236 1237 if (req->p == NULL) { 1238 warnx("%s/manpath.conf is empty", MAN_DIR); 1239 pg_error_internal(); 1240 exit(EXIT_FAILURE); 1241 } 1242 } 1243