1 /* $Id: cgi.c,v 1.166 2019/03/06 12:32:41 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2016, 2017, 2018 Ingo Schwarze <schwarze@usta.de> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 #include <sys/time.h> 22 23 #include <ctype.h> 24 #if HAVE_ERR 25 #include <err.h> 26 #endif 27 #include <errno.h> 28 #include <fcntl.h> 29 #include <limits.h> 30 #include <stdint.h> 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #include <unistd.h> 35 36 #include "mandoc_aux.h" 37 #include "mandoc.h" 38 #include "roff.h" 39 #include "mdoc.h" 40 #include "man.h" 41 #include "mandoc_parse.h" 42 #include "main.h" 43 #include "manconf.h" 44 #include "mansearch.h" 45 #include "cgi.h" 46 47 /* 48 * A query as passed to the search function. 49 */ 50 struct query { 51 char *manpath; /* desired manual directory */ 52 char *arch; /* architecture */ 53 char *sec; /* manual section */ 54 char *query; /* unparsed query expression */ 55 int equal; /* match whole names, not substrings */ 56 }; 57 58 struct req { 59 struct query q; 60 char **p; /* array of available manpaths */ 61 size_t psz; /* number of available manpaths */ 62 int isquery; /* QUERY_STRING used, not PATH_INFO */ 63 }; 64 65 enum focus { 66 FOCUS_NONE = 0, 67 FOCUS_QUERY 68 }; 69 70 static void html_print(const char *); 71 static void html_putchar(char); 72 static int http_decode(char *); 73 static void http_encode(const char *p); 74 static void parse_manpath_conf(struct req *); 75 static void parse_path_info(struct req *req, const char *path); 76 static void parse_query_string(struct req *, const char *); 77 static void pg_error_badrequest(const char *); 78 static void pg_error_internal(void); 79 static void pg_index(const struct req *); 80 static void pg_noresult(const struct req *, const char *); 81 static void pg_redirect(const struct req *, const char *); 82 static void pg_search(const struct req *); 83 static void pg_searchres(const struct req *, 84 struct manpage *, size_t); 85 static void pg_show(struct req *, const char *); 86 static void resp_begin_html(int, const char *, const char *); 87 static void resp_begin_http(int, const char *); 88 static void resp_catman(const struct req *, const char *); 89 static void resp_copy(const char *); 90 static void resp_end_html(void); 91 static void resp_format(const struct req *, const char *); 92 static void resp_searchform(const struct req *, enum focus); 93 static void resp_show(const struct req *, const char *); 94 static void set_query_attr(char **, char **); 95 static int validate_arch(const char *); 96 static int validate_filename(const char *); 97 static int validate_manpath(const struct req *, const char *); 98 static int validate_urifrag(const char *); 99 100 static const char *scriptname = SCRIPT_NAME; 101 102 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 103 static const char *const sec_numbers[] = { 104 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 105 }; 106 static const char *const sec_names[] = { 107 "All Sections", 108 "1 - General Commands", 109 "2 - System Calls", 110 "3 - Library Functions", 111 "3p - Perl Library", 112 "4 - Device Drivers", 113 "5 - File Formats", 114 "6 - Games", 115 "7 - Miscellaneous Information", 116 "8 - System Manager\'s Manual", 117 "9 - Kernel Developer\'s Manual" 118 }; 119 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 120 121 static const char *const arch_names[] = { 122 "amd64", "alpha", "armv7", "arm64", 123 "hppa", "i386", "landisk", 124 "loongson", "luna88k", "macppc", "mips64", 125 "octeon", "sgi", "socppc", "sparc64", 126 "amiga", "arc", "armish", "arm32", 127 "atari", "aviion", "beagle", "cats", 128 "hppa64", "hp300", 129 "ia64", "mac68k", "mvme68k", "mvme88k", 130 "mvmeppc", "palm", "pc532", "pegasos", 131 "pmax", "powerpc", "solbourne", "sparc", 132 "sun3", "vax", "wgrisc", "x68k", 133 "zaurus" 134 }; 135 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 136 137 /* 138 * Print a character, escaping HTML along the way. 139 * This will pass non-ASCII straight to output: be warned! 140 */ 141 static void 142 html_putchar(char c) 143 { 144 145 switch (c) { 146 case '"': 147 printf("""); 148 break; 149 case '&': 150 printf("&"); 151 break; 152 case '>': 153 printf(">"); 154 break; 155 case '<': 156 printf("<"); 157 break; 158 default: 159 putchar((unsigned char)c); 160 break; 161 } 162 } 163 164 /* 165 * Call through to html_putchar(). 166 * Accepts NULL strings. 167 */ 168 static void 169 html_print(const char *p) 170 { 171 172 if (NULL == p) 173 return; 174 while ('\0' != *p) 175 html_putchar(*p++); 176 } 177 178 /* 179 * Transfer the responsibility for the allocated string *val 180 * to the query structure. 181 */ 182 static void 183 set_query_attr(char **attr, char **val) 184 { 185 186 free(*attr); 187 if (**val == '\0') { 188 *attr = NULL; 189 free(*val); 190 } else 191 *attr = *val; 192 *val = NULL; 193 } 194 195 /* 196 * Parse the QUERY_STRING for key-value pairs 197 * and store the values into the query structure. 198 */ 199 static void 200 parse_query_string(struct req *req, const char *qs) 201 { 202 char *key, *val; 203 size_t keysz, valsz; 204 205 req->isquery = 1; 206 req->q.manpath = NULL; 207 req->q.arch = NULL; 208 req->q.sec = NULL; 209 req->q.query = NULL; 210 req->q.equal = 1; 211 212 key = val = NULL; 213 while (*qs != '\0') { 214 215 /* Parse one key. */ 216 217 keysz = strcspn(qs, "=;&"); 218 key = mandoc_strndup(qs, keysz); 219 qs += keysz; 220 if (*qs != '=') 221 goto next; 222 223 /* Parse one value. */ 224 225 valsz = strcspn(++qs, ";&"); 226 val = mandoc_strndup(qs, valsz); 227 qs += valsz; 228 229 /* Decode and catch encoding errors. */ 230 231 if ( ! (http_decode(key) && http_decode(val))) 232 goto next; 233 234 /* Handle key-value pairs. */ 235 236 if ( ! strcmp(key, "query")) 237 set_query_attr(&req->q.query, &val); 238 239 else if ( ! strcmp(key, "apropos")) 240 req->q.equal = !strcmp(val, "0"); 241 242 else if ( ! strcmp(key, "manpath")) { 243 #ifdef COMPAT_OLDURI 244 if ( ! strncmp(val, "OpenBSD ", 8)) { 245 val[7] = '-'; 246 if ('C' == val[8]) 247 val[8] = 'c'; 248 } 249 #endif 250 set_query_attr(&req->q.manpath, &val); 251 } 252 253 else if ( ! (strcmp(key, "sec") 254 #ifdef COMPAT_OLDURI 255 && strcmp(key, "sektion") 256 #endif 257 )) { 258 if ( ! strcmp(val, "0")) 259 *val = '\0'; 260 set_query_attr(&req->q.sec, &val); 261 } 262 263 else if ( ! strcmp(key, "arch")) { 264 if ( ! strcmp(val, "default")) 265 *val = '\0'; 266 set_query_attr(&req->q.arch, &val); 267 } 268 269 /* 270 * The key must be freed in any case. 271 * The val may have been handed over to the query 272 * structure, in which case it is now NULL. 273 */ 274 next: 275 free(key); 276 key = NULL; 277 free(val); 278 val = NULL; 279 280 if (*qs != '\0') 281 qs++; 282 } 283 } 284 285 /* 286 * HTTP-decode a string. The standard explanation is that this turns 287 * "%4e+foo" into "n foo" in the regular way. This is done in-place 288 * over the allocated string. 289 */ 290 static int 291 http_decode(char *p) 292 { 293 char hex[3]; 294 char *q; 295 int c; 296 297 hex[2] = '\0'; 298 299 q = p; 300 for ( ; '\0' != *p; p++, q++) { 301 if ('%' == *p) { 302 if ('\0' == (hex[0] = *(p + 1))) 303 return 0; 304 if ('\0' == (hex[1] = *(p + 2))) 305 return 0; 306 if (1 != sscanf(hex, "%x", &c)) 307 return 0; 308 if ('\0' == c) 309 return 0; 310 311 *q = (char)c; 312 p += 2; 313 } else 314 *q = '+' == *p ? ' ' : *p; 315 } 316 317 *q = '\0'; 318 return 1; 319 } 320 321 static void 322 http_encode(const char *p) 323 { 324 for (; *p != '\0'; p++) { 325 if (isalnum((unsigned char)*p) == 0 && 326 strchr("-._~", *p) == NULL) 327 printf("%%%2.2X", (unsigned char)*p); 328 else 329 putchar(*p); 330 } 331 } 332 333 static void 334 resp_begin_http(int code, const char *msg) 335 { 336 337 if (200 != code) 338 printf("Status: %d %s\r\n", code, msg); 339 340 printf("Content-Type: text/html; charset=utf-8\r\n" 341 "Cache-Control: no-cache\r\n" 342 "Pragma: no-cache\r\n" 343 "\r\n"); 344 345 fflush(stdout); 346 } 347 348 static void 349 resp_copy(const char *filename) 350 { 351 char buf[4096]; 352 ssize_t sz; 353 int fd; 354 355 if ((fd = open(filename, O_RDONLY)) != -1) { 356 fflush(stdout); 357 while ((sz = read(fd, buf, sizeof(buf))) > 0) 358 write(STDOUT_FILENO, buf, sz); 359 close(fd); 360 } 361 } 362 363 static void 364 resp_begin_html(int code, const char *msg, const char *file) 365 { 366 char *cp; 367 368 resp_begin_http(code, msg); 369 370 printf("<!DOCTYPE html>\n" 371 "<html>\n" 372 "<head>\n" 373 " <meta charset=\"UTF-8\"/>\n" 374 " <meta name=\"viewport\"" 375 " content=\"width=device-width, initial-scale=1.0\">\n" 376 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 377 " type=\"text/css\" media=\"all\">\n" 378 " <title>", 379 CSS_DIR); 380 if (file != NULL) { 381 if ((cp = strrchr(file, '/')) != NULL) 382 file = cp + 1; 383 if ((cp = strrchr(file, '.')) != NULL) { 384 printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1); 385 } else 386 printf("%s - ", file); 387 } 388 printf("%s</title>\n" 389 "</head>\n" 390 "<body>\n", 391 CUSTOMIZE_TITLE); 392 393 resp_copy(MAN_DIR "/header.html"); 394 } 395 396 static void 397 resp_end_html(void) 398 { 399 400 resp_copy(MAN_DIR "/footer.html"); 401 402 puts("</body>\n" 403 "</html>"); 404 } 405 406 static void 407 resp_searchform(const struct req *req, enum focus focus) 408 { 409 int i; 410 411 printf("<form action=\"/%s\" method=\"get\">\n" 412 " <fieldset>\n" 413 " <legend>Manual Page Search Parameters</legend>\n", 414 scriptname); 415 416 /* Write query input box. */ 417 418 printf(" <input type=\"search\" name=\"query\" value=\""); 419 if (req->q.query != NULL) 420 html_print(req->q.query); 421 printf( "\" size=\"40\""); 422 if (focus == FOCUS_QUERY) 423 printf(" autofocus"); 424 puts(">"); 425 426 /* Write submission buttons. */ 427 428 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 429 "man</button>\n" 430 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 431 "apropos</button>\n" 432 " <br/>\n"); 433 434 /* Write section selector. */ 435 436 puts(" <select name=\"sec\">"); 437 for (i = 0; i < sec_MAX; i++) { 438 printf(" <option value=\"%s\"", sec_numbers[i]); 439 if (NULL != req->q.sec && 440 0 == strcmp(sec_numbers[i], req->q.sec)) 441 printf(" selected=\"selected\""); 442 printf(">%s</option>\n", sec_names[i]); 443 } 444 puts(" </select>"); 445 446 /* Write architecture selector. */ 447 448 printf( " <select name=\"arch\">\n" 449 " <option value=\"default\""); 450 if (NULL == req->q.arch) 451 printf(" selected=\"selected\""); 452 puts(">All Architectures</option>"); 453 for (i = 0; i < arch_MAX; i++) { 454 printf(" <option"); 455 if (NULL != req->q.arch && 456 0 == strcmp(arch_names[i], req->q.arch)) 457 printf(" selected=\"selected\""); 458 printf(">%s</option>\n", arch_names[i]); 459 } 460 puts(" </select>"); 461 462 /* Write manpath selector. */ 463 464 if (req->psz > 1) { 465 puts(" <select name=\"manpath\">"); 466 for (i = 0; i < (int)req->psz; i++) { 467 printf(" <option"); 468 if (strcmp(req->q.manpath, req->p[i]) == 0) 469 printf(" selected=\"selected\""); 470 printf(">"); 471 html_print(req->p[i]); 472 puts("</option>"); 473 } 474 puts(" </select>"); 475 } 476 477 puts(" </fieldset>\n" 478 "</form>"); 479 } 480 481 static int 482 validate_urifrag(const char *frag) 483 { 484 485 while ('\0' != *frag) { 486 if ( ! (isalnum((unsigned char)*frag) || 487 '-' == *frag || '.' == *frag || 488 '/' == *frag || '_' == *frag)) 489 return 0; 490 frag++; 491 } 492 return 1; 493 } 494 495 static int 496 validate_manpath(const struct req *req, const char* manpath) 497 { 498 size_t i; 499 500 for (i = 0; i < req->psz; i++) 501 if ( ! strcmp(manpath, req->p[i])) 502 return 1; 503 504 return 0; 505 } 506 507 static int 508 validate_arch(const char *arch) 509 { 510 int i; 511 512 for (i = 0; i < arch_MAX; i++) 513 if (strcmp(arch, arch_names[i]) == 0) 514 return 1; 515 516 return 0; 517 } 518 519 static int 520 validate_filename(const char *file) 521 { 522 523 if ('.' == file[0] && '/' == file[1]) 524 file += 2; 525 526 return ! (strstr(file, "../") || strstr(file, "/..") || 527 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 528 } 529 530 static void 531 pg_index(const struct req *req) 532 { 533 534 resp_begin_html(200, NULL, NULL); 535 resp_searchform(req, FOCUS_QUERY); 536 printf("<p>\n" 537 "This web interface is documented in the\n" 538 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 539 "manual, and the\n" 540 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" 541 "manual explains the query syntax.\n" 542 "</p>\n", 543 scriptname, *scriptname == '\0' ? "" : "/", 544 scriptname, *scriptname == '\0' ? "" : "/"); 545 resp_end_html(); 546 } 547 548 static void 549 pg_noresult(const struct req *req, const char *msg) 550 { 551 resp_begin_html(200, NULL, NULL); 552 resp_searchform(req, FOCUS_QUERY); 553 puts("<p>"); 554 puts(msg); 555 puts("</p>"); 556 resp_end_html(); 557 } 558 559 static void 560 pg_error_badrequest(const char *msg) 561 { 562 563 resp_begin_html(400, "Bad Request", NULL); 564 puts("<h1>Bad Request</h1>\n" 565 "<p>\n"); 566 puts(msg); 567 printf("Try again from the\n" 568 "<a href=\"/%s\">main page</a>.\n" 569 "</p>", scriptname); 570 resp_end_html(); 571 } 572 573 static void 574 pg_error_internal(void) 575 { 576 resp_begin_html(500, "Internal Server Error", NULL); 577 puts("<p>Internal Server Error</p>"); 578 resp_end_html(); 579 } 580 581 static void 582 pg_redirect(const struct req *req, const char *name) 583 { 584 printf("Status: 303 See Other\r\n" 585 "Location: /"); 586 if (*scriptname != '\0') 587 printf("%s/", scriptname); 588 if (strcmp(req->q.manpath, req->p[0])) 589 printf("%s/", req->q.manpath); 590 if (req->q.arch != NULL) 591 printf("%s/", req->q.arch); 592 http_encode(name); 593 if (req->q.sec != NULL) { 594 putchar('.'); 595 http_encode(req->q.sec); 596 } 597 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"); 598 } 599 600 static void 601 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 602 { 603 char *arch, *archend; 604 const char *sec; 605 size_t i, iuse; 606 int archprio, archpriouse; 607 int prio, priouse; 608 609 for (i = 0; i < sz; i++) { 610 if (validate_filename(r[i].file)) 611 continue; 612 warnx("invalid filename %s in %s database", 613 r[i].file, req->q.manpath); 614 pg_error_internal(); 615 return; 616 } 617 618 if (req->isquery && sz == 1) { 619 /* 620 * If we have just one result, then jump there now 621 * without any delay. 622 */ 623 printf("Status: 303 See Other\r\n" 624 "Location: /"); 625 if (*scriptname != '\0') 626 printf("%s/", scriptname); 627 if (strcmp(req->q.manpath, req->p[0])) 628 printf("%s/", req->q.manpath); 629 printf("%s\r\n" 630 "Content-Type: text/html; charset=utf-8\r\n\r\n", 631 r[0].file); 632 return; 633 } 634 635 /* 636 * In man(1) mode, show one of the pages 637 * even if more than one is found. 638 */ 639 640 iuse = 0; 641 if (req->q.equal || sz == 1) { 642 priouse = 20; 643 archpriouse = 3; 644 for (i = 0; i < sz; i++) { 645 sec = r[i].file; 646 sec += strcspn(sec, "123456789"); 647 if (sec[0] == '\0') 648 continue; 649 prio = sec_prios[sec[0] - '1']; 650 if (sec[1] != '/') 651 prio += 10; 652 if (req->q.arch == NULL) { 653 archprio = 654 ((arch = strchr(sec + 1, '/')) 655 == NULL) ? 3 : 656 ((archend = strchr(arch + 1, '/')) 657 == NULL) ? 0 : 658 strncmp(arch, "amd64/", 659 archend - arch) ? 2 : 1; 660 if (archprio < archpriouse) { 661 archpriouse = archprio; 662 priouse = prio; 663 iuse = i; 664 continue; 665 } 666 if (archprio > archpriouse) 667 continue; 668 } 669 if (prio >= priouse) 670 continue; 671 priouse = prio; 672 iuse = i; 673 } 674 resp_begin_html(200, NULL, r[iuse].file); 675 } else 676 resp_begin_html(200, NULL, NULL); 677 678 resp_searchform(req, 679 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 680 681 if (sz > 1) { 682 puts("<table class=\"results\">"); 683 for (i = 0; i < sz; i++) { 684 printf(" <tr>\n" 685 " <td>" 686 "<a class=\"Xr\" href=\"/"); 687 if (*scriptname != '\0') 688 printf("%s/", scriptname); 689 if (strcmp(req->q.manpath, req->p[0])) 690 printf("%s/", req->q.manpath); 691 printf("%s\">", r[i].file); 692 html_print(r[i].names); 693 printf("</a></td>\n" 694 " <td><span class=\"Nd\">"); 695 html_print(r[i].output); 696 puts("</span></td>\n" 697 " </tr>"); 698 } 699 puts("</table>"); 700 } 701 702 if (req->q.equal || sz == 1) { 703 puts("<hr>"); 704 resp_show(req, r[iuse].file); 705 } 706 707 resp_end_html(); 708 } 709 710 static void 711 resp_catman(const struct req *req, const char *file) 712 { 713 FILE *f; 714 char *p; 715 size_t sz; 716 ssize_t len; 717 int i; 718 int italic, bold; 719 720 if ((f = fopen(file, "r")) == NULL) { 721 puts("<p>You specified an invalid manual file.</p>"); 722 return; 723 } 724 725 puts("<div class=\"catman\">\n" 726 "<pre>"); 727 728 p = NULL; 729 sz = 0; 730 731 while ((len = getline(&p, &sz, f)) != -1) { 732 bold = italic = 0; 733 for (i = 0; i < len - 1; i++) { 734 /* 735 * This means that the catpage is out of state. 736 * Ignore it and keep going (although the 737 * catpage is bogus). 738 */ 739 740 if ('\b' == p[i] || '\n' == p[i]) 741 continue; 742 743 /* 744 * Print a regular character. 745 * Close out any bold/italic scopes. 746 * If we're in back-space mode, make sure we'll 747 * have something to enter when we backspace. 748 */ 749 750 if ('\b' != p[i + 1]) { 751 if (italic) 752 printf("</i>"); 753 if (bold) 754 printf("</b>"); 755 italic = bold = 0; 756 html_putchar(p[i]); 757 continue; 758 } else if (i + 2 >= len) 759 continue; 760 761 /* Italic mode. */ 762 763 if ('_' == p[i]) { 764 if (bold) 765 printf("</b>"); 766 if ( ! italic) 767 printf("<i>"); 768 bold = 0; 769 italic = 1; 770 i += 2; 771 html_putchar(p[i]); 772 continue; 773 } 774 775 /* 776 * Handle funny behaviour troff-isms. 777 * These grok'd from the original man2html.c. 778 */ 779 780 if (('+' == p[i] && 'o' == p[i + 2]) || 781 ('o' == p[i] && '+' == p[i + 2]) || 782 ('|' == p[i] && '=' == p[i + 2]) || 783 ('=' == p[i] && '|' == p[i + 2]) || 784 ('*' == p[i] && '=' == p[i + 2]) || 785 ('=' == p[i] && '*' == p[i + 2]) || 786 ('*' == p[i] && '|' == p[i + 2]) || 787 ('|' == p[i] && '*' == p[i + 2])) { 788 if (italic) 789 printf("</i>"); 790 if (bold) 791 printf("</b>"); 792 italic = bold = 0; 793 putchar('*'); 794 i += 2; 795 continue; 796 } else if (('|' == p[i] && '-' == p[i + 2]) || 797 ('-' == p[i] && '|' == p[i + 1]) || 798 ('+' == p[i] && '-' == p[i + 1]) || 799 ('-' == p[i] && '+' == p[i + 1]) || 800 ('+' == p[i] && '|' == p[i + 1]) || 801 ('|' == p[i] && '+' == p[i + 1])) { 802 if (italic) 803 printf("</i>"); 804 if (bold) 805 printf("</b>"); 806 italic = bold = 0; 807 putchar('+'); 808 i += 2; 809 continue; 810 } 811 812 /* Bold mode. */ 813 814 if (italic) 815 printf("</i>"); 816 if ( ! bold) 817 printf("<b>"); 818 bold = 1; 819 italic = 0; 820 i += 2; 821 html_putchar(p[i]); 822 } 823 824 /* 825 * Clean up the last character. 826 * We can get to a newline; don't print that. 827 */ 828 829 if (italic) 830 printf("</i>"); 831 if (bold) 832 printf("</b>"); 833 834 if (i == len - 1 && p[i] != '\n') 835 html_putchar(p[i]); 836 837 putchar('\n'); 838 } 839 free(p); 840 841 puts("</pre>\n" 842 "</div>"); 843 844 fclose(f); 845 } 846 847 static void 848 resp_format(const struct req *req, const char *file) 849 { 850 struct manoutput conf; 851 struct mparse *mp; 852 struct roff_meta *meta; 853 void *vp; 854 int fd; 855 int usepath; 856 857 if (-1 == (fd = open(file, O_RDONLY, 0))) { 858 puts("<p>You specified an invalid manual file.</p>"); 859 return; 860 } 861 862 mchars_alloc(); 863 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 | 864 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath); 865 mparse_readfd(mp, fd, file); 866 close(fd); 867 meta = mparse_result(mp); 868 869 memset(&conf, 0, sizeof(conf)); 870 conf.fragment = 1; 871 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); 872 conf.toc = 1; 873 usepath = strcmp(req->q.manpath, req->p[0]); 874 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S", 875 scriptname, *scriptname == '\0' ? "" : "/", 876 usepath ? req->q.manpath : "", usepath ? "/" : ""); 877 878 vp = html_alloc(&conf); 879 if (meta->macroset == MACROSET_MDOC) 880 html_mdoc(vp, meta); 881 else 882 html_man(vp, meta); 883 884 html_free(vp); 885 mparse_free(mp); 886 mchars_free(); 887 free(conf.man); 888 free(conf.style); 889 } 890 891 static void 892 resp_show(const struct req *req, const char *file) 893 { 894 895 if ('.' == file[0] && '/' == file[1]) 896 file += 2; 897 898 if ('c' == *file) 899 resp_catman(req, file); 900 else 901 resp_format(req, file); 902 } 903 904 static void 905 pg_show(struct req *req, const char *fullpath) 906 { 907 char *manpath; 908 const char *file; 909 910 if ((file = strchr(fullpath, '/')) == NULL) { 911 pg_error_badrequest( 912 "You did not specify a page to show."); 913 return; 914 } 915 manpath = mandoc_strndup(fullpath, file - fullpath); 916 file++; 917 918 if ( ! validate_manpath(req, manpath)) { 919 pg_error_badrequest( 920 "You specified an invalid manpath."); 921 free(manpath); 922 return; 923 } 924 925 /* 926 * Begin by chdir()ing into the manpath. 927 * This way we can pick up the database files, which are 928 * relative to the manpath root. 929 */ 930 931 if (chdir(manpath) == -1) { 932 warn("chdir %s", manpath); 933 pg_error_internal(); 934 free(manpath); 935 return; 936 } 937 free(manpath); 938 939 if ( ! validate_filename(file)) { 940 pg_error_badrequest( 941 "You specified an invalid manual file."); 942 return; 943 } 944 945 resp_begin_html(200, NULL, file); 946 resp_searchform(req, FOCUS_NONE); 947 resp_show(req, file); 948 resp_end_html(); 949 } 950 951 static void 952 pg_search(const struct req *req) 953 { 954 struct mansearch search; 955 struct manpaths paths; 956 struct manpage *res; 957 char **argv; 958 char *query, *rp, *wp; 959 size_t ressz; 960 int argc; 961 962 /* 963 * Begin by chdir()ing into the root of the manpath. 964 * This way we can pick up the database files, which are 965 * relative to the manpath root. 966 */ 967 968 if (chdir(req->q.manpath) == -1) { 969 warn("chdir %s", req->q.manpath); 970 pg_error_internal(); 971 return; 972 } 973 974 search.arch = req->q.arch; 975 search.sec = req->q.sec; 976 search.outkey = "Nd"; 977 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 978 search.firstmatch = 1; 979 980 paths.sz = 1; 981 paths.paths = mandoc_malloc(sizeof(char *)); 982 paths.paths[0] = mandoc_strdup("."); 983 984 /* 985 * Break apart at spaces with backslash-escaping. 986 */ 987 988 argc = 0; 989 argv = NULL; 990 rp = query = mandoc_strdup(req->q.query); 991 for (;;) { 992 while (isspace((unsigned char)*rp)) 993 rp++; 994 if (*rp == '\0') 995 break; 996 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 997 argv[argc++] = wp = rp; 998 for (;;) { 999 if (isspace((unsigned char)*rp)) { 1000 *wp = '\0'; 1001 rp++; 1002 break; 1003 } 1004 if (rp[0] == '\\' && rp[1] != '\0') 1005 rp++; 1006 if (wp != rp) 1007 *wp = *rp; 1008 if (*rp == '\0') 1009 break; 1010 wp++; 1011 rp++; 1012 } 1013 } 1014 1015 res = NULL; 1016 ressz = 0; 1017 if (req->isquery && req->q.equal && argc == 1) 1018 pg_redirect(req, argv[0]); 1019 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0) 1020 pg_noresult(req, "You entered an invalid query."); 1021 else if (ressz == 0) 1022 pg_noresult(req, "No results found."); 1023 else 1024 pg_searchres(req, res, ressz); 1025 1026 free(query); 1027 mansearch_free(res, ressz); 1028 free(paths.paths[0]); 1029 free(paths.paths); 1030 } 1031 1032 int 1033 main(void) 1034 { 1035 struct req req; 1036 struct itimerval itimer; 1037 const char *path; 1038 const char *querystring; 1039 int i; 1040 1041 #if HAVE_PLEDGE 1042 /* 1043 * The "rpath" pledge could be revoked after mparse_readfd() 1044 * if the file desciptor to "/footer.html" would be opened 1045 * up front, but it's probably not worth the complication 1046 * of the code it would cause: it would require scattering 1047 * pledge() calls in multiple low-level resp_*() functions. 1048 */ 1049 1050 if (pledge("stdio rpath", NULL) == -1) { 1051 warn("pledge"); 1052 pg_error_internal(); 1053 return EXIT_FAILURE; 1054 } 1055 #endif 1056 1057 /* Poor man's ReDoS mitigation. */ 1058 1059 itimer.it_value.tv_sec = 2; 1060 itimer.it_value.tv_usec = 0; 1061 itimer.it_interval.tv_sec = 2; 1062 itimer.it_interval.tv_usec = 0; 1063 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 1064 warn("setitimer"); 1065 pg_error_internal(); 1066 return EXIT_FAILURE; 1067 } 1068 1069 /* 1070 * First we change directory into the MAN_DIR so that 1071 * subsequent scanning for manpath directories is rooted 1072 * relative to the same position. 1073 */ 1074 1075 if (chdir(MAN_DIR) == -1) { 1076 warn("MAN_DIR: %s", MAN_DIR); 1077 pg_error_internal(); 1078 return EXIT_FAILURE; 1079 } 1080 1081 memset(&req, 0, sizeof(struct req)); 1082 req.q.equal = 1; 1083 parse_manpath_conf(&req); 1084 1085 /* Parse the path info and the query string. */ 1086 1087 if ((path = getenv("PATH_INFO")) == NULL) 1088 path = ""; 1089 else if (*path == '/') 1090 path++; 1091 1092 if (*path != '\0') { 1093 parse_path_info(&req, path); 1094 if (req.q.manpath == NULL || req.q.sec == NULL || 1095 *req.q.query == '\0' || access(path, F_OK) == -1) 1096 path = ""; 1097 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1098 parse_query_string(&req, querystring); 1099 1100 /* Validate parsed data and add defaults. */ 1101 1102 if (req.q.manpath == NULL) 1103 req.q.manpath = mandoc_strdup(req.p[0]); 1104 else if ( ! validate_manpath(&req, req.q.manpath)) { 1105 pg_error_badrequest( 1106 "You specified an invalid manpath."); 1107 return EXIT_FAILURE; 1108 } 1109 1110 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) { 1111 pg_error_badrequest( 1112 "You specified an invalid architecture."); 1113 return EXIT_FAILURE; 1114 } 1115 1116 /* Dispatch to the three different pages. */ 1117 1118 if ('\0' != *path) 1119 pg_show(&req, path); 1120 else if (NULL != req.q.query) 1121 pg_search(&req); 1122 else 1123 pg_index(&req); 1124 1125 free(req.q.manpath); 1126 free(req.q.arch); 1127 free(req.q.sec); 1128 free(req.q.query); 1129 for (i = 0; i < (int)req.psz; i++) 1130 free(req.p[i]); 1131 free(req.p); 1132 return EXIT_SUCCESS; 1133 } 1134 1135 /* 1136 * Translate PATH_INFO to a query. 1137 */ 1138 static void 1139 parse_path_info(struct req *req, const char *path) 1140 { 1141 const char *name, *sec, *end; 1142 1143 req->isquery = 0; 1144 req->q.equal = 1; 1145 req->q.manpath = NULL; 1146 req->q.arch = NULL; 1147 1148 /* Mandatory manual page name. */ 1149 if ((name = strrchr(path, '/')) == NULL) 1150 name = path; 1151 else 1152 name++; 1153 1154 /* Optional trailing section. */ 1155 sec = strrchr(name, '.'); 1156 if (sec != NULL && isdigit((unsigned char)*++sec)) { 1157 req->q.query = mandoc_strndup(name, sec - name - 1); 1158 req->q.sec = mandoc_strdup(sec); 1159 } else { 1160 req->q.query = mandoc_strdup(name); 1161 req->q.sec = NULL; 1162 } 1163 1164 /* Handle the case of name[.section] only. */ 1165 if (name == path) 1166 return; 1167 1168 /* Optional manpath. */ 1169 end = strchr(path, '/'); 1170 req->q.manpath = mandoc_strndup(path, end - path); 1171 if (validate_manpath(req, req->q.manpath)) { 1172 path = end + 1; 1173 if (name == path) 1174 return; 1175 } else { 1176 free(req->q.manpath); 1177 req->q.manpath = NULL; 1178 } 1179 1180 /* Optional section. */ 1181 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) { 1182 path += 3; 1183 end = strchr(path, '/'); 1184 free(req->q.sec); 1185 req->q.sec = mandoc_strndup(path, end - path); 1186 path = end + 1; 1187 if (name == path) 1188 return; 1189 } 1190 1191 /* Optional architecture. */ 1192 end = strchr(path, '/'); 1193 if (end + 1 != name) { 1194 pg_error_badrequest( 1195 "You specified too many directory components."); 1196 exit(EXIT_FAILURE); 1197 } 1198 req->q.arch = mandoc_strndup(path, end - path); 1199 if (validate_arch(req->q.arch) == 0) { 1200 pg_error_badrequest( 1201 "You specified an invalid directory component."); 1202 exit(EXIT_FAILURE); 1203 } 1204 } 1205 1206 /* 1207 * Scan for indexable paths. 1208 */ 1209 static void 1210 parse_manpath_conf(struct req *req) 1211 { 1212 FILE *fp; 1213 char *dp; 1214 size_t dpsz; 1215 ssize_t len; 1216 1217 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1218 warn("%s/manpath.conf", MAN_DIR); 1219 pg_error_internal(); 1220 exit(EXIT_FAILURE); 1221 } 1222 1223 dp = NULL; 1224 dpsz = 0; 1225 1226 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1227 if (dp[len - 1] == '\n') 1228 dp[--len] = '\0'; 1229 req->p = mandoc_realloc(req->p, 1230 (req->psz + 1) * sizeof(char *)); 1231 if ( ! validate_urifrag(dp)) { 1232 warnx("%s/manpath.conf contains " 1233 "unsafe path \"%s\"", MAN_DIR, dp); 1234 pg_error_internal(); 1235 exit(EXIT_FAILURE); 1236 } 1237 if (strchr(dp, '/') != NULL) { 1238 warnx("%s/manpath.conf contains " 1239 "path with slash \"%s\"", MAN_DIR, dp); 1240 pg_error_internal(); 1241 exit(EXIT_FAILURE); 1242 } 1243 req->p[req->psz++] = dp; 1244 dp = NULL; 1245 dpsz = 0; 1246 } 1247 free(dp); 1248 1249 if (req->p == NULL) { 1250 warnx("%s/manpath.conf is empty", MAN_DIR); 1251 pg_error_internal(); 1252 exit(EXIT_FAILURE); 1253 } 1254 } 1255