1 /* $Id: cgi.c,v 1.175 2021/08/19 15:23:36 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2014-2019, 2021 Ingo Schwarze <schwarze@usta.de> 4 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Implementation of the man.cgi(8) program. 19 */ 20 #include "config.h" 21 22 #include <sys/types.h> 23 #include <sys/time.h> 24 25 #include <ctype.h> 26 #if HAVE_ERR 27 #include <err.h> 28 #endif 29 #include <errno.h> 30 #include <fcntl.h> 31 #include <limits.h> 32 #include <stdint.h> 33 #include <stdio.h> 34 #include <stdlib.h> 35 #include <string.h> 36 #include <unistd.h> 37 38 #include "mandoc_aux.h" 39 #include "mandoc.h" 40 #include "roff.h" 41 #include "mdoc.h" 42 #include "man.h" 43 #include "mandoc_parse.h" 44 #include "main.h" 45 #include "manconf.h" 46 #include "mansearch.h" 47 #include "cgi.h" 48 49 /* 50 * A query as passed to the search function. 51 */ 52 struct query { 53 char *manpath; /* desired manual directory */ 54 char *arch; /* architecture */ 55 char *sec; /* manual section */ 56 char *query; /* unparsed query expression */ 57 int equal; /* match whole names, not substrings */ 58 }; 59 60 struct req { 61 struct query q; 62 char **p; /* array of available manpaths */ 63 size_t psz; /* number of available manpaths */ 64 int isquery; /* QUERY_STRING used, not PATH_INFO */ 65 }; 66 67 enum focus { 68 FOCUS_NONE = 0, 69 FOCUS_QUERY 70 }; 71 72 static void html_print(const char *); 73 static void html_putchar(char); 74 static int http_decode(char *); 75 static void http_encode(const char *); 76 static void parse_manpath_conf(struct req *); 77 static void parse_path_info(struct req *, const char *); 78 static void parse_query_string(struct req *, const char *); 79 static void pg_error_badrequest(const char *); 80 static void pg_error_internal(void); 81 static void pg_index(const struct req *); 82 static void pg_noresult(const struct req *, int, const char *, 83 const char *); 84 static void pg_redirect(const struct req *, const char *); 85 static void pg_search(const struct req *); 86 static void pg_searchres(const struct req *, 87 struct manpage *, size_t); 88 static void pg_show(struct req *, const char *); 89 static void resp_begin_html(int, const char *, const char *); 90 static void resp_begin_http(int, const char *); 91 static void resp_catman(const struct req *, const char *); 92 static void resp_copy(const char *); 93 static void resp_end_html(void); 94 static void resp_format(const struct req *, const char *); 95 static void resp_searchform(const struct req *, enum focus); 96 static void resp_show(const struct req *, const char *); 97 static void set_query_attr(char **, char **); 98 static int validate_arch(const char *); 99 static int validate_filename(const char *); 100 static int validate_manpath(const struct req *, const char *); 101 static int validate_urifrag(const char *); 102 103 static const char *scriptname = SCRIPT_NAME; 104 105 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 106 static const char *const sec_numbers[] = { 107 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 108 }; 109 static const char *const sec_names[] = { 110 "All Sections", 111 "1 - General Commands", 112 "2 - System Calls", 113 "3 - Library Functions", 114 "3p - Perl Library", 115 "4 - Device Drivers", 116 "5 - File Formats", 117 "6 - Games", 118 "7 - Miscellaneous Information", 119 "8 - System Manager\'s Manual", 120 "9 - Kernel Developer\'s Manual" 121 }; 122 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 123 124 static const char *const arch_names[] = { 125 "amd64", "alpha", "armv7", "arm64", 126 "hppa", "i386", "landisk", "loongson", 127 "luna88k", "macppc", "mips64", "octeon", 128 "powerpc64", "riscv64", "sparc64", 129 130 "amiga", "arc", "armish", "arm32", 131 "atari", "aviion", "beagle", "cats", 132 "hppa64", "hp300", 133 "ia64", "mac68k", "mvme68k", "mvme88k", 134 "mvmeppc", "palm", "pc532", "pegasos", 135 "pmax", "powerpc", "sgi", "socppc", 136 "solbourne", "sparc", 137 "sun3", "vax", "wgrisc", "x68k", 138 "zaurus" 139 }; 140 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 141 142 /* 143 * Print a character, escaping HTML along the way. 144 * This will pass non-ASCII straight to output: be warned! 145 */ 146 static void 147 html_putchar(char c) 148 { 149 150 switch (c) { 151 case '"': 152 printf("""); 153 break; 154 case '&': 155 printf("&"); 156 break; 157 case '>': 158 printf(">"); 159 break; 160 case '<': 161 printf("<"); 162 break; 163 default: 164 putchar((unsigned char)c); 165 break; 166 } 167 } 168 169 /* 170 * Call through to html_putchar(). 171 * Accepts NULL strings. 172 */ 173 static void 174 html_print(const char *p) 175 { 176 177 if (NULL == p) 178 return; 179 while ('\0' != *p) 180 html_putchar(*p++); 181 } 182 183 /* 184 * Transfer the responsibility for the allocated string *val 185 * to the query structure. 186 */ 187 static void 188 set_query_attr(char **attr, char **val) 189 { 190 191 free(*attr); 192 if (**val == '\0') { 193 *attr = NULL; 194 free(*val); 195 } else 196 *attr = *val; 197 *val = NULL; 198 } 199 200 /* 201 * Parse the QUERY_STRING for key-value pairs 202 * and store the values into the query structure. 203 */ 204 static void 205 parse_query_string(struct req *req, const char *qs) 206 { 207 char *key, *val; 208 size_t keysz, valsz; 209 210 req->isquery = 1; 211 req->q.manpath = NULL; 212 req->q.arch = NULL; 213 req->q.sec = NULL; 214 req->q.query = NULL; 215 req->q.equal = 1; 216 217 key = val = NULL; 218 while (*qs != '\0') { 219 220 /* Parse one key. */ 221 222 keysz = strcspn(qs, "=;&"); 223 key = mandoc_strndup(qs, keysz); 224 qs += keysz; 225 if (*qs != '=') 226 goto next; 227 228 /* Parse one value. */ 229 230 valsz = strcspn(++qs, ";&"); 231 val = mandoc_strndup(qs, valsz); 232 qs += valsz; 233 234 /* Decode and catch encoding errors. */ 235 236 if ( ! (http_decode(key) && http_decode(val))) 237 goto next; 238 239 /* Handle key-value pairs. */ 240 241 if ( ! strcmp(key, "query")) 242 set_query_attr(&req->q.query, &val); 243 244 else if ( ! strcmp(key, "apropos")) 245 req->q.equal = !strcmp(val, "0"); 246 247 else if ( ! strcmp(key, "manpath")) { 248 #ifdef COMPAT_OLDURI 249 if ( ! strncmp(val, "OpenBSD ", 8)) { 250 val[7] = '-'; 251 if ('C' == val[8]) 252 val[8] = 'c'; 253 } 254 #endif 255 set_query_attr(&req->q.manpath, &val); 256 } 257 258 else if ( ! (strcmp(key, "sec") 259 #ifdef COMPAT_OLDURI 260 && strcmp(key, "sektion") 261 #endif 262 )) { 263 if ( ! strcmp(val, "0")) 264 *val = '\0'; 265 set_query_attr(&req->q.sec, &val); 266 } 267 268 else if ( ! strcmp(key, "arch")) { 269 if ( ! strcmp(val, "default")) 270 *val = '\0'; 271 set_query_attr(&req->q.arch, &val); 272 } 273 274 /* 275 * The key must be freed in any case. 276 * The val may have been handed over to the query 277 * structure, in which case it is now NULL. 278 */ 279 next: 280 free(key); 281 key = NULL; 282 free(val); 283 val = NULL; 284 285 if (*qs != '\0') 286 qs++; 287 } 288 } 289 290 /* 291 * HTTP-decode a string. The standard explanation is that this turns 292 * "%4e+foo" into "n foo" in the regular way. This is done in-place 293 * over the allocated string. 294 */ 295 static int 296 http_decode(char *p) 297 { 298 char hex[3]; 299 char *q; 300 int c; 301 302 hex[2] = '\0'; 303 304 q = p; 305 for ( ; '\0' != *p; p++, q++) { 306 if ('%' == *p) { 307 if ('\0' == (hex[0] = *(p + 1))) 308 return 0; 309 if ('\0' == (hex[1] = *(p + 2))) 310 return 0; 311 if (1 != sscanf(hex, "%x", &c)) 312 return 0; 313 if ('\0' == c) 314 return 0; 315 316 *q = (char)c; 317 p += 2; 318 } else 319 *q = '+' == *p ? ' ' : *p; 320 } 321 322 *q = '\0'; 323 return 1; 324 } 325 326 static void 327 http_encode(const char *p) 328 { 329 for (; *p != '\0'; p++) { 330 if (isalnum((unsigned char)*p) == 0 && 331 strchr("-._~", *p) == NULL) 332 printf("%%%2.2X", (unsigned char)*p); 333 else 334 putchar(*p); 335 } 336 } 337 338 static void 339 resp_begin_http(int code, const char *msg) 340 { 341 342 if (200 != code) 343 printf("Status: %d %s\r\n", code, msg); 344 345 printf("Content-Type: text/html; charset=utf-8\r\n" 346 "Cache-Control: no-cache\r\n" 347 "Content-Security-Policy: default-src 'none'; " 348 "style-src 'self' 'unsafe-inline'\r\n" 349 "Pragma: no-cache\r\n" 350 "\r\n"); 351 352 fflush(stdout); 353 } 354 355 static void 356 resp_copy(const char *filename) 357 { 358 char buf[4096]; 359 ssize_t sz; 360 int fd; 361 362 if ((fd = open(filename, O_RDONLY)) != -1) { 363 fflush(stdout); 364 while ((sz = read(fd, buf, sizeof(buf))) > 0) 365 write(STDOUT_FILENO, buf, sz); 366 close(fd); 367 } 368 } 369 370 static void 371 resp_begin_html(int code, const char *msg, const char *file) 372 { 373 const char *name, *sec, *cp; 374 int namesz, secsz; 375 376 resp_begin_http(code, msg); 377 378 printf("<!DOCTYPE html>\n" 379 "<html>\n" 380 "<head>\n" 381 " <meta charset=\"UTF-8\"/>\n" 382 " <meta name=\"viewport\"" 383 " content=\"width=device-width, initial-scale=1.0\">\n" 384 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 385 " type=\"text/css\" media=\"all\">\n" 386 " <title>", 387 CSS_DIR); 388 if (file != NULL) { 389 cp = strrchr(file, '/'); 390 name = cp == NULL ? file : cp + 1; 391 cp = strrchr(name, '.'); 392 namesz = cp == NULL ? strlen(name) : cp - name; 393 sec = NULL; 394 if (cp != NULL && cp[1] != '0') { 395 sec = cp + 1; 396 secsz = strlen(sec); 397 } else if (name - file > 1) { 398 for (cp = name - 2; cp >= file; cp--) { 399 if (*cp < '1' || *cp > '9') 400 continue; 401 sec = cp; 402 secsz = name - cp - 1; 403 break; 404 } 405 } 406 printf("%.*s", namesz, name); 407 if (sec != NULL) 408 printf("(%.*s)", secsz, sec); 409 fputs(" - ", stdout); 410 } 411 printf("%s</title>\n" 412 "</head>\n" 413 "<body>\n", 414 CUSTOMIZE_TITLE); 415 416 resp_copy(MAN_DIR "/header.html"); 417 } 418 419 static void 420 resp_end_html(void) 421 { 422 423 resp_copy(MAN_DIR "/footer.html"); 424 425 puts("</body>\n" 426 "</html>"); 427 } 428 429 static void 430 resp_searchform(const struct req *req, enum focus focus) 431 { 432 int i; 433 434 printf("<form action=\"/%s\" method=\"get\" " 435 "autocomplete=\"off\" autocapitalize=\"none\">\n" 436 " <fieldset>\n" 437 " <legend>Manual Page Search Parameters</legend>\n", 438 scriptname); 439 440 /* Write query input box. */ 441 442 printf(" <input type=\"search\" name=\"query\" value=\""); 443 if (req->q.query != NULL) 444 html_print(req->q.query); 445 printf( "\" size=\"40\""); 446 if (focus == FOCUS_QUERY) 447 printf(" autofocus"); 448 puts(">"); 449 450 /* Write submission buttons. */ 451 452 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 453 "man</button>\n" 454 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 455 "apropos</button>\n" 456 " <br/>\n"); 457 458 /* Write section selector. */ 459 460 puts(" <select name=\"sec\">"); 461 for (i = 0; i < sec_MAX; i++) { 462 printf(" <option value=\"%s\"", sec_numbers[i]); 463 if (NULL != req->q.sec && 464 0 == strcmp(sec_numbers[i], req->q.sec)) 465 printf(" selected=\"selected\""); 466 printf(">%s</option>\n", sec_names[i]); 467 } 468 puts(" </select>"); 469 470 /* Write architecture selector. */ 471 472 printf( " <select name=\"arch\">\n" 473 " <option value=\"default\""); 474 if (NULL == req->q.arch) 475 printf(" selected=\"selected\""); 476 puts(">All Architectures</option>"); 477 for (i = 0; i < arch_MAX; i++) { 478 printf(" <option"); 479 if (NULL != req->q.arch && 480 0 == strcmp(arch_names[i], req->q.arch)) 481 printf(" selected=\"selected\""); 482 printf(">%s</option>\n", arch_names[i]); 483 } 484 puts(" </select>"); 485 486 /* Write manpath selector. */ 487 488 if (req->psz > 1) { 489 puts(" <select name=\"manpath\">"); 490 for (i = 0; i < (int)req->psz; i++) { 491 printf(" <option"); 492 if (strcmp(req->q.manpath, req->p[i]) == 0) 493 printf(" selected=\"selected\""); 494 printf(">"); 495 html_print(req->p[i]); 496 puts("</option>"); 497 } 498 puts(" </select>"); 499 } 500 501 puts(" </fieldset>\n" 502 "</form>"); 503 } 504 505 static int 506 validate_urifrag(const char *frag) 507 { 508 509 while ('\0' != *frag) { 510 if ( ! (isalnum((unsigned char)*frag) || 511 '-' == *frag || '.' == *frag || 512 '/' == *frag || '_' == *frag)) 513 return 0; 514 frag++; 515 } 516 return 1; 517 } 518 519 static int 520 validate_manpath(const struct req *req, const char* manpath) 521 { 522 size_t i; 523 524 for (i = 0; i < req->psz; i++) 525 if ( ! strcmp(manpath, req->p[i])) 526 return 1; 527 528 return 0; 529 } 530 531 static int 532 validate_arch(const char *arch) 533 { 534 int i; 535 536 for (i = 0; i < arch_MAX; i++) 537 if (strcmp(arch, arch_names[i]) == 0) 538 return 1; 539 540 return 0; 541 } 542 543 static int 544 validate_filename(const char *file) 545 { 546 547 if ('.' == file[0] && '/' == file[1]) 548 file += 2; 549 550 return ! (strstr(file, "../") || strstr(file, "/..") || 551 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 552 } 553 554 static void 555 pg_index(const struct req *req) 556 { 557 558 resp_begin_html(200, NULL, NULL); 559 resp_searchform(req, FOCUS_QUERY); 560 printf("<p>\n" 561 "This web interface is documented in the\n" 562 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n" 563 "manual, and the\n" 564 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n" 565 "manual explains the query syntax.\n" 566 "</p>\n", 567 scriptname, *scriptname == '\0' ? "" : "/", 568 scriptname, *scriptname == '\0' ? "" : "/"); 569 resp_end_html(); 570 } 571 572 static void 573 pg_noresult(const struct req *req, int code, const char *http_msg, 574 const char *user_msg) 575 { 576 resp_begin_html(code, http_msg, NULL); 577 resp_searchform(req, FOCUS_QUERY); 578 puts("<p>"); 579 puts(user_msg); 580 puts("</p>"); 581 resp_end_html(); 582 } 583 584 static void 585 pg_error_badrequest(const char *msg) 586 { 587 588 resp_begin_html(400, "Bad Request", NULL); 589 puts("<h1>Bad Request</h1>\n" 590 "<p>\n"); 591 puts(msg); 592 printf("Try again from the\n" 593 "<a href=\"/%s\">main page</a>.\n" 594 "</p>", scriptname); 595 resp_end_html(); 596 } 597 598 static void 599 pg_error_internal(void) 600 { 601 resp_begin_html(500, "Internal Server Error", NULL); 602 puts("<p>Internal Server Error</p>"); 603 resp_end_html(); 604 } 605 606 static void 607 pg_redirect(const struct req *req, const char *name) 608 { 609 printf("Status: 303 See Other\r\n" 610 "Location: /"); 611 if (*scriptname != '\0') 612 printf("%s/", scriptname); 613 if (strcmp(req->q.manpath, req->p[0])) 614 printf("%s/", req->q.manpath); 615 if (req->q.arch != NULL) 616 printf("%s/", req->q.arch); 617 http_encode(name); 618 if (req->q.sec != NULL) { 619 putchar('.'); 620 http_encode(req->q.sec); 621 } 622 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"); 623 } 624 625 static void 626 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 627 { 628 char *arch, *archend; 629 const char *sec; 630 size_t i, iuse; 631 int archprio, archpriouse; 632 int prio, priouse; 633 634 for (i = 0; i < sz; i++) { 635 if (validate_filename(r[i].file)) 636 continue; 637 warnx("invalid filename %s in %s database", 638 r[i].file, req->q.manpath); 639 pg_error_internal(); 640 return; 641 } 642 643 if (req->isquery && sz == 1) { 644 /* 645 * If we have just one result, then jump there now 646 * without any delay. 647 */ 648 printf("Status: 303 See Other\r\n" 649 "Location: /"); 650 if (*scriptname != '\0') 651 printf("%s/", scriptname); 652 if (strcmp(req->q.manpath, req->p[0])) 653 printf("%s/", req->q.manpath); 654 printf("%s\r\n" 655 "Content-Type: text/html; charset=utf-8\r\n\r\n", 656 r[0].file); 657 return; 658 } 659 660 /* 661 * In man(1) mode, show one of the pages 662 * even if more than one is found. 663 */ 664 665 iuse = 0; 666 if (req->q.equal || sz == 1) { 667 priouse = 20; 668 archpriouse = 3; 669 for (i = 0; i < sz; i++) { 670 sec = r[i].file; 671 sec += strcspn(sec, "123456789"); 672 if (sec[0] == '\0') 673 continue; 674 prio = sec_prios[sec[0] - '1']; 675 if (sec[1] != '/') 676 prio += 10; 677 if (req->q.arch == NULL) { 678 archprio = 679 ((arch = strchr(sec + 1, '/')) 680 == NULL) ? 3 : 681 ((archend = strchr(arch + 1, '/')) 682 == NULL) ? 0 : 683 strncmp(arch, "amd64/", 684 archend - arch) ? 2 : 1; 685 if (archprio < archpriouse) { 686 archpriouse = archprio; 687 priouse = prio; 688 iuse = i; 689 continue; 690 } 691 if (archprio > archpriouse) 692 continue; 693 } 694 if (prio >= priouse) 695 continue; 696 priouse = prio; 697 iuse = i; 698 } 699 resp_begin_html(200, NULL, r[iuse].file); 700 } else 701 resp_begin_html(200, NULL, NULL); 702 703 resp_searchform(req, 704 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 705 706 if (sz > 1) { 707 puts("<table class=\"results\">"); 708 for (i = 0; i < sz; i++) { 709 printf(" <tr>\n" 710 " <td>" 711 "<a class=\"Xr\" href=\"/"); 712 if (*scriptname != '\0') 713 printf("%s/", scriptname); 714 if (strcmp(req->q.manpath, req->p[0])) 715 printf("%s/", req->q.manpath); 716 printf("%s\">", r[i].file); 717 html_print(r[i].names); 718 printf("</a></td>\n" 719 " <td><span class=\"Nd\">"); 720 html_print(r[i].output); 721 puts("</span></td>\n" 722 " </tr>"); 723 } 724 puts("</table>"); 725 } 726 727 if (req->q.equal || sz == 1) { 728 puts("<hr>"); 729 resp_show(req, r[iuse].file); 730 } 731 732 resp_end_html(); 733 } 734 735 static void 736 resp_catman(const struct req *req, const char *file) 737 { 738 FILE *f; 739 char *p; 740 size_t sz; 741 ssize_t len; 742 int i; 743 int italic, bold; 744 745 if ((f = fopen(file, "r")) == NULL) { 746 puts("<p>You specified an invalid manual file.</p>"); 747 return; 748 } 749 750 puts("<div class=\"catman\">\n" 751 "<pre>"); 752 753 p = NULL; 754 sz = 0; 755 756 while ((len = getline(&p, &sz, f)) != -1) { 757 bold = italic = 0; 758 for (i = 0; i < len - 1; i++) { 759 /* 760 * This means that the catpage is out of state. 761 * Ignore it and keep going (although the 762 * catpage is bogus). 763 */ 764 765 if ('\b' == p[i] || '\n' == p[i]) 766 continue; 767 768 /* 769 * Print a regular character. 770 * Close out any bold/italic scopes. 771 * If we're in back-space mode, make sure we'll 772 * have something to enter when we backspace. 773 */ 774 775 if ('\b' != p[i + 1]) { 776 if (italic) 777 printf("</i>"); 778 if (bold) 779 printf("</b>"); 780 italic = bold = 0; 781 html_putchar(p[i]); 782 continue; 783 } else if (i + 2 >= len) 784 continue; 785 786 /* Italic mode. */ 787 788 if ('_' == p[i]) { 789 if (bold) 790 printf("</b>"); 791 if ( ! italic) 792 printf("<i>"); 793 bold = 0; 794 italic = 1; 795 i += 2; 796 html_putchar(p[i]); 797 continue; 798 } 799 800 /* 801 * Handle funny behaviour troff-isms. 802 * These grok'd from the original man2html.c. 803 */ 804 805 if (('+' == p[i] && 'o' == p[i + 2]) || 806 ('o' == p[i] && '+' == p[i + 2]) || 807 ('|' == p[i] && '=' == p[i + 2]) || 808 ('=' == p[i] && '|' == p[i + 2]) || 809 ('*' == p[i] && '=' == p[i + 2]) || 810 ('=' == p[i] && '*' == p[i + 2]) || 811 ('*' == p[i] && '|' == p[i + 2]) || 812 ('|' == p[i] && '*' == p[i + 2])) { 813 if (italic) 814 printf("</i>"); 815 if (bold) 816 printf("</b>"); 817 italic = bold = 0; 818 putchar('*'); 819 i += 2; 820 continue; 821 } else if (('|' == p[i] && '-' == p[i + 2]) || 822 ('-' == p[i] && '|' == p[i + 1]) || 823 ('+' == p[i] && '-' == p[i + 1]) || 824 ('-' == p[i] && '+' == p[i + 1]) || 825 ('+' == p[i] && '|' == p[i + 1]) || 826 ('|' == p[i] && '+' == p[i + 1])) { 827 if (italic) 828 printf("</i>"); 829 if (bold) 830 printf("</b>"); 831 italic = bold = 0; 832 putchar('+'); 833 i += 2; 834 continue; 835 } 836 837 /* Bold mode. */ 838 839 if (italic) 840 printf("</i>"); 841 if ( ! bold) 842 printf("<b>"); 843 bold = 1; 844 italic = 0; 845 i += 2; 846 html_putchar(p[i]); 847 } 848 849 /* 850 * Clean up the last character. 851 * We can get to a newline; don't print that. 852 */ 853 854 if (italic) 855 printf("</i>"); 856 if (bold) 857 printf("</b>"); 858 859 if (i == len - 1 && p[i] != '\n') 860 html_putchar(p[i]); 861 862 putchar('\n'); 863 } 864 free(p); 865 866 puts("</pre>\n" 867 "</div>"); 868 869 fclose(f); 870 } 871 872 static void 873 resp_format(const struct req *req, const char *file) 874 { 875 struct manoutput conf; 876 struct mparse *mp; 877 struct roff_meta *meta; 878 void *vp; 879 int fd; 880 int usepath; 881 882 if (-1 == (fd = open(file, O_RDONLY, 0))) { 883 puts("<p>You specified an invalid manual file.</p>"); 884 return; 885 } 886 887 mchars_alloc(); 888 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 | 889 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath); 890 mparse_readfd(mp, fd, file); 891 close(fd); 892 meta = mparse_result(mp); 893 894 memset(&conf, 0, sizeof(conf)); 895 conf.fragment = 1; 896 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); 897 usepath = strcmp(req->q.manpath, req->p[0]); 898 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S", 899 scriptname, *scriptname == '\0' ? "" : "/", 900 usepath ? req->q.manpath : "", usepath ? "/" : ""); 901 902 vp = html_alloc(&conf); 903 if (meta->macroset == MACROSET_MDOC) 904 html_mdoc(vp, meta); 905 else 906 html_man(vp, meta); 907 908 html_free(vp); 909 mparse_free(mp); 910 mchars_free(); 911 free(conf.man); 912 free(conf.style); 913 } 914 915 static void 916 resp_show(const struct req *req, const char *file) 917 { 918 919 if ('.' == file[0] && '/' == file[1]) 920 file += 2; 921 922 if ('c' == *file) 923 resp_catman(req, file); 924 else 925 resp_format(req, file); 926 } 927 928 static void 929 pg_show(struct req *req, const char *fullpath) 930 { 931 char *manpath; 932 const char *file; 933 934 if ((file = strchr(fullpath, '/')) == NULL) { 935 pg_error_badrequest( 936 "You did not specify a page to show."); 937 return; 938 } 939 manpath = mandoc_strndup(fullpath, file - fullpath); 940 file++; 941 942 if ( ! validate_manpath(req, manpath)) { 943 pg_error_badrequest( 944 "You specified an invalid manpath."); 945 free(manpath); 946 return; 947 } 948 949 /* 950 * Begin by chdir()ing into the manpath. 951 * This way we can pick up the database files, which are 952 * relative to the manpath root. 953 */ 954 955 if (chdir(manpath) == -1) { 956 warn("chdir %s", manpath); 957 pg_error_internal(); 958 free(manpath); 959 return; 960 } 961 free(manpath); 962 963 if ( ! validate_filename(file)) { 964 pg_error_badrequest( 965 "You specified an invalid manual file."); 966 return; 967 } 968 969 resp_begin_html(200, NULL, file); 970 resp_searchform(req, FOCUS_NONE); 971 resp_show(req, file); 972 resp_end_html(); 973 } 974 975 static void 976 pg_search(const struct req *req) 977 { 978 struct mansearch search; 979 struct manpaths paths; 980 struct manpage *res; 981 char **argv; 982 char *query, *rp, *wp; 983 size_t ressz; 984 int argc; 985 986 /* 987 * Begin by chdir()ing into the root of the manpath. 988 * This way we can pick up the database files, which are 989 * relative to the manpath root. 990 */ 991 992 if (chdir(req->q.manpath) == -1) { 993 warn("chdir %s", req->q.manpath); 994 pg_error_internal(); 995 return; 996 } 997 998 search.arch = req->q.arch; 999 search.sec = req->q.sec; 1000 search.outkey = "Nd"; 1001 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 1002 search.firstmatch = 1; 1003 1004 paths.sz = 1; 1005 paths.paths = mandoc_malloc(sizeof(char *)); 1006 paths.paths[0] = mandoc_strdup("."); 1007 1008 /* 1009 * Break apart at spaces with backslash-escaping. 1010 */ 1011 1012 argc = 0; 1013 argv = NULL; 1014 rp = query = mandoc_strdup(req->q.query); 1015 for (;;) { 1016 while (isspace((unsigned char)*rp)) 1017 rp++; 1018 if (*rp == '\0') 1019 break; 1020 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 1021 argv[argc++] = wp = rp; 1022 for (;;) { 1023 if (isspace((unsigned char)*rp)) { 1024 *wp = '\0'; 1025 rp++; 1026 break; 1027 } 1028 if (rp[0] == '\\' && rp[1] != '\0') 1029 rp++; 1030 if (wp != rp) 1031 *wp = *rp; 1032 if (*rp == '\0') 1033 break; 1034 wp++; 1035 rp++; 1036 } 1037 } 1038 1039 res = NULL; 1040 ressz = 0; 1041 if (req->isquery && req->q.equal && argc == 1) 1042 pg_redirect(req, argv[0]); 1043 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0) 1044 pg_noresult(req, 400, "Bad Request", 1045 "You entered an invalid query."); 1046 else if (ressz == 0) 1047 pg_noresult(req, 404, "Not Found", "No results found."); 1048 else 1049 pg_searchres(req, res, ressz); 1050 1051 free(query); 1052 mansearch_free(res, ressz); 1053 free(paths.paths[0]); 1054 free(paths.paths); 1055 } 1056 1057 int 1058 main(void) 1059 { 1060 struct req req; 1061 struct itimerval itimer; 1062 const char *path; 1063 const char *querystring; 1064 int i; 1065 1066 #if HAVE_PLEDGE 1067 /* 1068 * The "rpath" pledge could be revoked after mparse_readfd() 1069 * if the file desciptor to "/footer.html" would be opened 1070 * up front, but it's probably not worth the complication 1071 * of the code it would cause: it would require scattering 1072 * pledge() calls in multiple low-level resp_*() functions. 1073 */ 1074 1075 if (pledge("stdio rpath", NULL) == -1) { 1076 warn("pledge"); 1077 pg_error_internal(); 1078 return EXIT_FAILURE; 1079 } 1080 #endif 1081 1082 /* Poor man's ReDoS mitigation. */ 1083 1084 itimer.it_value.tv_sec = 2; 1085 itimer.it_value.tv_usec = 0; 1086 itimer.it_interval.tv_sec = 2; 1087 itimer.it_interval.tv_usec = 0; 1088 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 1089 warn("setitimer"); 1090 pg_error_internal(); 1091 return EXIT_FAILURE; 1092 } 1093 1094 /* 1095 * First we change directory into the MAN_DIR so that 1096 * subsequent scanning for manpath directories is rooted 1097 * relative to the same position. 1098 */ 1099 1100 if (chdir(MAN_DIR) == -1) { 1101 warn("MAN_DIR: %s", MAN_DIR); 1102 pg_error_internal(); 1103 return EXIT_FAILURE; 1104 } 1105 1106 memset(&req, 0, sizeof(struct req)); 1107 req.q.equal = 1; 1108 parse_manpath_conf(&req); 1109 1110 /* Parse the path info and the query string. */ 1111 1112 if ((path = getenv("PATH_INFO")) == NULL) 1113 path = ""; 1114 else if (*path == '/') 1115 path++; 1116 1117 if (*path != '\0') { 1118 parse_path_info(&req, path); 1119 if (req.q.manpath == NULL || req.q.sec == NULL || 1120 *req.q.query == '\0' || access(path, F_OK) == -1) 1121 path = ""; 1122 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1123 parse_query_string(&req, querystring); 1124 1125 /* Validate parsed data and add defaults. */ 1126 1127 if (req.q.manpath == NULL) 1128 req.q.manpath = mandoc_strdup(req.p[0]); 1129 else if ( ! validate_manpath(&req, req.q.manpath)) { 1130 pg_error_badrequest( 1131 "You specified an invalid manpath."); 1132 return EXIT_FAILURE; 1133 } 1134 1135 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) { 1136 pg_error_badrequest( 1137 "You specified an invalid architecture."); 1138 return EXIT_FAILURE; 1139 } 1140 1141 /* Dispatch to the three different pages. */ 1142 1143 if ('\0' != *path) 1144 pg_show(&req, path); 1145 else if (NULL != req.q.query) 1146 pg_search(&req); 1147 else 1148 pg_index(&req); 1149 1150 free(req.q.manpath); 1151 free(req.q.arch); 1152 free(req.q.sec); 1153 free(req.q.query); 1154 for (i = 0; i < (int)req.psz; i++) 1155 free(req.p[i]); 1156 free(req.p); 1157 return EXIT_SUCCESS; 1158 } 1159 1160 /* 1161 * Translate PATH_INFO to a query. 1162 */ 1163 static void 1164 parse_path_info(struct req *req, const char *path) 1165 { 1166 const char *name, *sec, *end; 1167 1168 req->isquery = 0; 1169 req->q.equal = 1; 1170 req->q.manpath = NULL; 1171 req->q.arch = NULL; 1172 1173 /* Mandatory manual page name. */ 1174 if ((name = strrchr(path, '/')) == NULL) 1175 name = path; 1176 else 1177 name++; 1178 1179 /* Optional trailing section. */ 1180 sec = strrchr(name, '.'); 1181 if (sec != NULL && isdigit((unsigned char)*++sec)) { 1182 req->q.query = mandoc_strndup(name, sec - name - 1); 1183 req->q.sec = mandoc_strdup(sec); 1184 } else { 1185 req->q.query = mandoc_strdup(name); 1186 req->q.sec = NULL; 1187 } 1188 1189 /* Handle the case of name[.section] only. */ 1190 if (name == path) 1191 return; 1192 1193 /* Optional manpath. */ 1194 end = strchr(path, '/'); 1195 req->q.manpath = mandoc_strndup(path, end - path); 1196 if (validate_manpath(req, req->q.manpath)) { 1197 path = end + 1; 1198 if (name == path) 1199 return; 1200 } else { 1201 free(req->q.manpath); 1202 req->q.manpath = NULL; 1203 } 1204 1205 /* Optional section. */ 1206 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) { 1207 path += 3; 1208 end = strchr(path, '/'); 1209 free(req->q.sec); 1210 req->q.sec = mandoc_strndup(path, end - path); 1211 path = end + 1; 1212 if (name == path) 1213 return; 1214 } 1215 1216 /* Optional architecture. */ 1217 end = strchr(path, '/'); 1218 if (end + 1 != name) { 1219 pg_error_badrequest( 1220 "You specified too many directory components."); 1221 exit(EXIT_FAILURE); 1222 } 1223 req->q.arch = mandoc_strndup(path, end - path); 1224 if (validate_arch(req->q.arch) == 0) { 1225 pg_error_badrequest( 1226 "You specified an invalid directory component."); 1227 exit(EXIT_FAILURE); 1228 } 1229 } 1230 1231 /* 1232 * Scan for indexable paths. 1233 */ 1234 static void 1235 parse_manpath_conf(struct req *req) 1236 { 1237 FILE *fp; 1238 char *dp; 1239 size_t dpsz; 1240 ssize_t len; 1241 1242 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1243 warn("%s/manpath.conf", MAN_DIR); 1244 pg_error_internal(); 1245 exit(EXIT_FAILURE); 1246 } 1247 1248 dp = NULL; 1249 dpsz = 0; 1250 1251 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1252 if (dp[len - 1] == '\n') 1253 dp[--len] = '\0'; 1254 req->p = mandoc_realloc(req->p, 1255 (req->psz + 1) * sizeof(char *)); 1256 if ( ! validate_urifrag(dp)) { 1257 warnx("%s/manpath.conf contains " 1258 "unsafe path \"%s\"", MAN_DIR, dp); 1259 pg_error_internal(); 1260 exit(EXIT_FAILURE); 1261 } 1262 if (strchr(dp, '/') != NULL) { 1263 warnx("%s/manpath.conf contains " 1264 "path with slash \"%s\"", MAN_DIR, dp); 1265 pg_error_internal(); 1266 exit(EXIT_FAILURE); 1267 } 1268 req->p[req->psz++] = dp; 1269 dp = NULL; 1270 dpsz = 0; 1271 } 1272 free(dp); 1273 1274 if (req->p == NULL) { 1275 warnx("%s/manpath.conf is empty", MAN_DIR); 1276 pg_error_internal(); 1277 exit(EXIT_FAILURE); 1278 } 1279 } 1280