1 /* $Id: cgi.c,v 1.181 2023/04/28 19:11:03 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2014-2019, 2021, 2022 Ingo Schwarze <schwarze@usta.de> 4 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv> 5 * Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 * 19 * Implementation of the man.cgi(8) program. 20 */ 21 #include "config.h" 22 23 #include <sys/types.h> 24 #include <sys/time.h> 25 26 #include <ctype.h> 27 #if HAVE_ERR 28 #include <err.h> 29 #endif 30 #include <errno.h> 31 #include <fcntl.h> 32 #include <limits.h> 33 #include <stdint.h> 34 #include <stdio.h> 35 #include <stdlib.h> 36 #include <string.h> 37 #include <unistd.h> 38 39 #include "mandoc_aux.h" 40 #include "mandoc.h" 41 #include "roff.h" 42 #include "mdoc.h" 43 #include "man.h" 44 #include "mandoc_parse.h" 45 #include "main.h" 46 #include "manconf.h" 47 #include "mansearch.h" 48 #include "cgi.h" 49 50 /* 51 * A query as passed to the search function. 52 */ 53 struct query { 54 char *manpath; /* desired manual directory */ 55 char *arch; /* architecture */ 56 char *sec; /* manual section */ 57 char *query; /* unparsed query expression */ 58 int equal; /* match whole names, not substrings */ 59 }; 60 61 struct req { 62 struct query q; 63 char **p; /* array of available manpaths */ 64 size_t psz; /* number of available manpaths */ 65 int isquery; /* QUERY_STRING used, not PATH_INFO */ 66 }; 67 68 enum focus { 69 FOCUS_NONE = 0, 70 FOCUS_QUERY 71 }; 72 73 static void html_print(const char *); 74 static void html_putchar(char); 75 static int http_decode(char *); 76 static void http_encode(const char *); 77 static void parse_manpath_conf(struct req *); 78 static void parse_path_info(struct req *, const char *); 79 static void parse_query_string(struct req *, const char *); 80 static void pg_error_badrequest(const char *); 81 static void pg_error_internal(void); 82 static void pg_index(const struct req *); 83 static void pg_noresult(const struct req *, int, const char *, 84 const char *); 85 static void pg_redirect(const struct req *, const char *); 86 static void pg_search(const struct req *); 87 static void pg_searchres(const struct req *, 88 struct manpage *, size_t); 89 static void pg_show(struct req *, const char *); 90 static int resp_begin_html(int, const char *, const char *); 91 static void resp_begin_http(int, const char *); 92 static void resp_catman(const struct req *, const char *); 93 static int resp_copy(const char *, const char *); 94 static void resp_end_html(void); 95 static void resp_format(const struct req *, const char *); 96 static void resp_searchform(const struct req *, enum focus); 97 static void resp_show(const struct req *, const char *); 98 static void set_query_attr(char **, char **); 99 static int validate_arch(const char *); 100 static int validate_filename(const char *); 101 static int validate_manpath(const struct req *, const char *); 102 static int validate_urifrag(const char *); 103 104 static const char *scriptname = SCRIPT_NAME; 105 106 static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9}; 107 static const char *const sec_numbers[] = { 108 "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9" 109 }; 110 static const char *const sec_names[] = { 111 "All Sections", 112 "1 - General Commands", 113 "2 - System Calls", 114 "3 - Library Functions", 115 "3p - Perl Library", 116 "4 - Device Drivers", 117 "5 - File Formats", 118 "6 - Games", 119 "7 - Miscellaneous Information", 120 "8 - System Manager\'s Manual", 121 "9 - Kernel Developer\'s Manual" 122 }; 123 static const int sec_MAX = sizeof(sec_names) / sizeof(char *); 124 125 static const char *const arch_names[] = { 126 "amd64", "alpha", "armv7", "arm64", 127 "hppa", "i386", "landisk", "loongson", 128 "luna88k", "macppc", "mips64", "octeon", 129 "powerpc64", "riscv64", "sparc64", 130 131 "amiga", "arc", "armish", "arm32", 132 "atari", "aviion", "beagle", "cats", 133 "hppa64", "hp300", 134 "ia64", "mac68k", "mvme68k", "mvme88k", 135 "mvmeppc", "palm", "pc532", "pegasos", 136 "pmax", "powerpc", "sgi", "socppc", 137 "solbourne", "sparc", 138 "sun3", "vax", "wgrisc", "x68k", 139 "zaurus" 140 }; 141 static const int arch_MAX = sizeof(arch_names) / sizeof(char *); 142 143 /* 144 * Print a character, escaping HTML along the way. 145 * This will pass non-ASCII straight to output: be warned! 146 */ 147 static void 148 html_putchar(char c) 149 { 150 151 switch (c) { 152 case '"': 153 printf("""); 154 break; 155 case '&': 156 printf("&"); 157 break; 158 case '>': 159 printf(">"); 160 break; 161 case '<': 162 printf("<"); 163 break; 164 default: 165 putchar((unsigned char)c); 166 break; 167 } 168 } 169 170 /* 171 * Call through to html_putchar(). 172 * Accepts NULL strings. 173 */ 174 static void 175 html_print(const char *p) 176 { 177 178 if (NULL == p) 179 return; 180 while ('\0' != *p) 181 html_putchar(*p++); 182 } 183 184 /* 185 * Transfer the responsibility for the allocated string *val 186 * to the query structure. 187 */ 188 static void 189 set_query_attr(char **attr, char **val) 190 { 191 192 free(*attr); 193 if (**val == '\0') { 194 *attr = NULL; 195 free(*val); 196 } else 197 *attr = *val; 198 *val = NULL; 199 } 200 201 /* 202 * Parse the QUERY_STRING for key-value pairs 203 * and store the values into the query structure. 204 */ 205 static void 206 parse_query_string(struct req *req, const char *qs) 207 { 208 char *key, *val; 209 size_t keysz, valsz; 210 211 req->isquery = 1; 212 req->q.manpath = NULL; 213 req->q.arch = NULL; 214 req->q.sec = NULL; 215 req->q.query = NULL; 216 req->q.equal = 1; 217 218 key = val = NULL; 219 while (*qs != '\0') { 220 221 /* Parse one key. */ 222 223 keysz = strcspn(qs, "=;&"); 224 key = mandoc_strndup(qs, keysz); 225 qs += keysz; 226 if (*qs != '=') 227 goto next; 228 229 /* Parse one value. */ 230 231 valsz = strcspn(++qs, ";&"); 232 val = mandoc_strndup(qs, valsz); 233 qs += valsz; 234 235 /* Decode and catch encoding errors. */ 236 237 if ( ! (http_decode(key) && http_decode(val))) 238 goto next; 239 240 /* Handle key-value pairs. */ 241 242 if ( ! strcmp(key, "query")) 243 set_query_attr(&req->q.query, &val); 244 245 else if ( ! strcmp(key, "apropos")) 246 req->q.equal = !strcmp(val, "0"); 247 248 else if ( ! strcmp(key, "manpath")) { 249 #ifdef COMPAT_OLDURI 250 if ( ! strncmp(val, "OpenBSD ", 8)) { 251 val[7] = '-'; 252 if ('C' == val[8]) 253 val[8] = 'c'; 254 } 255 #endif 256 set_query_attr(&req->q.manpath, &val); 257 } 258 259 else if ( ! (strcmp(key, "sec") 260 #ifdef COMPAT_OLDURI 261 && strcmp(key, "sektion") 262 #endif 263 )) { 264 if ( ! strcmp(val, "0")) 265 *val = '\0'; 266 set_query_attr(&req->q.sec, &val); 267 } 268 269 else if ( ! strcmp(key, "arch")) { 270 if ( ! strcmp(val, "default")) 271 *val = '\0'; 272 set_query_attr(&req->q.arch, &val); 273 } 274 275 /* 276 * The key must be freed in any case. 277 * The val may have been handed over to the query 278 * structure, in which case it is now NULL. 279 */ 280 next: 281 free(key); 282 key = NULL; 283 free(val); 284 val = NULL; 285 286 if (*qs != '\0') 287 qs++; 288 } 289 } 290 291 /* 292 * HTTP-decode a string. The standard explanation is that this turns 293 * "%4e+foo" into "n foo" in the regular way. This is done in-place 294 * over the allocated string. 295 */ 296 static int 297 http_decode(char *p) 298 { 299 char hex[3]; 300 char *q; 301 int c; 302 303 hex[2] = '\0'; 304 305 q = p; 306 for ( ; '\0' != *p; p++, q++) { 307 if ('%' == *p) { 308 if ('\0' == (hex[0] = *(p + 1))) 309 return 0; 310 if ('\0' == (hex[1] = *(p + 2))) 311 return 0; 312 if (1 != sscanf(hex, "%x", &c)) 313 return 0; 314 if ('\0' == c) 315 return 0; 316 317 *q = (char)c; 318 p += 2; 319 } else 320 *q = '+' == *p ? ' ' : *p; 321 } 322 323 *q = '\0'; 324 return 1; 325 } 326 327 static void 328 http_encode(const char *p) 329 { 330 for (; *p != '\0'; p++) { 331 if (isalnum((unsigned char)*p) == 0 && 332 strchr("-._~", *p) == NULL) 333 printf("%%%2.2X", (unsigned char)*p); 334 else 335 putchar(*p); 336 } 337 } 338 339 static void 340 resp_begin_http(int code, const char *msg) 341 { 342 343 if (200 != code) 344 printf("Status: %d %s\r\n", code, msg); 345 346 printf("Content-Type: text/html; charset=utf-8\r\n" 347 "Cache-Control: no-cache\r\n" 348 "Content-Security-Policy: default-src 'none'; " 349 "style-src 'self' 'unsafe-inline'\r\n" 350 "Pragma: no-cache\r\n" 351 "\r\n"); 352 353 fflush(stdout); 354 } 355 356 static int 357 resp_copy(const char *element, const char *filename) 358 { 359 char buf[4096]; 360 ssize_t sz; 361 int fd; 362 363 if ((fd = open(filename, O_RDONLY)) == -1) 364 return 0; 365 366 if (element != NULL) 367 printf("<%s>\n", element); 368 fflush(stdout); 369 while ((sz = read(fd, buf, sizeof(buf))) > 0) 370 write(STDOUT_FILENO, buf, sz); 371 close(fd); 372 return 1; 373 } 374 375 static int 376 resp_begin_html(int code, const char *msg, const char *file) 377 { 378 const char *name, *sec, *cp; 379 int namesz, secsz; 380 381 resp_begin_http(code, msg); 382 383 printf("<!DOCTYPE html>\n" 384 "<html>\n" 385 "<head>\n" 386 " <meta charset=\"UTF-8\"/>\n" 387 " <meta name=\"viewport\"" 388 " content=\"width=device-width, initial-scale=1.0\">\n" 389 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\"" 390 " type=\"text/css\" media=\"all\">\n" 391 " <title>", 392 CSS_DIR); 393 if (file != NULL) { 394 cp = strrchr(file, '/'); 395 name = cp == NULL ? file : cp + 1; 396 cp = strrchr(name, '.'); 397 namesz = cp == NULL ? strlen(name) : cp - name; 398 sec = NULL; 399 if (cp != NULL && cp[1] != '0') { 400 sec = cp + 1; 401 secsz = strlen(sec); 402 } else if (name - file > 1) { 403 for (cp = name - 2; cp >= file; cp--) { 404 if (*cp < '1' || *cp > '9') 405 continue; 406 sec = cp; 407 secsz = name - cp - 1; 408 break; 409 } 410 } 411 printf("%.*s", namesz, name); 412 if (sec != NULL) 413 printf("(%.*s)", secsz, sec); 414 fputs(" - ", stdout); 415 } 416 printf("%s</title>\n" 417 "</head>\n" 418 "<body>\n", 419 CUSTOMIZE_TITLE); 420 421 return resp_copy("header", MAN_DIR "/header.html"); 422 } 423 424 static void 425 resp_end_html(void) 426 { 427 if (resp_copy("footer", MAN_DIR "/footer.html")) 428 puts("</footer>"); 429 430 puts("</body>\n" 431 "</html>"); 432 } 433 434 static void 435 resp_searchform(const struct req *req, enum focus focus) 436 { 437 int i; 438 439 printf("<form role=\"search\" action=\"/%s\" method=\"get\" " 440 "autocomplete=\"off\" autocapitalize=\"none\">\n" 441 " <fieldset>\n" 442 " <legend>Manual Page Search Parameters</legend>\n", 443 scriptname); 444 445 /* Write query input box. */ 446 447 printf(" <label>Search query:\n" 448 " <input type=\"search\" name=\"query\" value=\""); 449 if (req->q.query != NULL) 450 html_print(req->q.query); 451 printf("\" size=\"40\""); 452 if (focus == FOCUS_QUERY) 453 printf(" autofocus"); 454 puts(">\n </label>"); 455 456 /* Write submission buttons. */ 457 458 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">" 459 "man</button>\n" 460 " <button type=\"submit\" name=\"apropos\" value=\"1\">" 461 "apropos</button>\n" 462 " <br/>\n"); 463 464 /* Write section selector. */ 465 466 puts(" <select name=\"sec\" aria-label=\"Manual section\">"); 467 for (i = 0; i < sec_MAX; i++) { 468 printf(" <option value=\"%s\"", sec_numbers[i]); 469 if (NULL != req->q.sec && 470 0 == strcmp(sec_numbers[i], req->q.sec)) 471 printf(" selected=\"selected\""); 472 printf(">%s</option>\n", sec_names[i]); 473 } 474 puts(" </select>"); 475 476 /* Write architecture selector. */ 477 478 printf( " <select name=\"arch\" aria-label=\"CPU architecture\">\n" 479 " <option value=\"default\""); 480 if (NULL == req->q.arch) 481 printf(" selected=\"selected\""); 482 puts(">All Architectures</option>"); 483 for (i = 0; i < arch_MAX; i++) { 484 printf(" <option"); 485 if (NULL != req->q.arch && 486 0 == strcmp(arch_names[i], req->q.arch)) 487 printf(" selected=\"selected\""); 488 printf(">%s</option>\n", arch_names[i]); 489 } 490 puts(" </select>"); 491 492 /* Write manpath selector. */ 493 494 if (req->psz > 1) { 495 puts(" <select name=\"manpath\"" 496 " aria-label=\"Manual path\">"); 497 for (i = 0; i < (int)req->psz; i++) { 498 printf(" <option"); 499 if (strcmp(req->q.manpath, req->p[i]) == 0) 500 printf(" selected=\"selected\""); 501 printf(">"); 502 html_print(req->p[i]); 503 puts("</option>"); 504 } 505 puts(" </select>"); 506 } 507 508 puts(" </fieldset>\n" 509 "</form>"); 510 } 511 512 static int 513 validate_urifrag(const char *frag) 514 { 515 516 while ('\0' != *frag) { 517 if ( ! (isalnum((unsigned char)*frag) || 518 '-' == *frag || '.' == *frag || 519 '/' == *frag || '_' == *frag)) 520 return 0; 521 frag++; 522 } 523 return 1; 524 } 525 526 static int 527 validate_manpath(const struct req *req, const char* manpath) 528 { 529 size_t i; 530 531 for (i = 0; i < req->psz; i++) 532 if ( ! strcmp(manpath, req->p[i])) 533 return 1; 534 535 return 0; 536 } 537 538 static int 539 validate_arch(const char *arch) 540 { 541 int i; 542 543 for (i = 0; i < arch_MAX; i++) 544 if (strcmp(arch, arch_names[i]) == 0) 545 return 1; 546 547 return 0; 548 } 549 550 static int 551 validate_filename(const char *file) 552 { 553 554 if ('.' == file[0] && '/' == file[1]) 555 file += 2; 556 557 return ! (strstr(file, "../") || strstr(file, "/..") || 558 (strncmp(file, "man", 3) && strncmp(file, "cat", 3))); 559 } 560 561 static void 562 pg_index(const struct req *req) 563 { 564 if (resp_begin_html(200, NULL, NULL) == 0) 565 puts("<header>"); 566 resp_searchform(req, FOCUS_QUERY); 567 printf("</header>\n" 568 "<main>\n" 569 "<p role=\"doc-notice\" aria-label=\"Usage\">\n" 570 "This web interface is documented in the\n" 571 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\"" 572 " aria-label=\"man dot CGI, section 8\">man.cgi(8)</a>\n" 573 "manual, and the\n" 574 "<a class=\"Xr\" href=\"/%s%sapropos.1\"" 575 " aria-label=\"apropos, section 1\">apropos(1)</a>\n" 576 "manual explains the query syntax.\n" 577 "</p>\n" 578 "</main>\n", 579 scriptname, *scriptname == '\0' ? "" : "/", 580 scriptname, *scriptname == '\0' ? "" : "/"); 581 resp_end_html(); 582 } 583 584 static void 585 pg_noresult(const struct req *req, int code, const char *http_msg, 586 const char *user_msg) 587 { 588 if (resp_begin_html(code, http_msg, NULL) == 0) 589 puts("<header>"); 590 resp_searchform(req, FOCUS_QUERY); 591 puts("</header>"); 592 puts("<main>"); 593 puts("<p role=\"doc-notice\" aria-label=\"No result\">"); 594 puts(user_msg); 595 puts("</p>"); 596 puts("</main>"); 597 resp_end_html(); 598 } 599 600 static void 601 pg_error_badrequest(const char *msg) 602 { 603 if (resp_begin_html(400, "Bad Request", NULL)) 604 puts("</header>"); 605 puts("<main>\n" 606 "<h1>Bad Request</h1>\n" 607 "<p role=\"doc-notice\" aria-label=\"Bad Request\">"); 608 puts(msg); 609 printf("Try again from the\n" 610 "<a href=\"/%s\">main page</a>.\n" 611 "</p>\n" 612 "</main>\n", scriptname); 613 resp_end_html(); 614 } 615 616 static void 617 pg_error_internal(void) 618 { 619 if (resp_begin_html(500, "Internal Server Error", NULL)) 620 puts("</header>"); 621 puts("<main><p role=\"doc-notice\">Internal Server Error</p></main>"); 622 resp_end_html(); 623 } 624 625 static void 626 pg_redirect(const struct req *req, const char *name) 627 { 628 printf("Status: 303 See Other\r\n" 629 "Location: /"); 630 if (*scriptname != '\0') 631 printf("%s/", scriptname); 632 if (strcmp(req->q.manpath, req->p[0])) 633 printf("%s/", req->q.manpath); 634 if (req->q.arch != NULL) 635 printf("%s/", req->q.arch); 636 http_encode(name); 637 if (req->q.sec != NULL) { 638 putchar('.'); 639 http_encode(req->q.sec); 640 } 641 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n"); 642 } 643 644 static void 645 pg_searchres(const struct req *req, struct manpage *r, size_t sz) 646 { 647 char *arch, *archend; 648 const char *sec; 649 size_t i, iuse; 650 int archprio, archpriouse; 651 int prio, priouse; 652 int have_header; 653 654 for (i = 0; i < sz; i++) { 655 if (validate_filename(r[i].file)) 656 continue; 657 warnx("invalid filename %s in %s database", 658 r[i].file, req->q.manpath); 659 pg_error_internal(); 660 return; 661 } 662 663 if (req->isquery && sz == 1) { 664 /* 665 * If we have just one result, then jump there now 666 * without any delay. 667 */ 668 printf("Status: 303 See Other\r\n" 669 "Location: /"); 670 if (*scriptname != '\0') 671 printf("%s/", scriptname); 672 if (strcmp(req->q.manpath, req->p[0])) 673 printf("%s/", req->q.manpath); 674 printf("%s\r\n" 675 "Content-Type: text/html; charset=utf-8\r\n\r\n", 676 r[0].file); 677 return; 678 } 679 680 /* 681 * In man(1) mode, show one of the pages 682 * even if more than one is found. 683 */ 684 685 iuse = 0; 686 if (req->q.equal || sz == 1) { 687 priouse = 20; 688 archpriouse = 3; 689 for (i = 0; i < sz; i++) { 690 sec = r[i].file; 691 sec += strcspn(sec, "123456789"); 692 if (sec[0] == '\0') 693 continue; 694 prio = sec_prios[sec[0] - '1']; 695 if (sec[1] != '/') 696 prio += 10; 697 if (req->q.arch == NULL) { 698 archprio = 699 ((arch = strchr(sec + 1, '/')) 700 == NULL) ? 3 : 701 ((archend = strchr(arch + 1, '/')) 702 == NULL) ? 0 : 703 strncmp(arch, "amd64/", 704 archend - arch) ? 2 : 1; 705 if (archprio < archpriouse) { 706 archpriouse = archprio; 707 priouse = prio; 708 iuse = i; 709 continue; 710 } 711 if (archprio > archpriouse) 712 continue; 713 } 714 if (prio >= priouse) 715 continue; 716 priouse = prio; 717 iuse = i; 718 } 719 have_header = resp_begin_html(200, NULL, r[iuse].file); 720 } else 721 have_header = resp_begin_html(200, NULL, NULL); 722 723 if (have_header == 0) 724 puts("<header>"); 725 resp_searchform(req, 726 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY); 727 puts("</header>"); 728 729 if (sz > 1) { 730 puts("<nav>"); 731 puts("<table class=\"results\">"); 732 for (i = 0; i < sz; i++) { 733 printf(" <tr>\n" 734 " <td>" 735 "<a class=\"Xr\" href=\"/"); 736 if (*scriptname != '\0') 737 printf("%s/", scriptname); 738 if (strcmp(req->q.manpath, req->p[0])) 739 printf("%s/", req->q.manpath); 740 printf("%s\">", r[i].file); 741 html_print(r[i].names); 742 printf("</a></td>\n" 743 " <td><span class=\"Nd\">"); 744 html_print(r[i].output); 745 puts("</span></td>\n" 746 " </tr>"); 747 } 748 puts("</table>"); 749 puts("</nav>"); 750 } 751 752 if (req->q.equal || sz == 1) { 753 puts("<hr>"); 754 resp_show(req, r[iuse].file); 755 } 756 757 resp_end_html(); 758 } 759 760 static void 761 resp_catman(const struct req *req, const char *file) 762 { 763 FILE *f; 764 char *p; 765 size_t sz; 766 ssize_t len; 767 int i; 768 int italic, bold; 769 770 if ((f = fopen(file, "r")) == NULL) { 771 puts("<p role=\"doc-notice\">\n" 772 " You specified an invalid manual file.\n" 773 "</p>"); 774 return; 775 } 776 777 puts("<div class=\"catman\">\n" 778 "<pre>"); 779 780 p = NULL; 781 sz = 0; 782 783 while ((len = getline(&p, &sz, f)) != -1) { 784 bold = italic = 0; 785 for (i = 0; i < len - 1; i++) { 786 /* 787 * This means that the catpage is out of state. 788 * Ignore it and keep going (although the 789 * catpage is bogus). 790 */ 791 792 if ('\b' == p[i] || '\n' == p[i]) 793 continue; 794 795 /* 796 * Print a regular character. 797 * Close out any bold/italic scopes. 798 * If we're in back-space mode, make sure we'll 799 * have something to enter when we backspace. 800 */ 801 802 if ('\b' != p[i + 1]) { 803 if (italic) 804 printf("</i>"); 805 if (bold) 806 printf("</b>"); 807 italic = bold = 0; 808 html_putchar(p[i]); 809 continue; 810 } else if (i + 2 >= len) 811 continue; 812 813 /* Italic mode. */ 814 815 if ('_' == p[i]) { 816 if (bold) 817 printf("</b>"); 818 if ( ! italic) 819 printf("<i>"); 820 bold = 0; 821 italic = 1; 822 i += 2; 823 html_putchar(p[i]); 824 continue; 825 } 826 827 /* 828 * Handle funny behaviour troff-isms. 829 * These grok'd from the original man2html.c. 830 */ 831 832 if (('+' == p[i] && 'o' == p[i + 2]) || 833 ('o' == p[i] && '+' == p[i + 2]) || 834 ('|' == p[i] && '=' == p[i + 2]) || 835 ('=' == p[i] && '|' == p[i + 2]) || 836 ('*' == p[i] && '=' == p[i + 2]) || 837 ('=' == p[i] && '*' == p[i + 2]) || 838 ('*' == p[i] && '|' == p[i + 2]) || 839 ('|' == p[i] && '*' == p[i + 2])) { 840 if (italic) 841 printf("</i>"); 842 if (bold) 843 printf("</b>"); 844 italic = bold = 0; 845 putchar('*'); 846 i += 2; 847 continue; 848 } else if (('|' == p[i] && '-' == p[i + 2]) || 849 ('-' == p[i] && '|' == p[i + 1]) || 850 ('+' == p[i] && '-' == p[i + 1]) || 851 ('-' == p[i] && '+' == p[i + 1]) || 852 ('+' == p[i] && '|' == p[i + 1]) || 853 ('|' == p[i] && '+' == p[i + 1])) { 854 if (italic) 855 printf("</i>"); 856 if (bold) 857 printf("</b>"); 858 italic = bold = 0; 859 putchar('+'); 860 i += 2; 861 continue; 862 } 863 864 /* Bold mode. */ 865 866 if (italic) 867 printf("</i>"); 868 if ( ! bold) 869 printf("<b>"); 870 bold = 1; 871 italic = 0; 872 i += 2; 873 html_putchar(p[i]); 874 } 875 876 /* 877 * Clean up the last character. 878 * We can get to a newline; don't print that. 879 */ 880 881 if (italic) 882 printf("</i>"); 883 if (bold) 884 printf("</b>"); 885 886 if (i == len - 1 && p[i] != '\n') 887 html_putchar(p[i]); 888 889 putchar('\n'); 890 } 891 free(p); 892 893 puts("</pre>\n" 894 "</div>"); 895 896 fclose(f); 897 } 898 899 static void 900 resp_format(const struct req *req, const char *file) 901 { 902 struct manoutput conf; 903 struct mparse *mp; 904 struct roff_meta *meta; 905 void *vp; 906 int fd; 907 int usepath; 908 909 if (-1 == (fd = open(file, O_RDONLY))) { 910 puts("<p role=\"doc-notice\">\n" 911 " You specified an invalid manual file.\n" 912 "</p>"); 913 return; 914 } 915 916 mchars_alloc(); 917 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 | 918 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath); 919 mparse_readfd(mp, fd, file); 920 close(fd); 921 meta = mparse_result(mp); 922 923 memset(&conf, 0, sizeof(conf)); 924 conf.fragment = 1; 925 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css"); 926 usepath = strcmp(req->q.manpath, req->p[0]); 927 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S", 928 scriptname, *scriptname == '\0' ? "" : "/", 929 usepath ? req->q.manpath : "", usepath ? "/" : ""); 930 931 vp = html_alloc(&conf); 932 if (meta->macroset == MACROSET_MDOC) 933 html_mdoc(vp, meta); 934 else 935 html_man(vp, meta); 936 937 html_free(vp); 938 mparse_free(mp); 939 mchars_free(); 940 free(conf.man); 941 free(conf.style); 942 } 943 944 static void 945 resp_show(const struct req *req, const char *file) 946 { 947 948 if ('.' == file[0] && '/' == file[1]) 949 file += 2; 950 951 if ('c' == *file) 952 resp_catman(req, file); 953 else 954 resp_format(req, file); 955 } 956 957 static void 958 pg_show(struct req *req, const char *fullpath) 959 { 960 char *manpath; 961 const char *file; 962 963 if ((file = strchr(fullpath, '/')) == NULL) { 964 pg_error_badrequest( 965 "You did not specify a page to show."); 966 return; 967 } 968 manpath = mandoc_strndup(fullpath, file - fullpath); 969 file++; 970 971 if ( ! validate_manpath(req, manpath)) { 972 pg_error_badrequest( 973 "You specified an invalid manpath."); 974 free(manpath); 975 return; 976 } 977 978 /* 979 * Begin by chdir()ing into the manpath. 980 * This way we can pick up the database files, which are 981 * relative to the manpath root. 982 */ 983 984 if (chdir(manpath) == -1) { 985 warn("chdir %s", manpath); 986 pg_error_internal(); 987 free(manpath); 988 return; 989 } 990 free(manpath); 991 992 if ( ! validate_filename(file)) { 993 pg_error_badrequest( 994 "You specified an invalid manual file."); 995 return; 996 } 997 998 if (resp_begin_html(200, NULL, file) == 0) 999 puts("<header>"); 1000 resp_searchform(req, FOCUS_NONE); 1001 puts("</header>"); 1002 resp_show(req, file); 1003 resp_end_html(); 1004 } 1005 1006 static void 1007 pg_search(const struct req *req) 1008 { 1009 struct mansearch search; 1010 struct manpaths paths; 1011 struct manpage *res; 1012 char **argv; 1013 char *query, *rp, *wp; 1014 size_t ressz; 1015 int argc; 1016 1017 /* 1018 * Begin by chdir()ing into the root of the manpath. 1019 * This way we can pick up the database files, which are 1020 * relative to the manpath root. 1021 */ 1022 1023 if (chdir(req->q.manpath) == -1) { 1024 warn("chdir %s", req->q.manpath); 1025 pg_error_internal(); 1026 return; 1027 } 1028 1029 search.arch = req->q.arch; 1030 search.sec = req->q.sec; 1031 search.outkey = "Nd"; 1032 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR; 1033 search.firstmatch = 1; 1034 1035 paths.sz = 1; 1036 paths.paths = mandoc_malloc(sizeof(char *)); 1037 paths.paths[0] = mandoc_strdup("."); 1038 1039 /* 1040 * Break apart at spaces with backslash-escaping. 1041 */ 1042 1043 argc = 0; 1044 argv = NULL; 1045 rp = query = mandoc_strdup(req->q.query); 1046 for (;;) { 1047 while (isspace((unsigned char)*rp)) 1048 rp++; 1049 if (*rp == '\0') 1050 break; 1051 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *)); 1052 argv[argc++] = wp = rp; 1053 for (;;) { 1054 if (isspace((unsigned char)*rp)) { 1055 *wp = '\0'; 1056 rp++; 1057 break; 1058 } 1059 if (rp[0] == '\\' && rp[1] != '\0') 1060 rp++; 1061 if (wp != rp) 1062 *wp = *rp; 1063 if (*rp == '\0') 1064 break; 1065 wp++; 1066 rp++; 1067 } 1068 } 1069 1070 res = NULL; 1071 ressz = 0; 1072 if (req->isquery && req->q.equal && argc == 1) 1073 pg_redirect(req, argv[0]); 1074 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0) 1075 pg_noresult(req, 400, "Bad Request", 1076 "You entered an invalid query."); 1077 else if (ressz == 0) 1078 pg_noresult(req, 404, "Not Found", "No results found."); 1079 else 1080 pg_searchres(req, res, ressz); 1081 1082 free(query); 1083 mansearch_free(res, ressz); 1084 free(paths.paths[0]); 1085 free(paths.paths); 1086 } 1087 1088 int 1089 main(void) 1090 { 1091 struct req req; 1092 struct itimerval itimer; 1093 const char *path; 1094 const char *querystring; 1095 int i; 1096 1097 #if HAVE_PLEDGE 1098 /* 1099 * The "rpath" pledge could be revoked after mparse_readfd() 1100 * if the file descriptor to "/footer.html" would be opened 1101 * up front, but it's probably not worth the complication 1102 * of the code it would cause: it would require scattering 1103 * pledge() calls in multiple low-level resp_*() functions. 1104 */ 1105 1106 if (pledge("stdio rpath", NULL) == -1) { 1107 warn("pledge"); 1108 pg_error_internal(); 1109 return EXIT_FAILURE; 1110 } 1111 #endif 1112 1113 /* Poor man's ReDoS mitigation. */ 1114 1115 itimer.it_value.tv_sec = 2; 1116 itimer.it_value.tv_usec = 0; 1117 itimer.it_interval.tv_sec = 2; 1118 itimer.it_interval.tv_usec = 0; 1119 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) { 1120 warn("setitimer"); 1121 pg_error_internal(); 1122 return EXIT_FAILURE; 1123 } 1124 1125 /* 1126 * First we change directory into the MAN_DIR so that 1127 * subsequent scanning for manpath directories is rooted 1128 * relative to the same position. 1129 */ 1130 1131 if (chdir(MAN_DIR) == -1) { 1132 warn("MAN_DIR: %s", MAN_DIR); 1133 pg_error_internal(); 1134 return EXIT_FAILURE; 1135 } 1136 1137 memset(&req, 0, sizeof(struct req)); 1138 req.q.equal = 1; 1139 parse_manpath_conf(&req); 1140 1141 /* Parse the path info and the query string. */ 1142 1143 if ((path = getenv("PATH_INFO")) == NULL) 1144 path = ""; 1145 else if (*path == '/') 1146 path++; 1147 1148 if (*path != '\0') { 1149 parse_path_info(&req, path); 1150 if (req.q.manpath == NULL || req.q.sec == NULL || 1151 *req.q.query == '\0' || access(path, F_OK) == -1) 1152 path = ""; 1153 } else if ((querystring = getenv("QUERY_STRING")) != NULL) 1154 parse_query_string(&req, querystring); 1155 1156 /* Validate parsed data and add defaults. */ 1157 1158 if (req.q.manpath == NULL) 1159 req.q.manpath = mandoc_strdup(req.p[0]); 1160 else if ( ! validate_manpath(&req, req.q.manpath)) { 1161 pg_error_badrequest( 1162 "You specified an invalid manpath."); 1163 return EXIT_FAILURE; 1164 } 1165 1166 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) { 1167 pg_error_badrequest( 1168 "You specified an invalid architecture."); 1169 return EXIT_FAILURE; 1170 } 1171 1172 /* Dispatch to the three different pages. */ 1173 1174 if ('\0' != *path) 1175 pg_show(&req, path); 1176 else if (NULL != req.q.query) 1177 pg_search(&req); 1178 else 1179 pg_index(&req); 1180 1181 free(req.q.manpath); 1182 free(req.q.arch); 1183 free(req.q.sec); 1184 free(req.q.query); 1185 for (i = 0; i < (int)req.psz; i++) 1186 free(req.p[i]); 1187 free(req.p); 1188 return EXIT_SUCCESS; 1189 } 1190 1191 /* 1192 * Translate PATH_INFO to a query. 1193 */ 1194 static void 1195 parse_path_info(struct req *req, const char *path) 1196 { 1197 const char *name, *sec, *end; 1198 1199 req->isquery = 0; 1200 req->q.equal = 1; 1201 req->q.manpath = NULL; 1202 req->q.arch = NULL; 1203 1204 /* Mandatory manual page name. */ 1205 if ((name = strrchr(path, '/')) == NULL) 1206 name = path; 1207 else 1208 name++; 1209 1210 /* Optional trailing section. */ 1211 sec = strrchr(name, '.'); 1212 if (sec != NULL && isdigit((unsigned char)*++sec)) { 1213 req->q.query = mandoc_strndup(name, sec - name - 1); 1214 req->q.sec = mandoc_strdup(sec); 1215 } else { 1216 req->q.query = mandoc_strdup(name); 1217 req->q.sec = NULL; 1218 } 1219 1220 /* Handle the case of name[.section] only. */ 1221 if (name == path) 1222 return; 1223 1224 /* Optional manpath. */ 1225 end = strchr(path, '/'); 1226 req->q.manpath = mandoc_strndup(path, end - path); 1227 if (validate_manpath(req, req->q.manpath)) { 1228 path = end + 1; 1229 if (name == path) 1230 return; 1231 } else { 1232 free(req->q.manpath); 1233 req->q.manpath = NULL; 1234 } 1235 1236 /* Optional section. */ 1237 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) { 1238 path += 3; 1239 end = strchr(path, '/'); 1240 free(req->q.sec); 1241 req->q.sec = mandoc_strndup(path, end - path); 1242 path = end + 1; 1243 if (name == path) 1244 return; 1245 } 1246 1247 /* Optional architecture. */ 1248 end = strchr(path, '/'); 1249 if (end + 1 != name) { 1250 pg_error_badrequest( 1251 "You specified too many directory components."); 1252 exit(EXIT_FAILURE); 1253 } 1254 req->q.arch = mandoc_strndup(path, end - path); 1255 if (validate_arch(req->q.arch) == 0) { 1256 pg_error_badrequest( 1257 "You specified an invalid directory component."); 1258 exit(EXIT_FAILURE); 1259 } 1260 } 1261 1262 /* 1263 * Scan for indexable paths. 1264 */ 1265 static void 1266 parse_manpath_conf(struct req *req) 1267 { 1268 FILE *fp; 1269 char *dp; 1270 size_t dpsz; 1271 ssize_t len; 1272 1273 if ((fp = fopen("manpath.conf", "r")) == NULL) { 1274 warn("%s/manpath.conf", MAN_DIR); 1275 pg_error_internal(); 1276 exit(EXIT_FAILURE); 1277 } 1278 1279 dp = NULL; 1280 dpsz = 0; 1281 1282 while ((len = getline(&dp, &dpsz, fp)) != -1) { 1283 if (dp[len - 1] == '\n') 1284 dp[--len] = '\0'; 1285 req->p = mandoc_realloc(req->p, 1286 (req->psz + 1) * sizeof(char *)); 1287 if ( ! validate_urifrag(dp)) { 1288 warnx("%s/manpath.conf contains " 1289 "unsafe path \"%s\"", MAN_DIR, dp); 1290 pg_error_internal(); 1291 exit(EXIT_FAILURE); 1292 } 1293 if (strchr(dp, '/') != NULL) { 1294 warnx("%s/manpath.conf contains " 1295 "path with slash \"%s\"", MAN_DIR, dp); 1296 pg_error_internal(); 1297 exit(EXIT_FAILURE); 1298 } 1299 req->p[req->psz++] = dp; 1300 dp = NULL; 1301 dpsz = 0; 1302 } 1303 free(dp); 1304 1305 if (req->p == NULL) { 1306 warnx("%s/manpath.conf is empty", MAN_DIR); 1307 pg_error_internal(); 1308 exit(EXIT_FAILURE); 1309 } 1310 } 1311