/* $Id: cgi.c,v 1.175 2021/08/19 15:23:36 schwarze Exp $ */
/*
 * Copyright (c) 2014-2019, 2021 Ingo Schwarze <schwarze@usta.de>
 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Implementation of the man.cgi(8) program.
 */
20 #include "config.h"
21
22 #include <sys/types.h>
23 #include <sys/time.h>
24
25 #include <ctype.h>
26 #if HAVE_ERR
27 #include <err.h>
28 #endif
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <limits.h>
32 #include <stdint.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37
38 #include "mandoc_aux.h"
39 #include "mandoc.h"
40 #include "roff.h"
41 #include "mdoc.h"
42 #include "man.h"
43 #include "mandoc_parse.h"
44 #include "main.h"
45 #include "manconf.h"
46 #include "mansearch.h"
47 #include "cgi.h"
48
/*
 * The search parameters of one request, in the form in which they
 * are handed to the search and display functions.
 */
struct query {
	/* desired manual directory */
	char	*manpath;
	/* architecture */
	char	*arch;
	/* manual section */
	char	*sec;
	/* unparsed query expression */
	char	*query;
	/* match whole names, not substrings */
	int	 equal;
};
59
60 struct req {
61 struct query q;
62 char **p; /* array of available manpaths */
63 size_t psz; /* number of available manpaths */
64 int isquery; /* QUERY_STRING used, not PATH_INFO */
65 };
66
/*
 * Which element of the search form gets the "autofocus" attribute;
 * resp_searchform() sets it on the query input for FOCUS_QUERY.
 */
enum focus {
	FOCUS_NONE	= 0,
	FOCUS_QUERY	= 1
};
71
/*
 * Forward declarations of all file-local functions,
 * kept in alphabetical order.
 */
static void	 html_print(const char *p);
static void	 html_putchar(char c);
static int	 http_decode(char *p);
static void	 http_encode(const char *p);
static void	 parse_manpath_conf(struct req *req);
static void	 parse_path_info(struct req *req, const char *path);
static void	 parse_query_string(struct req *req, const char *qs);
static void	 pg_error_badrequest(const char *msg);
static void	 pg_error_internal(void);
static void	 pg_index(const struct req *req);
static void	 pg_noresult(const struct req *req, int code,
			const char *http_msg, const char *user_msg);
static void	 pg_redirect(const struct req *req, const char *name);
static void	 pg_search(const struct req *req);
static void	 pg_searchres(const struct req *req,
			struct manpage *r, size_t sz);
static void	 pg_show(struct req *req, const char *fullpath);
static void	 resp_begin_html(int code, const char *msg,
			const char *file);
static void	 resp_begin_http(int code, const char *msg);
static void	 resp_catman(const struct req *req, const char *file);
static void	 resp_copy(const char *filename);
static void	 resp_end_html(void);
static void	 resp_format(const struct req *req, const char *file);
static void	 resp_searchform(const struct req *req, enum focus focus);
static void	 resp_show(const struct req *req, const char *file);
static void	 set_query_attr(char **attr, char **val);
static int	 validate_arch(const char *arch);
static int	 validate_filename(const char *file);
static int	 validate_manpath(const struct req *req,
			const char *manpath);
static int	 validate_urifrag(const char *frag);
102
/*
 * URI path of this CGI program, used as "/<scriptname>" when links,
 * form actions and redirects are generated.  It may be the empty
 * string, in which case no separating slash is printed after it.
 * NOTE(review): SCRIPT_NAME is presumably a build-time macro from
 * cgi.h; confirm.
 */
static const char *scriptname = SCRIPT_NAME;
104
/*
 * Preference of the manual sections when several pages match:
 * the entry for section digit d is sec_prios[d - 1], and a smaller
 * value wins in pg_searchres().
 */
static const int sec_prios[] = {
	1, 4, 5, 8, 6, 3, 7, 2, 9
};

/*
 * Values (sec_numbers) and labels (sec_names) of the section selector
 * in the search form.  Both arrays are indexed in parallel;
 * the value "0" stands for all sections.
 */
static const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};
static const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};

/* Number of entries in the section selector. */
static const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
123
/*
 * The architecture names accepted by validate_arch().
 * The search form lists them in exactly this order.
 */
static const char *const arch_names[] = {
	"amd64",	"alpha",	"armv7",	"arm64",
	"hppa",		"i386",		"landisk",	"loongson",
	"luna88k",	"macppc",	"mips64",	"octeon",
	"powerpc64",	"riscv64",	"sparc64",

	"amiga",	"arc",		"armish",	"arm32",
	"atari",	"aviion",	"beagle",	"cats",
	"hppa64",	"hp300",
	"ia64",		"mac68k",	"mvme68k",	"mvme88k",
	"mvmeppc",	"palm",		"pc532",	"pegasos",
	"pmax",		"powerpc",	"sgi",		"socppc",
	"solbourne",	"sparc",
	"sun3",		"vax",		"wgrisc",	"x68k",
	"zaurus"
};

/* Number of entries in arch_names. */
static const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
141
/*
 * Print one character to standard output, replacing the four
 * characters that are special in HTML text and attribute values
 * (double quote, ampersand, greater-than, less-than) with the
 * corresponding character references.
 * All other bytes, including non-ASCII ones, are passed straight
 * to the output: be warned!
 */
static void
html_putchar(char c)
{
	switch (c) {
	case '"':
		fputs("&quot;", stdout);
		break;
	case '&':
		fputs("&amp;", stdout);
		break;
	case '>':
		fputs("&gt;", stdout);
		break;
	case '<':
		fputs("&lt;", stdout);
		break;
	default:
		putchar((unsigned char)c);
		break;
	}
}
168
/*
 * Print a NUL-terminated string with HTML escaping by feeding it
 * to html_putchar() one character at a time.
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
182
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In both cases, *val is NULL on return, so the caller no longer
 * owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	incoming = *val;
	*val = NULL;

	free(*attr);
	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}
	free(incoming);
	*attr = NULL;
}
199
200 /*
201 * Parse the QUERY_STRING for key-value pairs
202 * and store the values into the query structure.
203 */
204 static void
parse_query_string(struct req * req,const char * qs)205 parse_query_string(struct req *req, const char *qs)
206 {
207 char *key, *val;
208 size_t keysz, valsz;
209
210 req->isquery = 1;
211 req->q.manpath = NULL;
212 req->q.arch = NULL;
213 req->q.sec = NULL;
214 req->q.query = NULL;
215 req->q.equal = 1;
216
217 key = val = NULL;
218 while (*qs != '\0') {
219
220 /* Parse one key. */
221
222 keysz = strcspn(qs, "=;&");
223 key = mandoc_strndup(qs, keysz);
224 qs += keysz;
225 if (*qs != '=')
226 goto next;
227
228 /* Parse one value. */
229
230 valsz = strcspn(++qs, ";&");
231 val = mandoc_strndup(qs, valsz);
232 qs += valsz;
233
234 /* Decode and catch encoding errors. */
235
236 if ( ! (http_decode(key) && http_decode(val)))
237 goto next;
238
239 /* Handle key-value pairs. */
240
241 if ( ! strcmp(key, "query"))
242 set_query_attr(&req->q.query, &val);
243
244 else if ( ! strcmp(key, "apropos"))
245 req->q.equal = !strcmp(val, "0");
246
247 else if ( ! strcmp(key, "manpath")) {
248 #ifdef COMPAT_OLDURI
249 if ( ! strncmp(val, "OpenBSD ", 8)) {
250 val[7] = '-';
251 if ('C' == val[8])
252 val[8] = 'c';
253 }
254 #endif
255 set_query_attr(&req->q.manpath, &val);
256 }
257
258 else if ( ! (strcmp(key, "sec")
259 #ifdef COMPAT_OLDURI
260 && strcmp(key, "sektion")
261 #endif
262 )) {
263 if ( ! strcmp(val, "0"))
264 *val = '\0';
265 set_query_attr(&req->q.sec, &val);
266 }
267
268 else if ( ! strcmp(key, "arch")) {
269 if ( ! strcmp(val, "default"))
270 *val = '\0';
271 set_query_attr(&req->q.arch, &val);
272 }
273
274 /*
275 * The key must be freed in any case.
276 * The val may have been handed over to the query
277 * structure, in which case it is now NULL.
278 */
279 next:
280 free(key);
281 key = NULL;
282 free(val);
283 val = NULL;
284
285 if (*qs != '\0')
286 qs++;
287 }
288 }
289
/*
 * HTTP-decode a string in place: '+' becomes a space and "%XX",
 * with exactly two hexadecimal digits, becomes the byte with that
 * value.  For example, "%6e+foo" turns into "n foo".
 *
 * Returns 1 on success and 0 if an escape is truncated, contains
 * anything but two hexadecimal digits, or encodes a NUL byte.
 * On failure, the string may be left partially decoded and
 * must not be used.
 */
static int
http_decode(char *p)
{
	char	 hex[3];
	char	*q;
	int	 c;

	hex[2] = '\0';

	for (q = p; *p != '\0'; p++, q++) {
		if (*p != '%') {
			*q = *p == '+' ? ' ' : *p;
			continue;
		}

		/*
		 * Insist on two real hexadecimal digits.  Since the
		 * terminating NUL is not a digit either, this also
		 * catches escapes cut short by the end of the string,
		 * and the second test is never reached in that case.
		 */

		if ( ! isxdigit((unsigned char)p[1]) ||
		    ! isxdigit((unsigned char)p[2]))
			return 0;
		hex[0] = p[1];
		hex[1] = p[2];
		c = (int)strtol(hex, NULL, 16);

		/* An embedded NUL would truncate the decoded string. */

		if (c == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
325
/*
 * Print a string to standard output in percent-encoded form.
 * Alphanumeric characters and "-._~" are printed as they are;
 * every other byte is printed as '%' followed by two uppercase
 * hexadecimal digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
337
/*
 * Print the HTTP response headers followed by the empty line that
 * ends them, then flush standard output.
 * A "Status:" line with the given code and message is only printed
 * for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
354
/*
 * Copy the content of a file to standard output unchanged.
 * A file that cannot be opened is silently skipped.
 *
 * The data is written with write(2), bypassing stdio, so pending
 * stdio output is flushed first to keep the output in order.
 * Short writes are completed and interrupted writes are retried;
 * on any other write error, copying stops.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		off = 0;
		while (off < nr) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw > 0) {
				off += nw;
				continue;
			}
			if (nw == -1 && errno == EINTR)
				continue;

			/* Nobody is reading any longer: give up. */
			goto done;
		}
	}
done:
	close(fd);
}
369
370 static void
resp_begin_html(int code,const char * msg,const char * file)371 resp_begin_html(int code, const char *msg, const char *file)
372 {
373 const char *name, *sec, *cp;
374 int namesz, secsz;
375
376 resp_begin_http(code, msg);
377
378 printf("<!DOCTYPE html>\n"
379 "<html>\n"
380 "<head>\n"
381 " <meta charset=\"UTF-8\"/>\n"
382 " <meta name=\"viewport\""
383 " content=\"width=device-width, initial-scale=1.0\">\n"
384 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
385 " type=\"text/css\" media=\"all\">\n"
386 " <title>",
387 CSS_DIR);
388 if (file != NULL) {
389 cp = strrchr(file, '/');
390 name = cp == NULL ? file : cp + 1;
391 cp = strrchr(name, '.');
392 namesz = cp == NULL ? strlen(name) : cp - name;
393 sec = NULL;
394 if (cp != NULL && cp[1] != '0') {
395 sec = cp + 1;
396 secsz = strlen(sec);
397 } else if (name - file > 1) {
398 for (cp = name - 2; cp >= file; cp--) {
399 if (*cp < '1' || *cp > '9')
400 continue;
401 sec = cp;
402 secsz = name - cp - 1;
403 break;
404 }
405 }
406 printf("%.*s", namesz, name);
407 if (sec != NULL)
408 printf("(%.*s)", secsz, sec);
409 fputs(" - ", stdout);
410 }
411 printf("%s</title>\n"
412 "</head>\n"
413 "<body>\n",
414 CUSTOMIZE_TITLE);
415
416 resp_copy(MAN_DIR "/header.html");
417 }
418
419 static void
resp_end_html(void)420 resp_end_html(void)
421 {
422
423 resp_copy(MAN_DIR "/footer.html");
424
425 puts("</body>\n"
426 "</html>");
427 }
428
429 static void
resp_searchform(const struct req * req,enum focus focus)430 resp_searchform(const struct req *req, enum focus focus)
431 {
432 int i;
433
434 printf("<form action=\"/%s\" method=\"get\" "
435 "autocomplete=\"off\" autocapitalize=\"none\">\n"
436 " <fieldset>\n"
437 " <legend>Manual Page Search Parameters</legend>\n",
438 scriptname);
439
440 /* Write query input box. */
441
442 printf(" <input type=\"search\" name=\"query\" value=\"");
443 if (req->q.query != NULL)
444 html_print(req->q.query);
445 printf( "\" size=\"40\"");
446 if (focus == FOCUS_QUERY)
447 printf(" autofocus");
448 puts(">");
449
450 /* Write submission buttons. */
451
452 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
453 "man</button>\n"
454 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
455 "apropos</button>\n"
456 " <br/>\n");
457
458 /* Write section selector. */
459
460 puts(" <select name=\"sec\">");
461 for (i = 0; i < sec_MAX; i++) {
462 printf(" <option value=\"%s\"", sec_numbers[i]);
463 if (NULL != req->q.sec &&
464 0 == strcmp(sec_numbers[i], req->q.sec))
465 printf(" selected=\"selected\"");
466 printf(">%s</option>\n", sec_names[i]);
467 }
468 puts(" </select>");
469
470 /* Write architecture selector. */
471
472 printf( " <select name=\"arch\">\n"
473 " <option value=\"default\"");
474 if (NULL == req->q.arch)
475 printf(" selected=\"selected\"");
476 puts(">All Architectures</option>");
477 for (i = 0; i < arch_MAX; i++) {
478 printf(" <option");
479 if (NULL != req->q.arch &&
480 0 == strcmp(arch_names[i], req->q.arch))
481 printf(" selected=\"selected\"");
482 printf(">%s</option>\n", arch_names[i]);
483 }
484 puts(" </select>");
485
486 /* Write manpath selector. */
487
488 if (req->psz > 1) {
489 puts(" <select name=\"manpath\">");
490 for (i = 0; i < (int)req->psz; i++) {
491 printf(" <option");
492 if (strcmp(req->q.manpath, req->p[i]) == 0)
493 printf(" selected=\"selected\"");
494 printf(">");
495 html_print(req->p[i]);
496 puts("</option>");
497 }
498 puts(" </select>");
499 }
500
501 puts(" </fieldset>\n"
502 "</form>");
503 }
504
/*
 * Check that a string consists of nothing but alphanumeric
 * characters, '-', '.', '/' and '_'.
 * Returns 1 if so (also for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
518
519 static int
validate_manpath(const struct req * req,const char * manpath)520 validate_manpath(const struct req *req, const char* manpath)
521 {
522 size_t i;
523
524 for (i = 0; i < req->psz; i++)
525 if ( ! strcmp(manpath, req->p[i]))
526 return 1;
527
528 return 0;
529 }
530
531 static int
validate_arch(const char * arch)532 validate_arch(const char *arch)
533 {
534 int i;
535
536 for (i = 0; i < arch_MAX; i++)
537 if (strcmp(arch, arch_names[i]) == 0)
538 return 1;
539
540 return 0;
541 }
542
/*
 * Check a file name relative to a manpath.  After an optional
 * leading "./", it must start with "man" or "cat" and must contain
 * neither "../" nor "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strncmp(file, "man", 3) != 0 && strncmp(file, "cat", 3) != 0)
		return 0;
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;
	return 1;
}
553
554 static void
pg_index(const struct req * req)555 pg_index(const struct req *req)
556 {
557
558 resp_begin_html(200, NULL, NULL);
559 resp_searchform(req, FOCUS_QUERY);
560 printf("<p>\n"
561 "This web interface is documented in the\n"
562 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
563 "manual, and the\n"
564 "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
565 "manual explains the query syntax.\n"
566 "</p>\n",
567 scriptname, *scriptname == '\0' ? "" : "/",
568 scriptname, *scriptname == '\0' ? "" : "/");
569 resp_end_html();
570 }
571
572 static void
pg_noresult(const struct req * req,int code,const char * http_msg,const char * user_msg)573 pg_noresult(const struct req *req, int code, const char *http_msg,
574 const char *user_msg)
575 {
576 resp_begin_html(code, http_msg, NULL);
577 resp_searchform(req, FOCUS_QUERY);
578 puts("<p>");
579 puts(user_msg);
580 puts("</p>");
581 resp_end_html();
582 }
583
584 static void
pg_error_badrequest(const char * msg)585 pg_error_badrequest(const char *msg)
586 {
587
588 resp_begin_html(400, "Bad Request", NULL);
589 puts("<h1>Bad Request</h1>\n"
590 "<p>\n");
591 puts(msg);
592 printf("Try again from the\n"
593 "<a href=\"/%s\">main page</a>.\n"
594 "</p>", scriptname);
595 resp_end_html();
596 }
597
/*
 * Serve a "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
605
606 static void
pg_redirect(const struct req * req,const char * name)607 pg_redirect(const struct req *req, const char *name)
608 {
609 printf("Status: 303 See Other\r\n"
610 "Location: /");
611 if (*scriptname != '\0')
612 printf("%s/", scriptname);
613 if (strcmp(req->q.manpath, req->p[0]))
614 printf("%s/", req->q.manpath);
615 if (req->q.arch != NULL)
616 printf("%s/", req->q.arch);
617 http_encode(name);
618 if (req->q.sec != NULL) {
619 putchar('.');
620 http_encode(req->q.sec);
621 }
622 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
623 }
624
625 static void
pg_searchres(const struct req * req,struct manpage * r,size_t sz)626 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
627 {
628 char *arch, *archend;
629 const char *sec;
630 size_t i, iuse;
631 int archprio, archpriouse;
632 int prio, priouse;
633
634 for (i = 0; i < sz; i++) {
635 if (validate_filename(r[i].file))
636 continue;
637 warnx("invalid filename %s in %s database",
638 r[i].file, req->q.manpath);
639 pg_error_internal();
640 return;
641 }
642
643 if (req->isquery && sz == 1) {
644 /*
645 * If we have just one result, then jump there now
646 * without any delay.
647 */
648 printf("Status: 303 See Other\r\n"
649 "Location: /");
650 if (*scriptname != '\0')
651 printf("%s/", scriptname);
652 if (strcmp(req->q.manpath, req->p[0]))
653 printf("%s/", req->q.manpath);
654 printf("%s\r\n"
655 "Content-Type: text/html; charset=utf-8\r\n\r\n",
656 r[0].file);
657 return;
658 }
659
660 /*
661 * In man(1) mode, show one of the pages
662 * even if more than one is found.
663 */
664
665 iuse = 0;
666 if (req->q.equal || sz == 1) {
667 priouse = 20;
668 archpriouse = 3;
669 for (i = 0; i < sz; i++) {
670 sec = r[i].file;
671 sec += strcspn(sec, "123456789");
672 if (sec[0] == '\0')
673 continue;
674 prio = sec_prios[sec[0] - '1'];
675 if (sec[1] != '/')
676 prio += 10;
677 if (req->q.arch == NULL) {
678 archprio =
679 ((arch = strchr(sec + 1, '/'))
680 == NULL) ? 3 :
681 ((archend = strchr(arch + 1, '/'))
682 == NULL) ? 0 :
683 strncmp(arch, "amd64/",
684 archend - arch) ? 2 : 1;
685 if (archprio < archpriouse) {
686 archpriouse = archprio;
687 priouse = prio;
688 iuse = i;
689 continue;
690 }
691 if (archprio > archpriouse)
692 continue;
693 }
694 if (prio >= priouse)
695 continue;
696 priouse = prio;
697 iuse = i;
698 }
699 resp_begin_html(200, NULL, r[iuse].file);
700 } else
701 resp_begin_html(200, NULL, NULL);
702
703 resp_searchform(req,
704 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
705
706 if (sz > 1) {
707 puts("<table class=\"results\">");
708 for (i = 0; i < sz; i++) {
709 printf(" <tr>\n"
710 " <td>"
711 "<a class=\"Xr\" href=\"/");
712 if (*scriptname != '\0')
713 printf("%s/", scriptname);
714 if (strcmp(req->q.manpath, req->p[0]))
715 printf("%s/", req->q.manpath);
716 printf("%s\">", r[i].file);
717 html_print(r[i].names);
718 printf("</a></td>\n"
719 " <td><span class=\"Nd\">");
720 html_print(r[i].output);
721 puts("</span></td>\n"
722 " </tr>");
723 }
724 puts("</table>");
725 }
726
727 if (req->q.equal || sz == 1) {
728 puts("<hr>");
729 resp_show(req, r[iuse].file);
730 }
731
732 resp_end_html();
733 }
734
/*
 * Print a preformatted (cat) manual page as HTML inside
 * <div class="catman"><pre>.
 *
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: "_\bX" prints X in italics, a few overstruck
 * pairs of '+', '-', '|', '=', '*' and 'o' print a plain '*' or
 * '+', and any other "X\bY" prints Y in bold.  All text is
 * HTML-escaped.  If the file cannot be opened, an error paragraph
 * is printed instead.  The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;

		/* The last character is dealt with after the loop. */

		for (i = 0; i < len - 1; i++) {
			/*
			 * A backspace or newline at this point means
			 * that the catpage is out of sync.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
			    ('o' == p[i] && '+' == p[i + 2]) ||
			    ('|' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '|' == p[i + 2]) ||
			    ('*' == p[i] && '=' == p[i + 2]) ||
			    ('=' == p[i] && '*' == p[i + 2]) ||
			    ('*' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '*' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '|' == p[i + 2]) ||
			    ('+' == p[i] && '-' == p[i + 2]) ||
			    ('-' == p[i] && '+' == p[i + 2]) ||
			    ('+' == p[i] && '|' == p[i + 2]) ||
			    ('|' == p[i] && '+' == p[i + 2])) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
871
872 static void
resp_format(const struct req * req,const char * file)873 resp_format(const struct req *req, const char *file)
874 {
875 struct manoutput conf;
876 struct mparse *mp;
877 struct roff_meta *meta;
878 void *vp;
879 int fd;
880 int usepath;
881
882 if (-1 == (fd = open(file, O_RDONLY, 0))) {
883 puts("<p>You specified an invalid manual file.</p>");
884 return;
885 }
886
887 mchars_alloc();
888 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
889 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
890 mparse_readfd(mp, fd, file);
891 close(fd);
892 meta = mparse_result(mp);
893
894 memset(&conf, 0, sizeof(conf));
895 conf.fragment = 1;
896 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
897 usepath = strcmp(req->q.manpath, req->p[0]);
898 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
899 scriptname, *scriptname == '\0' ? "" : "/",
900 usepath ? req->q.manpath : "", usepath ? "/" : "");
901
902 vp = html_alloc(&conf);
903 if (meta->macroset == MACROSET_MDOC)
904 html_mdoc(vp, meta);
905 else
906 html_man(vp, meta);
907
908 html_free(vp);
909 mparse_free(mp);
910 mchars_free();
911 free(conf.man);
912 free(conf.style);
913 }
914
/*
 * Print one manual page.  After an optional leading "./", a file
 * name starting with 'c' is handed to resp_catman() as a
 * preformatted page; anything else goes to resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel == 'c') {
		resp_catman(req, rel);
		return;
	}
	resp_format(req, rel);
}
927
928 static void
pg_show(struct req * req,const char * fullpath)929 pg_show(struct req *req, const char *fullpath)
930 {
931 char *manpath;
932 const char *file;
933
934 if ((file = strchr(fullpath, '/')) == NULL) {
935 pg_error_badrequest(
936 "You did not specify a page to show.");
937 return;
938 }
939 manpath = mandoc_strndup(fullpath, file - fullpath);
940 file++;
941
942 if ( ! validate_manpath(req, manpath)) {
943 pg_error_badrequest(
944 "You specified an invalid manpath.");
945 free(manpath);
946 return;
947 }
948
949 /*
950 * Begin by chdir()ing into the manpath.
951 * This way we can pick up the database files, which are
952 * relative to the manpath root.
953 */
954
955 if (chdir(manpath) == -1) {
956 warn("chdir %s", manpath);
957 pg_error_internal();
958 free(manpath);
959 return;
960 }
961 free(manpath);
962
963 if ( ! validate_filename(file)) {
964 pg_error_badrequest(
965 "You specified an invalid manual file.");
966 return;
967 }
968
969 resp_begin_html(200, NULL, file);
970 resp_searchform(req, FOCUS_NONE);
971 resp_show(req, file);
972 resp_end_html();
973 }
974
975 static void
pg_search(const struct req * req)976 pg_search(const struct req *req)
977 {
978 struct mansearch search;
979 struct manpaths paths;
980 struct manpage *res;
981 char **argv;
982 char *query, *rp, *wp;
983 size_t ressz;
984 int argc;
985
986 /*
987 * Begin by chdir()ing into the root of the manpath.
988 * This way we can pick up the database files, which are
989 * relative to the manpath root.
990 */
991
992 if (chdir(req->q.manpath) == -1) {
993 warn("chdir %s", req->q.manpath);
994 pg_error_internal();
995 return;
996 }
997
998 search.arch = req->q.arch;
999 search.sec = req->q.sec;
1000 search.outkey = "Nd";
1001 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
1002 search.firstmatch = 1;
1003
1004 paths.sz = 1;
1005 paths.paths = mandoc_malloc(sizeof(char *));
1006 paths.paths[0] = mandoc_strdup(".");
1007
1008 /*
1009 * Break apart at spaces with backslash-escaping.
1010 */
1011
1012 argc = 0;
1013 argv = NULL;
1014 rp = query = mandoc_strdup(req->q.query);
1015 for (;;) {
1016 while (isspace((unsigned char)*rp))
1017 rp++;
1018 if (*rp == '\0')
1019 break;
1020 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1021 argv[argc++] = wp = rp;
1022 for (;;) {
1023 if (isspace((unsigned char)*rp)) {
1024 *wp = '\0';
1025 rp++;
1026 break;
1027 }
1028 if (rp[0] == '\\' && rp[1] != '\0')
1029 rp++;
1030 if (wp != rp)
1031 *wp = *rp;
1032 if (*rp == '\0')
1033 break;
1034 wp++;
1035 rp++;
1036 }
1037 }
1038
1039 res = NULL;
1040 ressz = 0;
1041 if (req->isquery && req->q.equal && argc == 1)
1042 pg_redirect(req, argv[0]);
1043 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1044 pg_noresult(req, 400, "Bad Request",
1045 "You entered an invalid query.");
1046 else if (ressz == 0)
1047 pg_noresult(req, 404, "Not Found", "No results found.");
1048 else
1049 pg_searchres(req, res, ressz);
1050
1051 free(query);
1052 mansearch_free(res, ressz);
1053 free(paths.paths[0]);
1054 free(paths.paths);
1055 }
1056
1057 int
main(void)1058 main(void)
1059 {
1060 struct req req;
1061 struct itimerval itimer;
1062 const char *path;
1063 const char *querystring;
1064 int i;
1065
1066 #if HAVE_PLEDGE
1067 /*
1068 * The "rpath" pledge could be revoked after mparse_readfd()
1069 * if the file desciptor to "/footer.html" would be opened
1070 * up front, but it's probably not worth the complication
1071 * of the code it would cause: it would require scattering
1072 * pledge() calls in multiple low-level resp_*() functions.
1073 */
1074
1075 if (pledge("stdio rpath", NULL) == -1) {
1076 warn("pledge");
1077 pg_error_internal();
1078 return EXIT_FAILURE;
1079 }
1080 #endif
1081
1082 /* Poor man's ReDoS mitigation. */
1083
1084 itimer.it_value.tv_sec = 2;
1085 itimer.it_value.tv_usec = 0;
1086 itimer.it_interval.tv_sec = 2;
1087 itimer.it_interval.tv_usec = 0;
1088 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1089 warn("setitimer");
1090 pg_error_internal();
1091 return EXIT_FAILURE;
1092 }
1093
1094 /*
1095 * First we change directory into the MAN_DIR so that
1096 * subsequent scanning for manpath directories is rooted
1097 * relative to the same position.
1098 */
1099
1100 if (chdir(MAN_DIR) == -1) {
1101 warn("MAN_DIR: %s", MAN_DIR);
1102 pg_error_internal();
1103 return EXIT_FAILURE;
1104 }
1105
1106 memset(&req, 0, sizeof(struct req));
1107 req.q.equal = 1;
1108 parse_manpath_conf(&req);
1109
1110 /* Parse the path info and the query string. */
1111
1112 if ((path = getenv("PATH_INFO")) == NULL)
1113 path = "";
1114 else if (*path == '/')
1115 path++;
1116
1117 if (*path != '\0') {
1118 parse_path_info(&req, path);
1119 if (req.q.manpath == NULL || req.q.sec == NULL ||
1120 *req.q.query == '\0' || access(path, F_OK) == -1)
1121 path = "";
1122 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1123 parse_query_string(&req, querystring);
1124
1125 /* Validate parsed data and add defaults. */
1126
1127 if (req.q.manpath == NULL)
1128 req.q.manpath = mandoc_strdup(req.p[0]);
1129 else if ( ! validate_manpath(&req, req.q.manpath)) {
1130 pg_error_badrequest(
1131 "You specified an invalid manpath.");
1132 return EXIT_FAILURE;
1133 }
1134
1135 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1136 pg_error_badrequest(
1137 "You specified an invalid architecture.");
1138 return EXIT_FAILURE;
1139 }
1140
1141 /* Dispatch to the three different pages. */
1142
1143 if ('\0' != *path)
1144 pg_show(&req, path);
1145 else if (NULL != req.q.query)
1146 pg_search(&req);
1147 else
1148 pg_index(&req);
1149
1150 free(req.q.manpath);
1151 free(req.q.arch);
1152 free(req.q.sec);
1153 free(req.q.query);
1154 for (i = 0; i < (int)req.psz; i++)
1155 free(req.p[i]);
1156 free(req.p);
1157 return EXIT_SUCCESS;
1158 }
1159
1160 /*
1161 * Translate PATH_INFO to a query.
1162 */
1163 static void
parse_path_info(struct req * req,const char * path)1164 parse_path_info(struct req *req, const char *path)
1165 {
1166 const char *name, *sec, *end;
1167
1168 req->isquery = 0;
1169 req->q.equal = 1;
1170 req->q.manpath = NULL;
1171 req->q.arch = NULL;
1172
1173 /* Mandatory manual page name. */
1174 if ((name = strrchr(path, '/')) == NULL)
1175 name = path;
1176 else
1177 name++;
1178
1179 /* Optional trailing section. */
1180 sec = strrchr(name, '.');
1181 if (sec != NULL && isdigit((unsigned char)*++sec)) {
1182 req->q.query = mandoc_strndup(name, sec - name - 1);
1183 req->q.sec = mandoc_strdup(sec);
1184 } else {
1185 req->q.query = mandoc_strdup(name);
1186 req->q.sec = NULL;
1187 }
1188
1189 /* Handle the case of name[.section] only. */
1190 if (name == path)
1191 return;
1192
1193 /* Optional manpath. */
1194 end = strchr(path, '/');
1195 req->q.manpath = mandoc_strndup(path, end - path);
1196 if (validate_manpath(req, req->q.manpath)) {
1197 path = end + 1;
1198 if (name == path)
1199 return;
1200 } else {
1201 free(req->q.manpath);
1202 req->q.manpath = NULL;
1203 }
1204
1205 /* Optional section. */
1206 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1207 path += 3;
1208 end = strchr(path, '/');
1209 free(req->q.sec);
1210 req->q.sec = mandoc_strndup(path, end - path);
1211 path = end + 1;
1212 if (name == path)
1213 return;
1214 }
1215
1216 /* Optional architecture. */
1217 end = strchr(path, '/');
1218 if (end + 1 != name) {
1219 pg_error_badrequest(
1220 "You specified too many directory components.");
1221 exit(EXIT_FAILURE);
1222 }
1223 req->q.arch = mandoc_strndup(path, end - path);
1224 if (validate_arch(req->q.arch) == 0) {
1225 pg_error_badrequest(
1226 "You specified an invalid directory component.");
1227 exit(EXIT_FAILURE);
1228 }
1229 }
1230
1231 /*
1232 * Scan for indexable paths.
1233 */
1234 static void
parse_manpath_conf(struct req * req)1235 parse_manpath_conf(struct req *req)
1236 {
1237 FILE *fp;
1238 char *dp;
1239 size_t dpsz;
1240 ssize_t len;
1241
1242 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1243 warn("%s/manpath.conf", MAN_DIR);
1244 pg_error_internal();
1245 exit(EXIT_FAILURE);
1246 }
1247
1248 dp = NULL;
1249 dpsz = 0;
1250
1251 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1252 if (dp[len - 1] == '\n')
1253 dp[--len] = '\0';
1254 req->p = mandoc_realloc(req->p,
1255 (req->psz + 1) * sizeof(char *));
1256 if ( ! validate_urifrag(dp)) {
1257 warnx("%s/manpath.conf contains "
1258 "unsafe path \"%s\"", MAN_DIR, dp);
1259 pg_error_internal();
1260 exit(EXIT_FAILURE);
1261 }
1262 if (strchr(dp, '/') != NULL) {
1263 warnx("%s/manpath.conf contains "
1264 "path with slash \"%s\"", MAN_DIR, dp);
1265 pg_error_internal();
1266 exit(EXIT_FAILURE);
1267 }
1268 req->p[req->psz++] = dp;
1269 dp = NULL;
1270 dpsz = 0;
1271 }
1272 free(dp);
1273
1274 if (req->p == NULL) {
1275 warnx("%s/manpath.conf is empty", MAN_DIR);
1276 pg_error_internal();
1277 exit(EXIT_FAILURE);
1278 }
1279 }
1280