xref: /freebsd/contrib/mandoc/cgi.c (revision 731d06abf2105cc0873fa84e972178f9f37ca760)
1 /*	$Id: cgi.c,v 1.166 2019/03/06 12:32:41 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016, 2017, 2018 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 #include <sys/time.h>
22 
23 #include <ctype.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35 
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "mandoc_parse.h"
42 #include "main.h"
43 #include "manconf.h"
44 #include "mansearch.h"
45 #include "cgi.h"
46 
/*
 * Search parameters extracted from the request,
 * as handed to the search and display functions.
 */
struct	query {
	char	*manpath;	/* manual tree (directory) to use */
	char	*arch;		/* machine architecture, or NULL for any */
	char	*sec;		/* manual section, or NULL for any */
	char	*query;		/* raw, unparsed query expression */
	int	 equal;		/* nonzero: match whole names, not substrings */
};
57 
58 struct	req {
59 	struct query	  q;
60 	char		**p; /* array of available manpaths */
61 	size_t		  psz; /* number of available manpaths */
62 	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
63 };
64 
/*
 * Which element of the search form, if any, asks for input focus.
 */
enum	focus {
	FOCUS_NONE = 0,	/* no element is marked "autofocus" */
	FOCUS_QUERY	/* the query input box is marked "autofocus" */
};
69 
70 static	void		 html_print(const char *);
71 static	void		 html_putchar(char);
72 static	int		 http_decode(char *);
73 static	void		 http_encode(const char *p);
74 static	void		 parse_manpath_conf(struct req *);
75 static	void		 parse_path_info(struct req *req, const char *path);
76 static	void		 parse_query_string(struct req *, const char *);
77 static	void		 pg_error_badrequest(const char *);
78 static	void		 pg_error_internal(void);
79 static	void		 pg_index(const struct req *);
80 static	void		 pg_noresult(const struct req *, const char *);
81 static	void		 pg_redirect(const struct req *, const char *);
82 static	void		 pg_search(const struct req *);
83 static	void		 pg_searchres(const struct req *,
84 				struct manpage *, size_t);
85 static	void		 pg_show(struct req *, const char *);
86 static	void		 resp_begin_html(int, const char *, const char *);
87 static	void		 resp_begin_http(int, const char *);
88 static	void		 resp_catman(const struct req *, const char *);
89 static	void		 resp_copy(const char *);
90 static	void		 resp_end_html(void);
91 static	void		 resp_format(const struct req *, const char *);
92 static	void		 resp_searchform(const struct req *, enum focus);
93 static	void		 resp_show(const struct req *, const char *);
94 static	void		 set_query_attr(char **, char **);
95 static	int		 validate_arch(const char *);
96 static	int		 validate_filename(const char *);
97 static	int		 validate_manpath(const struct req *, const char *);
98 static	int		 validate_urifrag(const char *);
99 
100 static	const char	 *scriptname = SCRIPT_NAME;
101 
/*
 * Priorities of the sections 1 to 9, indexed by section digit minus one.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/* Values of the section selector, parallel to sec_names[]. */
static	const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels of the section selector, parallel to sec_numbers[]. */
static	const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};

/* Number of entries in the section selector. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
120 
/*
 * Architecture names accepted in requests
 * and offered in the architecture selector, in display order.
 */
static	const char *const arch_names[] = {
	"amd64",
	"alpha",
	"armv7",
	"arm64",
	"hppa",
	"i386",
	"landisk",
	"loongson",
	"luna88k",
	"macppc",
	"mips64",
	"octeon",
	"sgi",
	"socppc",
	"sparc64",
	"amiga",
	"arc",
	"armish",
	"arm32",
	"atari",
	"aviion",
	"beagle",
	"cats",
	"hppa64",
	"hp300",
	"ia64",
	"mac68k",
	"mvme68k",
	"mvme88k",
	"mvmeppc",
	"palm",
	"pc532",
	"pegasos",
	"pmax",
	"powerpc",
	"solbourne",
	"sparc",
	"sun3",
	"vax",
	"wgrisc",
	"x68k",
	"zaurus"
};

/* Number of known architectures. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
136 
/*
 * Write one character to standard output, replacing the four
 * characters that are special in HTML by named entities.
 * Everything else, including non-ASCII bytes, is passed through as is.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity != NULL)
		fputs(entity, stdout);
	else
		putchar((unsigned char)c);
}
163 
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time by way of html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
177 
/*
 * Hand the allocated string *val over to the attribute *attr.
 * The previous value of the attribute is freed.
 * An empty string is not stored: it is freed, and the
 * attribute becomes NULL instead.
 * In either case, *val is NULL on return.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	free(*attr);

	newval = *val;
	*val = NULL;

	if (*newval == '\0') {
		free(newval);
		newval = NULL;
	}
	*attr = newval;
}
194 
195 /*
196  * Parse the QUERY_STRING for key-value pairs
197  * and store the values into the query structure.
198  */
199 static void
200 parse_query_string(struct req *req, const char *qs)
201 {
202 	char		*key, *val;
203 	size_t		 keysz, valsz;
204 
205 	req->isquery	= 1;
206 	req->q.manpath	= NULL;
207 	req->q.arch	= NULL;
208 	req->q.sec	= NULL;
209 	req->q.query	= NULL;
210 	req->q.equal	= 1;
211 
212 	key = val = NULL;
213 	while (*qs != '\0') {
214 
215 		/* Parse one key. */
216 
217 		keysz = strcspn(qs, "=;&");
218 		key = mandoc_strndup(qs, keysz);
219 		qs += keysz;
220 		if (*qs != '=')
221 			goto next;
222 
223 		/* Parse one value. */
224 
225 		valsz = strcspn(++qs, ";&");
226 		val = mandoc_strndup(qs, valsz);
227 		qs += valsz;
228 
229 		/* Decode and catch encoding errors. */
230 
231 		if ( ! (http_decode(key) && http_decode(val)))
232 			goto next;
233 
234 		/* Handle key-value pairs. */
235 
236 		if ( ! strcmp(key, "query"))
237 			set_query_attr(&req->q.query, &val);
238 
239 		else if ( ! strcmp(key, "apropos"))
240 			req->q.equal = !strcmp(val, "0");
241 
242 		else if ( ! strcmp(key, "manpath")) {
243 #ifdef COMPAT_OLDURI
244 			if ( ! strncmp(val, "OpenBSD ", 8)) {
245 				val[7] = '-';
246 				if ('C' == val[8])
247 					val[8] = 'c';
248 			}
249 #endif
250 			set_query_attr(&req->q.manpath, &val);
251 		}
252 
253 		else if ( ! (strcmp(key, "sec")
254 #ifdef COMPAT_OLDURI
255 		    && strcmp(key, "sektion")
256 #endif
257 		    )) {
258 			if ( ! strcmp(val, "0"))
259 				*val = '\0';
260 			set_query_attr(&req->q.sec, &val);
261 		}
262 
263 		else if ( ! strcmp(key, "arch")) {
264 			if ( ! strcmp(val, "default"))
265 				*val = '\0';
266 			set_query_attr(&req->q.arch, &val);
267 		}
268 
269 		/*
270 		 * The key must be freed in any case.
271 		 * The val may have been handed over to the query
272 		 * structure, in which case it is now NULL.
273 		 */
274 next:
275 		free(key);
276 		key = NULL;
277 		free(val);
278 		val = NULL;
279 
280 		if (*qs != '\0')
281 			qs++;
282 	}
283 }
284 
285 /*
286  * HTTP-decode a string.  The standard explanation is that this turns
287  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
288  * over the allocated string.
289  */
290 static int
291 http_decode(char *p)
292 {
293 	char             hex[3];
294 	char		*q;
295 	int              c;
296 
297 	hex[2] = '\0';
298 
299 	q = p;
300 	for ( ; '\0' != *p; p++, q++) {
301 		if ('%' == *p) {
302 			if ('\0' == (hex[0] = *(p + 1)))
303 				return 0;
304 			if ('\0' == (hex[1] = *(p + 2)))
305 				return 0;
306 			if (1 != sscanf(hex, "%x", &c))
307 				return 0;
308 			if ('\0' == c)
309 				return 0;
310 
311 			*q = (char)c;
312 			p += 2;
313 		} else
314 			*q = '+' == *p ? ' ' : *p;
315 	}
316 
317 	*q = '\0';
318 	return 1;
319 }
320 
/*
 * Write a string to standard output with percent-encoding.
 * ASCII letters and digits and the characters "-._~" are
 * written unchanged; every other byte is written as "%XX"
 * with two uppercase hexadecimal digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
332 
/*
 * Write the CGI response header and flush it.
 * A "Status:" line is only written for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
347 
/*
 * Copy the content of a file verbatim to standard output.
 * Nothing is written and no error is reported if the file
 * cannot be opened.
 *
 * Buffered stdio output is flushed first because the data is
 * written with write(2) directly.  Since write(2) may transfer
 * fewer bytes than requested, it is repeated until each chunk
 * is written completely.  Copying stops at the first read or
 * write error other than an interrupted call.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	fflush(stdout);
	for (;;) {
		nr = read(fd, buf, sizeof(buf));
		if (nr == -1 && errno == EINTR)
			continue;
		if (nr <= 0)
			break;
		off = 0;
		while (off < nr) {
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			if (nw == -1) {
				if (errno == EINTR)
					continue;
				goto out;
			}
			off += nw;
		}
	}
out:
	close(fd);
}
362 
363 static void
364 resp_begin_html(int code, const char *msg, const char *file)
365 {
366 	char	*cp;
367 
368 	resp_begin_http(code, msg);
369 
370 	printf("<!DOCTYPE html>\n"
371 	       "<html>\n"
372 	       "<head>\n"
373 	       "  <meta charset=\"UTF-8\"/>\n"
374 	       "  <meta name=\"viewport\""
375 		      " content=\"width=device-width, initial-scale=1.0\">\n"
376 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
377 	       " type=\"text/css\" media=\"all\">\n"
378 	       "  <title>",
379 	       CSS_DIR);
380 	if (file != NULL) {
381 		if ((cp = strrchr(file, '/')) != NULL)
382 			file = cp + 1;
383 		if ((cp = strrchr(file, '.')) != NULL) {
384 			printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
385 		} else
386 			printf("%s - ", file);
387 	}
388 	printf("%s</title>\n"
389 	       "</head>\n"
390 	       "<body>\n",
391 	       CUSTOMIZE_TITLE);
392 
393 	resp_copy(MAN_DIR "/header.html");
394 }
395 
396 static void
397 resp_end_html(void)
398 {
399 
400 	resp_copy(MAN_DIR "/footer.html");
401 
402 	puts("</body>\n"
403 	     "</html>");
404 }
405 
406 static void
407 resp_searchform(const struct req *req, enum focus focus)
408 {
409 	int		 i;
410 
411 	printf("<form action=\"/%s\" method=\"get\">\n"
412 	       "  <fieldset>\n"
413 	       "    <legend>Manual Page Search Parameters</legend>\n",
414 	       scriptname);
415 
416 	/* Write query input box. */
417 
418 	printf("    <input type=\"search\" name=\"query\" value=\"");
419 	if (req->q.query != NULL)
420 		html_print(req->q.query);
421 	printf( "\" size=\"40\"");
422 	if (focus == FOCUS_QUERY)
423 		printf(" autofocus");
424 	puts(">");
425 
426 	/* Write submission buttons. */
427 
428 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
429 		"man</button>\n"
430 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
431 		"apropos</button>\n"
432 		"    <br/>\n");
433 
434 	/* Write section selector. */
435 
436 	puts("    <select name=\"sec\">");
437 	for (i = 0; i < sec_MAX; i++) {
438 		printf("      <option value=\"%s\"", sec_numbers[i]);
439 		if (NULL != req->q.sec &&
440 		    0 == strcmp(sec_numbers[i], req->q.sec))
441 			printf(" selected=\"selected\"");
442 		printf(">%s</option>\n", sec_names[i]);
443 	}
444 	puts("    </select>");
445 
446 	/* Write architecture selector. */
447 
448 	printf(	"    <select name=\"arch\">\n"
449 		"      <option value=\"default\"");
450 	if (NULL == req->q.arch)
451 		printf(" selected=\"selected\"");
452 	puts(">All Architectures</option>");
453 	for (i = 0; i < arch_MAX; i++) {
454 		printf("      <option");
455 		if (NULL != req->q.arch &&
456 		    0 == strcmp(arch_names[i], req->q.arch))
457 			printf(" selected=\"selected\"");
458 		printf(">%s</option>\n", arch_names[i]);
459 	}
460 	puts("    </select>");
461 
462 	/* Write manpath selector. */
463 
464 	if (req->psz > 1) {
465 		puts("    <select name=\"manpath\">");
466 		for (i = 0; i < (int)req->psz; i++) {
467 			printf("      <option");
468 			if (strcmp(req->q.manpath, req->p[i]) == 0)
469 				printf(" selected=\"selected\"");
470 			printf(">");
471 			html_print(req->p[i]);
472 			puts("</option>");
473 		}
474 		puts("    </select>");
475 	}
476 
477 	puts("  </fieldset>\n"
478 	     "</form>");
479 }
480 
/*
 * Check that a string consists only of characters that isalnum(3)
 * accepts and of the characters '-', '.', '/', and '_'.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			return 0;
		}
	}
	return 1;
}
494 
495 static int
496 validate_manpath(const struct req *req, const char* manpath)
497 {
498 	size_t	 i;
499 
500 	for (i = 0; i < req->psz; i++)
501 		if ( ! strcmp(manpath, req->p[i]))
502 			return 1;
503 
504 	return 0;
505 }
506 
507 static int
508 validate_arch(const char *arch)
509 {
510 	int	 i;
511 
512 	for (i = 0; i < arch_MAX; i++)
513 		if (strcmp(arch, arch_names[i]) == 0)
514 			return 1;
515 
516 	return 0;
517 }
518 
/*
 * Check a file name relative to a manpath.
 * After skipping an optional leading "./", the name must start
 * with "man" or "cat" and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	return strncmp(file, "man", 3) == 0 || strncmp(file, "cat", 3) == 0;
}
529 
530 static void
531 pg_index(const struct req *req)
532 {
533 
534 	resp_begin_html(200, NULL, NULL);
535 	resp_searchform(req, FOCUS_QUERY);
536 	printf("<p>\n"
537 	       "This web interface is documented in the\n"
538 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
539 	       "manual, and the\n"
540 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
541 	       "manual explains the query syntax.\n"
542 	       "</p>\n",
543 	       scriptname, *scriptname == '\0' ? "" : "/",
544 	       scriptname, *scriptname == '\0' ? "" : "/");
545 	resp_end_html();
546 }
547 
548 static void
549 pg_noresult(const struct req *req, const char *msg)
550 {
551 	resp_begin_html(200, NULL, NULL);
552 	resp_searchform(req, FOCUS_QUERY);
553 	puts("<p>");
554 	puts(msg);
555 	puts("</p>");
556 	resp_end_html();
557 }
558 
559 static void
560 pg_error_badrequest(const char *msg)
561 {
562 
563 	resp_begin_html(400, "Bad Request", NULL);
564 	puts("<h1>Bad Request</h1>\n"
565 	     "<p>\n");
566 	puts(msg);
567 	printf("Try again from the\n"
568 	       "<a href=\"/%s\">main page</a>.\n"
569 	       "</p>", scriptname);
570 	resp_end_html();
571 }
572 
/*
 * Write a complete "500 Internal Server Error" page.
 * No details are disclosed to the client.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
580 
581 static void
582 pg_redirect(const struct req *req, const char *name)
583 {
584 	printf("Status: 303 See Other\r\n"
585 	    "Location: /");
586 	if (*scriptname != '\0')
587 		printf("%s/", scriptname);
588 	if (strcmp(req->q.manpath, req->p[0]))
589 		printf("%s/", req->q.manpath);
590 	if (req->q.arch != NULL)
591 		printf("%s/", req->q.arch);
592 	http_encode(name);
593 	if (req->q.sec != NULL) {
594 		putchar('.');
595 		http_encode(req->q.sec);
596 	}
597 	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
598 }
599 
600 static void
601 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
602 {
603 	char		*arch, *archend;
604 	const char	*sec;
605 	size_t		 i, iuse;
606 	int		 archprio, archpriouse;
607 	int		 prio, priouse;
608 
609 	for (i = 0; i < sz; i++) {
610 		if (validate_filename(r[i].file))
611 			continue;
612 		warnx("invalid filename %s in %s database",
613 		    r[i].file, req->q.manpath);
614 		pg_error_internal();
615 		return;
616 	}
617 
618 	if (req->isquery && sz == 1) {
619 		/*
620 		 * If we have just one result, then jump there now
621 		 * without any delay.
622 		 */
623 		printf("Status: 303 See Other\r\n"
624 		    "Location: /");
625 		if (*scriptname != '\0')
626 			printf("%s/", scriptname);
627 		if (strcmp(req->q.manpath, req->p[0]))
628 			printf("%s/", req->q.manpath);
629 		printf("%s\r\n"
630 		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
631 		    r[0].file);
632 		return;
633 	}
634 
635 	/*
636 	 * In man(1) mode, show one of the pages
637 	 * even if more than one is found.
638 	 */
639 
640 	iuse = 0;
641 	if (req->q.equal || sz == 1) {
642 		priouse = 20;
643 		archpriouse = 3;
644 		for (i = 0; i < sz; i++) {
645 			sec = r[i].file;
646 			sec += strcspn(sec, "123456789");
647 			if (sec[0] == '\0')
648 				continue;
649 			prio = sec_prios[sec[0] - '1'];
650 			if (sec[1] != '/')
651 				prio += 10;
652 			if (req->q.arch == NULL) {
653 				archprio =
654 				    ((arch = strchr(sec + 1, '/'))
655 					== NULL) ? 3 :
656 				    ((archend = strchr(arch + 1, '/'))
657 					== NULL) ? 0 :
658 				    strncmp(arch, "amd64/",
659 					archend - arch) ? 2 : 1;
660 				if (archprio < archpriouse) {
661 					archpriouse = archprio;
662 					priouse = prio;
663 					iuse = i;
664 					continue;
665 				}
666 				if (archprio > archpriouse)
667 					continue;
668 			}
669 			if (prio >= priouse)
670 				continue;
671 			priouse = prio;
672 			iuse = i;
673 		}
674 		resp_begin_html(200, NULL, r[iuse].file);
675 	} else
676 		resp_begin_html(200, NULL, NULL);
677 
678 	resp_searchform(req,
679 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
680 
681 	if (sz > 1) {
682 		puts("<table class=\"results\">");
683 		for (i = 0; i < sz; i++) {
684 			printf("  <tr>\n"
685 			       "    <td>"
686 			       "<a class=\"Xr\" href=\"/");
687 			if (*scriptname != '\0')
688 				printf("%s/", scriptname);
689 			if (strcmp(req->q.manpath, req->p[0]))
690 				printf("%s/", req->q.manpath);
691 			printf("%s\">", r[i].file);
692 			html_print(r[i].names);
693 			printf("</a></td>\n"
694 			       "    <td><span class=\"Nd\">");
695 			html_print(r[i].output);
696 			puts("</span></td>\n"
697 			     "  </tr>");
698 		}
699 		puts("</table>");
700 	}
701 
702 	if (req->q.equal || sz == 1) {
703 		puts("<hr>");
704 		resp_show(req, r[iuse].file);
705 	}
706 
707 	resp_end_html();
708 }
709 
/*
 * Write a preformatted manual page ("cat page") as HTML.
 *
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: "_\bX" becomes italic X, some combinations of
 * two different characters become '*' or '+', and any other
 * "X\bY" becomes bold Y.  All text is HTML-escaped.
 * If the file cannot be opened, a short error paragraph is
 * written instead.  The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character struck over p[i].
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
846 
847 static void
848 resp_format(const struct req *req, const char *file)
849 {
850 	struct manoutput conf;
851 	struct mparse	*mp;
852 	struct roff_meta *meta;
853 	void		*vp;
854 	int		 fd;
855 	int		 usepath;
856 
857 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
858 		puts("<p>You specified an invalid manual file.</p>");
859 		return;
860 	}
861 
862 	mchars_alloc();
863 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
864 	    MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
865 	mparse_readfd(mp, fd, file);
866 	close(fd);
867 	meta = mparse_result(mp);
868 
869 	memset(&conf, 0, sizeof(conf));
870 	conf.fragment = 1;
871 	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
872 	conf.toc = 1;
873 	usepath = strcmp(req->q.manpath, req->p[0]);
874 	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
875 	    scriptname, *scriptname == '\0' ? "" : "/",
876 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
877 
878 	vp = html_alloc(&conf);
879 	if (meta->macroset == MACROSET_MDOC)
880 		html_mdoc(vp, meta);
881 	else
882 		html_man(vp, meta);
883 
884 	html_free(vp);
885 	mparse_free(mp);
886 	mchars_free();
887 	free(conf.man);
888 	free(conf.style);
889 }
890 
/*
 * Write one manual page as HTML.
 * After skipping an optional leading "./", file names starting
 * with 'c' are treated as preformatted pages, and all
 * others are parsed as manual page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	if (file[0] == 'c') {
		resp_catman(req, file);
		return;
	}
	resp_format(req, file);
}
903 
904 static void
905 pg_show(struct req *req, const char *fullpath)
906 {
907 	char		*manpath;
908 	const char	*file;
909 
910 	if ((file = strchr(fullpath, '/')) == NULL) {
911 		pg_error_badrequest(
912 		    "You did not specify a page to show.");
913 		return;
914 	}
915 	manpath = mandoc_strndup(fullpath, file - fullpath);
916 	file++;
917 
918 	if ( ! validate_manpath(req, manpath)) {
919 		pg_error_badrequest(
920 		    "You specified an invalid manpath.");
921 		free(manpath);
922 		return;
923 	}
924 
925 	/*
926 	 * Begin by chdir()ing into the manpath.
927 	 * This way we can pick up the database files, which are
928 	 * relative to the manpath root.
929 	 */
930 
931 	if (chdir(manpath) == -1) {
932 		warn("chdir %s", manpath);
933 		pg_error_internal();
934 		free(manpath);
935 		return;
936 	}
937 	free(manpath);
938 
939 	if ( ! validate_filename(file)) {
940 		pg_error_badrequest(
941 		    "You specified an invalid manual file.");
942 		return;
943 	}
944 
945 	resp_begin_html(200, NULL, file);
946 	resp_searchform(req, FOCUS_NONE);
947 	resp_show(req, file);
948 	resp_end_html();
949 }
950 
951 static void
952 pg_search(const struct req *req)
953 {
954 	struct mansearch	  search;
955 	struct manpaths		  paths;
956 	struct manpage		 *res;
957 	char			**argv;
958 	char			 *query, *rp, *wp;
959 	size_t			  ressz;
960 	int			  argc;
961 
962 	/*
963 	 * Begin by chdir()ing into the root of the manpath.
964 	 * This way we can pick up the database files, which are
965 	 * relative to the manpath root.
966 	 */
967 
968 	if (chdir(req->q.manpath) == -1) {
969 		warn("chdir %s", req->q.manpath);
970 		pg_error_internal();
971 		return;
972 	}
973 
974 	search.arch = req->q.arch;
975 	search.sec = req->q.sec;
976 	search.outkey = "Nd";
977 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
978 	search.firstmatch = 1;
979 
980 	paths.sz = 1;
981 	paths.paths = mandoc_malloc(sizeof(char *));
982 	paths.paths[0] = mandoc_strdup(".");
983 
984 	/*
985 	 * Break apart at spaces with backslash-escaping.
986 	 */
987 
988 	argc = 0;
989 	argv = NULL;
990 	rp = query = mandoc_strdup(req->q.query);
991 	for (;;) {
992 		while (isspace((unsigned char)*rp))
993 			rp++;
994 		if (*rp == '\0')
995 			break;
996 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
997 		argv[argc++] = wp = rp;
998 		for (;;) {
999 			if (isspace((unsigned char)*rp)) {
1000 				*wp = '\0';
1001 				rp++;
1002 				break;
1003 			}
1004 			if (rp[0] == '\\' && rp[1] != '\0')
1005 				rp++;
1006 			if (wp != rp)
1007 				*wp = *rp;
1008 			if (*rp == '\0')
1009 				break;
1010 			wp++;
1011 			rp++;
1012 		}
1013 	}
1014 
1015 	res = NULL;
1016 	ressz = 0;
1017 	if (req->isquery && req->q.equal && argc == 1)
1018 		pg_redirect(req, argv[0]);
1019 	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1020 		pg_noresult(req, "You entered an invalid query.");
1021 	else if (ressz == 0)
1022 		pg_noresult(req, "No results found.");
1023 	else
1024 		pg_searchres(req, res, ressz);
1025 
1026 	free(query);
1027 	mansearch_free(res, ressz);
1028 	free(paths.paths[0]);
1029 	free(paths.paths);
1030 }
1031 
1032 int
1033 main(void)
1034 {
1035 	struct req	 req;
1036 	struct itimerval itimer;
1037 	const char	*path;
1038 	const char	*querystring;
1039 	int		 i;
1040 
1041 #if HAVE_PLEDGE
1042 	/*
1043 	 * The "rpath" pledge could be revoked after mparse_readfd()
1044 	 * if the file desciptor to "/footer.html" would be opened
1045 	 * up front, but it's probably not worth the complication
1046 	 * of the code it would cause: it would require scattering
1047 	 * pledge() calls in multiple low-level resp_*() functions.
1048 	 */
1049 
1050 	if (pledge("stdio rpath", NULL) == -1) {
1051 		warn("pledge");
1052 		pg_error_internal();
1053 		return EXIT_FAILURE;
1054 	}
1055 #endif
1056 
1057 	/* Poor man's ReDoS mitigation. */
1058 
1059 	itimer.it_value.tv_sec = 2;
1060 	itimer.it_value.tv_usec = 0;
1061 	itimer.it_interval.tv_sec = 2;
1062 	itimer.it_interval.tv_usec = 0;
1063 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1064 		warn("setitimer");
1065 		pg_error_internal();
1066 		return EXIT_FAILURE;
1067 	}
1068 
1069 	/*
1070 	 * First we change directory into the MAN_DIR so that
1071 	 * subsequent scanning for manpath directories is rooted
1072 	 * relative to the same position.
1073 	 */
1074 
1075 	if (chdir(MAN_DIR) == -1) {
1076 		warn("MAN_DIR: %s", MAN_DIR);
1077 		pg_error_internal();
1078 		return EXIT_FAILURE;
1079 	}
1080 
1081 	memset(&req, 0, sizeof(struct req));
1082 	req.q.equal = 1;
1083 	parse_manpath_conf(&req);
1084 
1085 	/* Parse the path info and the query string. */
1086 
1087 	if ((path = getenv("PATH_INFO")) == NULL)
1088 		path = "";
1089 	else if (*path == '/')
1090 		path++;
1091 
1092 	if (*path != '\0') {
1093 		parse_path_info(&req, path);
1094 		if (req.q.manpath == NULL || req.q.sec == NULL ||
1095 		    *req.q.query == '\0' || access(path, F_OK) == -1)
1096 			path = "";
1097 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1098 		parse_query_string(&req, querystring);
1099 
1100 	/* Validate parsed data and add defaults. */
1101 
1102 	if (req.q.manpath == NULL)
1103 		req.q.manpath = mandoc_strdup(req.p[0]);
1104 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1105 		pg_error_badrequest(
1106 		    "You specified an invalid manpath.");
1107 		return EXIT_FAILURE;
1108 	}
1109 
1110 	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1111 		pg_error_badrequest(
1112 		    "You specified an invalid architecture.");
1113 		return EXIT_FAILURE;
1114 	}
1115 
1116 	/* Dispatch to the three different pages. */
1117 
1118 	if ('\0' != *path)
1119 		pg_show(&req, path);
1120 	else if (NULL != req.q.query)
1121 		pg_search(&req);
1122 	else
1123 		pg_index(&req);
1124 
1125 	free(req.q.manpath);
1126 	free(req.q.arch);
1127 	free(req.q.sec);
1128 	free(req.q.query);
1129 	for (i = 0; i < (int)req.psz; i++)
1130 		free(req.p[i]);
1131 	free(req.p);
1132 	return EXIT_SUCCESS;
1133 }
1134 
1135 /*
1136  * Translate PATH_INFO to a query.
1137  */
1138 static void
1139 parse_path_info(struct req *req, const char *path)
1140 {
1141 	const char	*name, *sec, *end;
1142 
1143 	req->isquery = 0;
1144 	req->q.equal = 1;
1145 	req->q.manpath = NULL;
1146 	req->q.arch = NULL;
1147 
1148 	/* Mandatory manual page name. */
1149 	if ((name = strrchr(path, '/')) == NULL)
1150 		name = path;
1151 	else
1152 		name++;
1153 
1154 	/* Optional trailing section. */
1155 	sec = strrchr(name, '.');
1156 	if (sec != NULL && isdigit((unsigned char)*++sec)) {
1157 		req->q.query = mandoc_strndup(name, sec - name - 1);
1158 		req->q.sec = mandoc_strdup(sec);
1159 	} else {
1160 		req->q.query = mandoc_strdup(name);
1161 		req->q.sec = NULL;
1162 	}
1163 
1164 	/* Handle the case of name[.section] only. */
1165 	if (name == path)
1166 		return;
1167 
1168 	/* Optional manpath. */
1169 	end = strchr(path, '/');
1170 	req->q.manpath = mandoc_strndup(path, end - path);
1171 	if (validate_manpath(req, req->q.manpath)) {
1172 		path = end + 1;
1173 		if (name == path)
1174 			return;
1175 	} else {
1176 		free(req->q.manpath);
1177 		req->q.manpath = NULL;
1178 	}
1179 
1180 	/* Optional section. */
1181 	if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1182 		path += 3;
1183 		end = strchr(path, '/');
1184 		free(req->q.sec);
1185 		req->q.sec = mandoc_strndup(path, end - path);
1186 		path = end + 1;
1187 		if (name == path)
1188 			return;
1189 	}
1190 
1191 	/* Optional architecture. */
1192 	end = strchr(path, '/');
1193 	if (end + 1 != name) {
1194 		pg_error_badrequest(
1195 		    "You specified too many directory components.");
1196 		exit(EXIT_FAILURE);
1197 	}
1198 	req->q.arch = mandoc_strndup(path, end - path);
1199 	if (validate_arch(req->q.arch) == 0) {
1200 		pg_error_badrequest(
1201 		    "You specified an invalid directory component.");
1202 		exit(EXIT_FAILURE);
1203 	}
1204 }
1205 
1206 /*
1207  * Scan for indexable paths.
1208  */
1209 static void
1210 parse_manpath_conf(struct req *req)
1211 {
1212 	FILE	*fp;
1213 	char	*dp;
1214 	size_t	 dpsz;
1215 	ssize_t	 len;
1216 
1217 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1218 		warn("%s/manpath.conf", MAN_DIR);
1219 		pg_error_internal();
1220 		exit(EXIT_FAILURE);
1221 	}
1222 
1223 	dp = NULL;
1224 	dpsz = 0;
1225 
1226 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1227 		if (dp[len - 1] == '\n')
1228 			dp[--len] = '\0';
1229 		req->p = mandoc_realloc(req->p,
1230 		    (req->psz + 1) * sizeof(char *));
1231 		if ( ! validate_urifrag(dp)) {
1232 			warnx("%s/manpath.conf contains "
1233 			    "unsafe path \"%s\"", MAN_DIR, dp);
1234 			pg_error_internal();
1235 			exit(EXIT_FAILURE);
1236 		}
1237 		if (strchr(dp, '/') != NULL) {
1238 			warnx("%s/manpath.conf contains "
1239 			    "path with slash \"%s\"", MAN_DIR, dp);
1240 			pg_error_internal();
1241 			exit(EXIT_FAILURE);
1242 		}
1243 		req->p[req->psz++] = dp;
1244 		dp = NULL;
1245 		dpsz = 0;
1246 	}
1247 	free(dp);
1248 
1249 	if (req->p == NULL) {
1250 		warnx("%s/manpath.conf is empty", MAN_DIR);
1251 		pg_error_internal();
1252 		exit(EXIT_FAILURE);
1253 	}
1254 }
1255