xref: /freebsd/contrib/mandoc/cgi.c (revision 732a02b4e77866604a120a275c082bb6221bd2ff)
1 /*	$Id: cgi.c,v 1.167 2019/07/10 12:49:20 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016, 2017, 2018 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 #include <sys/time.h>
22 
23 #include <ctype.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35 
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "mandoc_parse.h"
42 #include "main.h"
43 #include "manconf.h"
44 #include "mansearch.h"
45 #include "cgi.h"
46 
/*
 * Search parameters of a single request, as handed to the search
 * function.  The string members are NULL when the request did not
 * specify the respective parameter.
 */
struct	query {
	char	*manpath;	/* manual directory to search in */
	char	*arch;		/* architecture to restrict the search to */
	char	*sec;		/* manual section to restrict the search to */
	char	*query;		/* search expression, still unparsed */
	int	 equal;		/* non-zero: match whole names, not substrings */
};
57 
58 struct	req {
59 	struct query	  q;
60 	char		**p; /* array of available manpaths */
61 	size_t		  psz; /* number of available manpaths */
62 	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
63 };
64 
/*
 * Selects which element of the search form, if any,
 * is marked with the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0,		/* no element gets the focus */
	FOCUS_QUERY		/* the query input box gets the focus */
};
69 
/* HTML escaping and HTTP encoding. */
static	void		 html_print(const char *p);
static	void		 html_putchar(char c);
static	int		 http_decode(char *p);
static	void		 http_encode(const char *p);

/* Parsing of the configuration and of the request. */
static	void		 parse_manpath_conf(struct req *req);
static	void		 parse_path_info(struct req *req, const char *path);
static	void		 parse_query_string(struct req *req, const char *qs);
static	void		 set_query_attr(char **attr, char **val);

/* Complete pages. */
static	void		 pg_error_badrequest(const char *msg);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *req);
static	void		 pg_noresult(const struct req *req, const char *msg);
static	void		 pg_redirect(const struct req *req, const char *name);
static	void		 pg_search(const struct req *req);
static	void		 pg_searchres(const struct req *req,
				struct manpage *r, size_t sz);
static	void		 pg_show(struct req *req, const char *fullpath);

/* Parts of a response. */
static	void		 resp_begin_html(int code, const char *msg,
				const char *file);
static	void		 resp_begin_http(int code, const char *msg);
static	void		 resp_catman(const struct req *req, const char *file);
static	void		 resp_copy(const char *filename);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *req, const char *file);
static	void		 resp_searchform(const struct req *req,
				enum focus focus);
static	void		 resp_show(const struct req *req, const char *file);

/* Validation of request parameters. */
static	int		 validate_arch(const char *arch);
static	int		 validate_filename(const char *file);
static	int		 validate_manpath(const struct req *req,
				const char *manpath);
static	int		 validate_urifrag(const char *frag);
99 
100 static	const char	 *scriptname = SCRIPT_NAME;
101 
/*
 * Preference among the sections 1 to 9 when several pages match;
 * indexed by the section digit minus one, smaller values win.
 */
static	const int sec_prios[] = {
    1, 4, 5, 8, 6, 3, 7, 2, 9
};

/*
 * Entries of the section selector in the search form:
 * sec_numbers[] holds the option values and sec_names[]
 * the labels shown for them, at the same indices.
 */
static	const char *const sec_numbers[] = {
    "0",
    "1",
    "2",
    "3",
    "3p",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
120 
/*
 * Architecture names accepted in requests and offered in the
 * architecture selector of the search form, in display order.
 */
static	const char *const arch_names[] = {
    "amd64", "alpha", "armv7", "arm64", "hppa", "i386",
    "landisk", "loongson", "luna88k", "macppc", "mips64", "octeon",
    "sgi", "socppc", "sparc64", "amiga", "arc", "armish",
    "arm32", "atari", "aviion", "beagle", "cats", "hppa64",
    "hp300", "ia64", "mac68k", "mvme68k", "mvme88k", "mvmeppc",
    "palm", "pc532", "pegasos", "pmax", "powerpc", "solbourne",
    "sparc", "sun3", "vax", "wgrisc", "x68k", "zaurus"
};
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
136 
/*
 * Write one character to standard output, replacing the double
 * quote, the ampersand, and the angle brackets by HTML entities.
 * All other bytes, including non-ASCII ones, are written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
163 
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time by way of html_putchar().
 * A NULL argument prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;

	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
177 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of *attr is freed.  An empty string is not
 * stored; it is freed and *attr becomes NULL instead.
 * In both cases, *val is NULL afterwards, so the caller
 * no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	incoming = *val;
	*val = NULL;

	free(*attr);
	if (*incoming != '\0') {
		*attr = incoming;
		return;
	}

	free(incoming);
	*attr = NULL;
}
194 
195 /*
196  * Parse the QUERY_STRING for key-value pairs
197  * and store the values into the query structure.
198  */
199 static void
200 parse_query_string(struct req *req, const char *qs)
201 {
202 	char		*key, *val;
203 	size_t		 keysz, valsz;
204 
205 	req->isquery	= 1;
206 	req->q.manpath	= NULL;
207 	req->q.arch	= NULL;
208 	req->q.sec	= NULL;
209 	req->q.query	= NULL;
210 	req->q.equal	= 1;
211 
212 	key = val = NULL;
213 	while (*qs != '\0') {
214 
215 		/* Parse one key. */
216 
217 		keysz = strcspn(qs, "=;&");
218 		key = mandoc_strndup(qs, keysz);
219 		qs += keysz;
220 		if (*qs != '=')
221 			goto next;
222 
223 		/* Parse one value. */
224 
225 		valsz = strcspn(++qs, ";&");
226 		val = mandoc_strndup(qs, valsz);
227 		qs += valsz;
228 
229 		/* Decode and catch encoding errors. */
230 
231 		if ( ! (http_decode(key) && http_decode(val)))
232 			goto next;
233 
234 		/* Handle key-value pairs. */
235 
236 		if ( ! strcmp(key, "query"))
237 			set_query_attr(&req->q.query, &val);
238 
239 		else if ( ! strcmp(key, "apropos"))
240 			req->q.equal = !strcmp(val, "0");
241 
242 		else if ( ! strcmp(key, "manpath")) {
243 #ifdef COMPAT_OLDURI
244 			if ( ! strncmp(val, "OpenBSD ", 8)) {
245 				val[7] = '-';
246 				if ('C' == val[8])
247 					val[8] = 'c';
248 			}
249 #endif
250 			set_query_attr(&req->q.manpath, &val);
251 		}
252 
253 		else if ( ! (strcmp(key, "sec")
254 #ifdef COMPAT_OLDURI
255 		    && strcmp(key, "sektion")
256 #endif
257 		    )) {
258 			if ( ! strcmp(val, "0"))
259 				*val = '\0';
260 			set_query_attr(&req->q.sec, &val);
261 		}
262 
263 		else if ( ! strcmp(key, "arch")) {
264 			if ( ! strcmp(val, "default"))
265 				*val = '\0';
266 			set_query_attr(&req->q.arch, &val);
267 		}
268 
269 		/*
270 		 * The key must be freed in any case.
271 		 * The val may have been handed over to the query
272 		 * structure, in which case it is now NULL.
273 		 */
274 next:
275 		free(key);
276 		key = NULL;
277 		free(val);
278 		val = NULL;
279 
280 		if (*qs != '\0')
281 			qs++;
282 	}
283 }
284 
/*
 * Return the numeric value of one hexadecimal digit,
 * or -1 if the character is not a hexadecimal digit.
 */
static int
hex_digit_value(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/*
 * HTTP-decode a string in place: "%4e+foo" becomes "N foo".
 * A plus sign decodes to a space, and a percent sign must be
 * followed by exactly two hexadecimal digits.
 * Return 1 on success.  Return 0 if an escape sequence is
 * truncated, contains a character that is not a hexadecimal digit,
 * or encodes a NUL byte; in that case, the content of the string
 * is unspecified, but it remains NUL-terminated.
 */
static int
http_decode(char *p)
{
	char	*q;
	int	 hi, lo, c;

	/* The write pointer q never overtakes the read pointer p. */

	for (q = p; *p != '\0'; p++, q++) {
		if (*p != '%') {
			*q = *p == '+' ? ' ' : *p;
			continue;
		}

		/*
		 * Checking the first digit before looking at the
		 * second one avoids reading beyond the terminating
		 * NUL byte of a truncated escape sequence.
		 */

		if ((hi = hex_digit_value(p[1])) == -1 ||
		    (lo = hex_digit_value(p[2])) == -1)
			return 0;

		/* An embedded NUL byte would truncate the string. */

		if ((c = hi * 16 + lo) == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
320 
/*
 * Write a string to standard output with percent-encoding.
 * ASCII letters and digits as well as the characters "-._~"
 * are written as they are; every other byte is written
 * as a percent sign followed by two uppercase hexadecimal digits.
 */
static void
http_encode(const char *p)
{
	const char	*cp;

	for (cp = p; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp) != 0 ||
		    strchr("-._~", *cp) != NULL)
			putchar(*cp);
		else
			printf("%%%2.2X", (unsigned char)*cp);
	}
}
332 
/*
 * Write the HTTP response header to standard output and flush it.
 * A "Status:" line is only written for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{

	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
347 
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped.
 * Copying stops at end of file and at the first read or write
 * error other than an interrupted system call.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/*
	 * The data goes out with write(2), bypassing stdio, so what
	 * was printed earlier must reach the descriptor first.
	 */

	fflush(stdout);

	for (;;) {
		nr = read(fd, buf, sizeof(buf));
		if (nr == -1 && errno == EINTR)
			continue;
		if (nr <= 0)
			break;

		/* write(2) may accept less than requested. */

		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			if (nw != -1)
				continue;
			if (errno != EINTR) {
				close(fd);
				return;
			}
			nw = 0;
		}
	}
	close(fd);
}
362 
363 static void
364 resp_begin_html(int code, const char *msg, const char *file)
365 {
366 	char	*cp;
367 
368 	resp_begin_http(code, msg);
369 
370 	printf("<!DOCTYPE html>\n"
371 	       "<html>\n"
372 	       "<head>\n"
373 	       "  <meta charset=\"UTF-8\"/>\n"
374 	       "  <meta name=\"viewport\""
375 		      " content=\"width=device-width, initial-scale=1.0\">\n"
376 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
377 	       " type=\"text/css\" media=\"all\">\n"
378 	       "  <title>",
379 	       CSS_DIR);
380 	if (file != NULL) {
381 		if ((cp = strrchr(file, '/')) != NULL)
382 			file = cp + 1;
383 		if ((cp = strrchr(file, '.')) != NULL) {
384 			printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
385 		} else
386 			printf("%s - ", file);
387 	}
388 	printf("%s</title>\n"
389 	       "</head>\n"
390 	       "<body>\n",
391 	       CUSTOMIZE_TITLE);
392 
393 	resp_copy(MAN_DIR "/header.html");
394 }
395 
396 static void
397 resp_end_html(void)
398 {
399 
400 	resp_copy(MAN_DIR "/footer.html");
401 
402 	puts("</body>\n"
403 	     "</html>");
404 }
405 
406 static void
407 resp_searchform(const struct req *req, enum focus focus)
408 {
409 	int		 i;
410 
411 	printf("<form action=\"/%s\" method=\"get\">\n"
412 	       "  <fieldset>\n"
413 	       "    <legend>Manual Page Search Parameters</legend>\n",
414 	       scriptname);
415 
416 	/* Write query input box. */
417 
418 	printf("    <input type=\"search\" name=\"query\" value=\"");
419 	if (req->q.query != NULL)
420 		html_print(req->q.query);
421 	printf( "\" size=\"40\"");
422 	if (focus == FOCUS_QUERY)
423 		printf(" autofocus");
424 	puts(">");
425 
426 	/* Write submission buttons. */
427 
428 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
429 		"man</button>\n"
430 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
431 		"apropos</button>\n"
432 		"    <br/>\n");
433 
434 	/* Write section selector. */
435 
436 	puts("    <select name=\"sec\">");
437 	for (i = 0; i < sec_MAX; i++) {
438 		printf("      <option value=\"%s\"", sec_numbers[i]);
439 		if (NULL != req->q.sec &&
440 		    0 == strcmp(sec_numbers[i], req->q.sec))
441 			printf(" selected=\"selected\"");
442 		printf(">%s</option>\n", sec_names[i]);
443 	}
444 	puts("    </select>");
445 
446 	/* Write architecture selector. */
447 
448 	printf(	"    <select name=\"arch\">\n"
449 		"      <option value=\"default\"");
450 	if (NULL == req->q.arch)
451 		printf(" selected=\"selected\"");
452 	puts(">All Architectures</option>");
453 	for (i = 0; i < arch_MAX; i++) {
454 		printf("      <option");
455 		if (NULL != req->q.arch &&
456 		    0 == strcmp(arch_names[i], req->q.arch))
457 			printf(" selected=\"selected\"");
458 		printf(">%s</option>\n", arch_names[i]);
459 	}
460 	puts("    </select>");
461 
462 	/* Write manpath selector. */
463 
464 	if (req->psz > 1) {
465 		puts("    <select name=\"manpath\">");
466 		for (i = 0; i < (int)req->psz; i++) {
467 			printf("      <option");
468 			if (strcmp(req->q.manpath, req->p[i]) == 0)
469 				printf(" selected=\"selected\"");
470 			printf(">");
471 			html_print(req->p[i]);
472 			puts("</option>");
473 		}
474 		puts("    </select>");
475 	}
476 
477 	puts("  </fieldset>\n"
478 	     "</form>");
479 }
480 
/*
 * Check that a string consists only of letters, digits,
 * and the characters '-', '.', '/', and '_'.
 * Return 1 if so, including for the empty string, or 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return 0;
		}
	}
	return 1;
}
494 
495 static int
496 validate_manpath(const struct req *req, const char* manpath)
497 {
498 	size_t	 i;
499 
500 	for (i = 0; i < req->psz; i++)
501 		if ( ! strcmp(manpath, req->p[i]))
502 			return 1;
503 
504 	return 0;
505 }
506 
507 static int
508 validate_arch(const char *arch)
509 {
510 	int	 i;
511 
512 	for (i = 0; i < arch_MAX; i++)
513 		if (strcmp(arch, arch_names[i]) == 0)
514 			return 1;
515 
516 	return 0;
517 }
518 
/*
 * Check that a file name is acceptable for opening:
 * after an optional leading "./", it must start with "man" or
 * "cat", and it must contain neither "../" nor "/..".
 * Return 1 if it is acceptable, or 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	/* Refuse anything that might climb up the directory tree. */

	if (strstr(rel, "../") != NULL)
		return 0;
	if (strstr(rel, "/..") != NULL)
		return 0;

	if (strncmp(rel, "man", 3) == 0)
		return 1;
	return strncmp(rel, "cat", 3) == 0;
}
529 
530 static void
531 pg_index(const struct req *req)
532 {
533 
534 	resp_begin_html(200, NULL, NULL);
535 	resp_searchform(req, FOCUS_QUERY);
536 	printf("<p>\n"
537 	       "This web interface is documented in the\n"
538 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
539 	       "manual, and the\n"
540 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
541 	       "manual explains the query syntax.\n"
542 	       "</p>\n",
543 	       scriptname, *scriptname == '\0' ? "" : "/",
544 	       scriptname, *scriptname == '\0' ? "" : "/");
545 	resp_end_html();
546 }
547 
548 static void
549 pg_noresult(const struct req *req, const char *msg)
550 {
551 	resp_begin_html(200, NULL, NULL);
552 	resp_searchform(req, FOCUS_QUERY);
553 	puts("<p>");
554 	puts(msg);
555 	puts("</p>");
556 	resp_end_html();
557 }
558 
559 static void
560 pg_error_badrequest(const char *msg)
561 {
562 
563 	resp_begin_html(400, "Bad Request", NULL);
564 	puts("<h1>Bad Request</h1>\n"
565 	     "<p>\n");
566 	puts(msg);
567 	printf("Try again from the\n"
568 	       "<a href=\"/%s\">main page</a>.\n"
569 	       "</p>", scriptname);
570 	resp_end_html();
571 }
572 
/*
 * Write a complete "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	static const char	 text[] = "Internal Server Error";

	resp_begin_html(500, text, NULL);
	printf("<p>%s</p>\n", text);
	resp_end_html();
}
580 
581 static void
582 pg_redirect(const struct req *req, const char *name)
583 {
584 	printf("Status: 303 See Other\r\n"
585 	    "Location: /");
586 	if (*scriptname != '\0')
587 		printf("%s/", scriptname);
588 	if (strcmp(req->q.manpath, req->p[0]))
589 		printf("%s/", req->q.manpath);
590 	if (req->q.arch != NULL)
591 		printf("%s/", req->q.arch);
592 	http_encode(name);
593 	if (req->q.sec != NULL) {
594 		putchar('.');
595 		http_encode(req->q.sec);
596 	}
597 	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
598 }
599 
600 static void
601 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
602 {
603 	char		*arch, *archend;
604 	const char	*sec;
605 	size_t		 i, iuse;
606 	int		 archprio, archpriouse;
607 	int		 prio, priouse;
608 
609 	for (i = 0; i < sz; i++) {
610 		if (validate_filename(r[i].file))
611 			continue;
612 		warnx("invalid filename %s in %s database",
613 		    r[i].file, req->q.manpath);
614 		pg_error_internal();
615 		return;
616 	}
617 
618 	if (req->isquery && sz == 1) {
619 		/*
620 		 * If we have just one result, then jump there now
621 		 * without any delay.
622 		 */
623 		printf("Status: 303 See Other\r\n"
624 		    "Location: /");
625 		if (*scriptname != '\0')
626 			printf("%s/", scriptname);
627 		if (strcmp(req->q.manpath, req->p[0]))
628 			printf("%s/", req->q.manpath);
629 		printf("%s\r\n"
630 		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
631 		    r[0].file);
632 		return;
633 	}
634 
635 	/*
636 	 * In man(1) mode, show one of the pages
637 	 * even if more than one is found.
638 	 */
639 
640 	iuse = 0;
641 	if (req->q.equal || sz == 1) {
642 		priouse = 20;
643 		archpriouse = 3;
644 		for (i = 0; i < sz; i++) {
645 			sec = r[i].file;
646 			sec += strcspn(sec, "123456789");
647 			if (sec[0] == '\0')
648 				continue;
649 			prio = sec_prios[sec[0] - '1'];
650 			if (sec[1] != '/')
651 				prio += 10;
652 			if (req->q.arch == NULL) {
653 				archprio =
654 				    ((arch = strchr(sec + 1, '/'))
655 					== NULL) ? 3 :
656 				    ((archend = strchr(arch + 1, '/'))
657 					== NULL) ? 0 :
658 				    strncmp(arch, "amd64/",
659 					archend - arch) ? 2 : 1;
660 				if (archprio < archpriouse) {
661 					archpriouse = archprio;
662 					priouse = prio;
663 					iuse = i;
664 					continue;
665 				}
666 				if (archprio > archpriouse)
667 					continue;
668 			}
669 			if (prio >= priouse)
670 				continue;
671 			priouse = prio;
672 			iuse = i;
673 		}
674 		resp_begin_html(200, NULL, r[iuse].file);
675 	} else
676 		resp_begin_html(200, NULL, NULL);
677 
678 	resp_searchform(req,
679 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
680 
681 	if (sz > 1) {
682 		puts("<table class=\"results\">");
683 		for (i = 0; i < sz; i++) {
684 			printf("  <tr>\n"
685 			       "    <td>"
686 			       "<a class=\"Xr\" href=\"/");
687 			if (*scriptname != '\0')
688 				printf("%s/", scriptname);
689 			if (strcmp(req->q.manpath, req->p[0]))
690 				printf("%s/", req->q.manpath);
691 			printf("%s\">", r[i].file);
692 			html_print(r[i].names);
693 			printf("</a></td>\n"
694 			       "    <td><span class=\"Nd\">");
695 			html_print(r[i].output);
696 			puts("</span></td>\n"
697 			     "  </tr>");
698 		}
699 		puts("</table>");
700 	}
701 
702 	if (req->q.equal || sz == 1) {
703 		puts("<hr>");
704 		resp_show(req, r[iuse].file);
705 	}
706 
707 	resp_end_html();
708 }
709 
/*
 * Write a preformatted manual page (cat page) as HTML.
 * Backspace overstrike sequences are translated:
 * "_\bX" becomes an italic X, a few pairs of different characters
 * printed on top of each other become '*' or '+', and all other
 * sequences "X\bY" become a bold Y.
 * If the file cannot be opened, an error paragraph is written.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character printed on top
			 * of p[i].
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * All of them compare the two characters
			 * that are printed on top of each other.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
846 
847 static void
848 resp_format(const struct req *req, const char *file)
849 {
850 	struct manoutput conf;
851 	struct mparse	*mp;
852 	struct roff_meta *meta;
853 	void		*vp;
854 	int		 fd;
855 	int		 usepath;
856 
857 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
858 		puts("<p>You specified an invalid manual file.</p>");
859 		return;
860 	}
861 
862 	mchars_alloc();
863 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
864 	    MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
865 	mparse_readfd(mp, fd, file);
866 	close(fd);
867 	meta = mparse_result(mp);
868 
869 	memset(&conf, 0, sizeof(conf));
870 	conf.fragment = 1;
871 	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
872 	usepath = strcmp(req->q.manpath, req->p[0]);
873 	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
874 	    scriptname, *scriptname == '\0' ? "" : "/",
875 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
876 
877 	vp = html_alloc(&conf);
878 	if (meta->macroset == MACROSET_MDOC)
879 		html_mdoc(vp, meta);
880 	else
881 		html_man(vp, meta);
882 
883 	html_free(vp);
884 	mparse_free(mp);
885 	mchars_free();
886 	free(conf.man);
887 	free(conf.style);
888 }
889 
/*
 * Write one manual page as HTML.  After skipping an optional
 * leading "./", a file name starting with the letter 'c' is
 * treated as a preformatted page and anything else as a manual
 * page source file.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel != 'c') {
		resp_format(req, rel);
		return;
	}
	resp_catman(req, rel);
}
902 
903 static void
904 pg_show(struct req *req, const char *fullpath)
905 {
906 	char		*manpath;
907 	const char	*file;
908 
909 	if ((file = strchr(fullpath, '/')) == NULL) {
910 		pg_error_badrequest(
911 		    "You did not specify a page to show.");
912 		return;
913 	}
914 	manpath = mandoc_strndup(fullpath, file - fullpath);
915 	file++;
916 
917 	if ( ! validate_manpath(req, manpath)) {
918 		pg_error_badrequest(
919 		    "You specified an invalid manpath.");
920 		free(manpath);
921 		return;
922 	}
923 
924 	/*
925 	 * Begin by chdir()ing into the manpath.
926 	 * This way we can pick up the database files, which are
927 	 * relative to the manpath root.
928 	 */
929 
930 	if (chdir(manpath) == -1) {
931 		warn("chdir %s", manpath);
932 		pg_error_internal();
933 		free(manpath);
934 		return;
935 	}
936 	free(manpath);
937 
938 	if ( ! validate_filename(file)) {
939 		pg_error_badrequest(
940 		    "You specified an invalid manual file.");
941 		return;
942 	}
943 
944 	resp_begin_html(200, NULL, file);
945 	resp_searchform(req, FOCUS_NONE);
946 	resp_show(req, file);
947 	resp_end_html();
948 }
949 
950 static void
951 pg_search(const struct req *req)
952 {
953 	struct mansearch	  search;
954 	struct manpaths		  paths;
955 	struct manpage		 *res;
956 	char			**argv;
957 	char			 *query, *rp, *wp;
958 	size_t			  ressz;
959 	int			  argc;
960 
961 	/*
962 	 * Begin by chdir()ing into the root of the manpath.
963 	 * This way we can pick up the database files, which are
964 	 * relative to the manpath root.
965 	 */
966 
967 	if (chdir(req->q.manpath) == -1) {
968 		warn("chdir %s", req->q.manpath);
969 		pg_error_internal();
970 		return;
971 	}
972 
973 	search.arch = req->q.arch;
974 	search.sec = req->q.sec;
975 	search.outkey = "Nd";
976 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
977 	search.firstmatch = 1;
978 
979 	paths.sz = 1;
980 	paths.paths = mandoc_malloc(sizeof(char *));
981 	paths.paths[0] = mandoc_strdup(".");
982 
983 	/*
984 	 * Break apart at spaces with backslash-escaping.
985 	 */
986 
987 	argc = 0;
988 	argv = NULL;
989 	rp = query = mandoc_strdup(req->q.query);
990 	for (;;) {
991 		while (isspace((unsigned char)*rp))
992 			rp++;
993 		if (*rp == '\0')
994 			break;
995 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
996 		argv[argc++] = wp = rp;
997 		for (;;) {
998 			if (isspace((unsigned char)*rp)) {
999 				*wp = '\0';
1000 				rp++;
1001 				break;
1002 			}
1003 			if (rp[0] == '\\' && rp[1] != '\0')
1004 				rp++;
1005 			if (wp != rp)
1006 				*wp = *rp;
1007 			if (*rp == '\0')
1008 				break;
1009 			wp++;
1010 			rp++;
1011 		}
1012 	}
1013 
1014 	res = NULL;
1015 	ressz = 0;
1016 	if (req->isquery && req->q.equal && argc == 1)
1017 		pg_redirect(req, argv[0]);
1018 	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1019 		pg_noresult(req, "You entered an invalid query.");
1020 	else if (ressz == 0)
1021 		pg_noresult(req, "No results found.");
1022 	else
1023 		pg_searchres(req, res, ressz);
1024 
1025 	free(query);
1026 	mansearch_free(res, ressz);
1027 	free(paths.paths[0]);
1028 	free(paths.paths);
1029 }
1030 
1031 int
1032 main(void)
1033 {
1034 	struct req	 req;
1035 	struct itimerval itimer;
1036 	const char	*path;
1037 	const char	*querystring;
1038 	int		 i;
1039 
1040 #if HAVE_PLEDGE
1041 	/*
1042 	 * The "rpath" pledge could be revoked after mparse_readfd()
1043 	 * if the file desciptor to "/footer.html" would be opened
1044 	 * up front, but it's probably not worth the complication
1045 	 * of the code it would cause: it would require scattering
1046 	 * pledge() calls in multiple low-level resp_*() functions.
1047 	 */
1048 
1049 	if (pledge("stdio rpath", NULL) == -1) {
1050 		warn("pledge");
1051 		pg_error_internal();
1052 		return EXIT_FAILURE;
1053 	}
1054 #endif
1055 
1056 	/* Poor man's ReDoS mitigation. */
1057 
1058 	itimer.it_value.tv_sec = 2;
1059 	itimer.it_value.tv_usec = 0;
1060 	itimer.it_interval.tv_sec = 2;
1061 	itimer.it_interval.tv_usec = 0;
1062 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1063 		warn("setitimer");
1064 		pg_error_internal();
1065 		return EXIT_FAILURE;
1066 	}
1067 
1068 	/*
1069 	 * First we change directory into the MAN_DIR so that
1070 	 * subsequent scanning for manpath directories is rooted
1071 	 * relative to the same position.
1072 	 */
1073 
1074 	if (chdir(MAN_DIR) == -1) {
1075 		warn("MAN_DIR: %s", MAN_DIR);
1076 		pg_error_internal();
1077 		return EXIT_FAILURE;
1078 	}
1079 
1080 	memset(&req, 0, sizeof(struct req));
1081 	req.q.equal = 1;
1082 	parse_manpath_conf(&req);
1083 
1084 	/* Parse the path info and the query string. */
1085 
1086 	if ((path = getenv("PATH_INFO")) == NULL)
1087 		path = "";
1088 	else if (*path == '/')
1089 		path++;
1090 
1091 	if (*path != '\0') {
1092 		parse_path_info(&req, path);
1093 		if (req.q.manpath == NULL || req.q.sec == NULL ||
1094 		    *req.q.query == '\0' || access(path, F_OK) == -1)
1095 			path = "";
1096 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1097 		parse_query_string(&req, querystring);
1098 
1099 	/* Validate parsed data and add defaults. */
1100 
1101 	if (req.q.manpath == NULL)
1102 		req.q.manpath = mandoc_strdup(req.p[0]);
1103 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1104 		pg_error_badrequest(
1105 		    "You specified an invalid manpath.");
1106 		return EXIT_FAILURE;
1107 	}
1108 
1109 	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1110 		pg_error_badrequest(
1111 		    "You specified an invalid architecture.");
1112 		return EXIT_FAILURE;
1113 	}
1114 
1115 	/* Dispatch to the three different pages. */
1116 
1117 	if ('\0' != *path)
1118 		pg_show(&req, path);
1119 	else if (NULL != req.q.query)
1120 		pg_search(&req);
1121 	else
1122 		pg_index(&req);
1123 
1124 	free(req.q.manpath);
1125 	free(req.q.arch);
1126 	free(req.q.sec);
1127 	free(req.q.query);
1128 	for (i = 0; i < (int)req.psz; i++)
1129 		free(req.p[i]);
1130 	free(req.p);
1131 	return EXIT_SUCCESS;
1132 }
1133 
1134 /*
1135  * Translate PATH_INFO to a query.
1136  */
1137 static void
1138 parse_path_info(struct req *req, const char *path)
1139 {
1140 	const char	*name, *sec, *end;
1141 
1142 	req->isquery = 0;
1143 	req->q.equal = 1;
1144 	req->q.manpath = NULL;
1145 	req->q.arch = NULL;
1146 
1147 	/* Mandatory manual page name. */
1148 	if ((name = strrchr(path, '/')) == NULL)
1149 		name = path;
1150 	else
1151 		name++;
1152 
1153 	/* Optional trailing section. */
1154 	sec = strrchr(name, '.');
1155 	if (sec != NULL && isdigit((unsigned char)*++sec)) {
1156 		req->q.query = mandoc_strndup(name, sec - name - 1);
1157 		req->q.sec = mandoc_strdup(sec);
1158 	} else {
1159 		req->q.query = mandoc_strdup(name);
1160 		req->q.sec = NULL;
1161 	}
1162 
1163 	/* Handle the case of name[.section] only. */
1164 	if (name == path)
1165 		return;
1166 
1167 	/* Optional manpath. */
1168 	end = strchr(path, '/');
1169 	req->q.manpath = mandoc_strndup(path, end - path);
1170 	if (validate_manpath(req, req->q.manpath)) {
1171 		path = end + 1;
1172 		if (name == path)
1173 			return;
1174 	} else {
1175 		free(req->q.manpath);
1176 		req->q.manpath = NULL;
1177 	}
1178 
1179 	/* Optional section. */
1180 	if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1181 		path += 3;
1182 		end = strchr(path, '/');
1183 		free(req->q.sec);
1184 		req->q.sec = mandoc_strndup(path, end - path);
1185 		path = end + 1;
1186 		if (name == path)
1187 			return;
1188 	}
1189 
1190 	/* Optional architecture. */
1191 	end = strchr(path, '/');
1192 	if (end + 1 != name) {
1193 		pg_error_badrequest(
1194 		    "You specified too many directory components.");
1195 		exit(EXIT_FAILURE);
1196 	}
1197 	req->q.arch = mandoc_strndup(path, end - path);
1198 	if (validate_arch(req->q.arch) == 0) {
1199 		pg_error_badrequest(
1200 		    "You specified an invalid directory component.");
1201 		exit(EXIT_FAILURE);
1202 	}
1203 }
1204 
1205 /*
1206  * Scan for indexable paths.
1207  */
1208 static void
1209 parse_manpath_conf(struct req *req)
1210 {
1211 	FILE	*fp;
1212 	char	*dp;
1213 	size_t	 dpsz;
1214 	ssize_t	 len;
1215 
1216 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1217 		warn("%s/manpath.conf", MAN_DIR);
1218 		pg_error_internal();
1219 		exit(EXIT_FAILURE);
1220 	}
1221 
1222 	dp = NULL;
1223 	dpsz = 0;
1224 
1225 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1226 		if (dp[len - 1] == '\n')
1227 			dp[--len] = '\0';
1228 		req->p = mandoc_realloc(req->p,
1229 		    (req->psz + 1) * sizeof(char *));
1230 		if ( ! validate_urifrag(dp)) {
1231 			warnx("%s/manpath.conf contains "
1232 			    "unsafe path \"%s\"", MAN_DIR, dp);
1233 			pg_error_internal();
1234 			exit(EXIT_FAILURE);
1235 		}
1236 		if (strchr(dp, '/') != NULL) {
1237 			warnx("%s/manpath.conf contains "
1238 			    "path with slash \"%s\"", MAN_DIR, dp);
1239 			pg_error_internal();
1240 			exit(EXIT_FAILURE);
1241 		}
1242 		req->p[req->psz++] = dp;
1243 		dp = NULL;
1244 		dpsz = 0;
1245 	}
1246 	free(dp);
1247 
1248 	if (req->p == NULL) {
1249 		warnx("%s/manpath.conf is empty", MAN_DIR);
1250 		pg_error_internal();
1251 		exit(EXIT_FAILURE);
1252 	}
1253 }
1254