xref: /freebsd/contrib/mandoc/cgi.c (revision 6c05f3a74f30934ee60919cc97e16ec69b542b06)
1 /* $Id: cgi.c,v 1.181 2023/04/28 19:11:03 schwarze Exp $ */
2 /*
3  * Copyright (c) 2014-2019, 2021, 2022 Ingo Schwarze <schwarze@usta.de>
4  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5  * Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  *
19  * Implementation of the man.cgi(8) program.
20  */
21 #include "config.h"
22 
23 #include <sys/types.h>
24 #include <sys/time.h>
25 
26 #include <ctype.h>
27 #if HAVE_ERR
28 #include <err.h>
29 #endif
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <limits.h>
33 #include <stdint.h>
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 
39 #include "mandoc_aux.h"
40 #include "mandoc.h"
41 #include "roff.h"
42 #include "mdoc.h"
43 #include "man.h"
44 #include "mandoc_parse.h"
45 #include "main.h"
46 #include "manconf.h"
47 #include "mansearch.h"
48 #include "cgi.h"
49 
/*
 * The search parameters of one request,
 * in the form handed to the search function.
 * String members are NULL when the parameter was not given.
 */
struct query {
	char	*manpath;	/* manual directory to search in */
	char	*arch;		/* machine architecture */
	char	*sec;		/* manual section */
	char	*query;		/* search expression, not yet parsed */
	int	 equal;		/* nonzero: match whole names, not substrings */
};
60 
61 struct	req {
62 	struct query	  q;
63 	char		**p; /* array of available manpaths */
64 	size_t		  psz; /* number of available manpaths */
65 	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
66 };
67 
/* Selects which element of the search form requests the autofocus. */
enum focus {
	FOCUS_NONE = 0,		/* no element */
	FOCUS_QUERY		/* the query input box */
};
72 
/* Forward declarations of the file-local functions, sorted by name. */
static void	html_print(const char *p);
static void	html_putchar(char c);
static int	http_decode(char *p);
static void	http_encode(const char *p);
static void	parse_manpath_conf(struct req *req);
static void	parse_path_info(struct req *req, const char *path);
static void	parse_query_string(struct req *req, const char *qs);
static void	pg_error_badrequest(const char *msg);
static void	pg_error_internal(void);
static void	pg_index(const struct req *req);
static void	pg_noresult(const struct req *req, int code,
		    const char *http_msg, const char *user_msg);
static void	pg_redirect(const struct req *req, const char *name);
static void	pg_search(const struct req *req);
static void	pg_searchres(const struct req *req,
		    struct manpage *r, size_t sz);
static void	pg_show(struct req *req, const char *fullpath);
static int	resp_begin_html(int code, const char *msg,
		    const char *file);
static void	resp_begin_http(int code, const char *msg);
static void	resp_catman(const struct req *req, const char *file);
static int	resp_copy(const char *element, const char *filename);
static void	resp_end_html(void);
static void	resp_format(const struct req *req, const char *file);
static void	resp_searchform(const struct req *req, enum focus focus);
static void	resp_show(const struct req *req, const char *file);
static void	set_query_attr(char **attr, char **val);
static int	validate_arch(const char *arch);
static int	validate_filename(const char *file);
static int	validate_manpath(const struct req *req,
		    const char *manpath);
static int	validate_urifrag(const char *frag);
103 
104 static	const char	 *scriptname = SCRIPT_NAME;
105 
/*
 * Priorities of the sections 1 to 9, indexed by section digit
 * minus one; when choosing one page among several search results,
 * the smaller number wins.
 */
static const int sec_prios[] = { 1, 4, 5, 8, 6, 3, 7, 2, 9 };

/* Option values of the section selector in the search form. */
static const char *const sec_numbers[] = {
	"0",
	"1",
	"2",
	"3",
	"3p",
	"4",
	"5",
	"6",
	"7",
	"8",
	"9"
};

/* Labels of the section selector, parallel to sec_numbers[]. */
static const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};

/* Number of entries in the section selector. */
static const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
124 
/*
 * Architecture names accepted in requests
 * and offered by the architecture selector of the search form.
 */
static const char *const arch_names[] = {
	"amd64",	"alpha",	"armv7",
	"arm64",	"hppa",		"i386",
	"landisk",	"loongson",	"luna88k",
	"macppc",	"mips64",	"octeon",
	"powerpc64",	"riscv64",	"sparc64",

	"amiga",	"arc",		"armish",
	"arm32",	"atari",	"aviion",
	"beagle",	"cats",		"hppa64",
	"hp300",	"ia64",		"mac68k",
	"mvme68k",	"mvme88k",	"mvmeppc",
	"palm",		"pc532",	"pegasos",
	"pmax",		"powerpc",	"sgi",
	"socppc",	"solbourne",	"sparc",
	"sun3",		"vax",		"wgrisc",
	"x68k",		"zaurus"
};

/* Number of entries in arch_names[]. */
static const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
142 
/*
 * Write one character to standard output, replacing the double
 * quote, the ampersand, and the angle brackets by HTML entities.
 * All other bytes, non-ASCII ones included, are written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity == NULL)
		putchar((unsigned char)c);
	else
		fputs(entity, stdout);
}
169 
/*
 * Write a string to standard output,
 * passing every character through html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
183 
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of the attribute is freed.
 * An empty string is not stored: it is freed and the attribute
 * becomes NULL.  In both cases, *val is NULL on return because
 * the caller no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*incoming;

	free(*attr);
	incoming = *val;
	if (*incoming != '\0') {
		*attr = incoming;
	} else {
		free(incoming);
		*attr = NULL;
	}
	*val = NULL;
}
200 
201 /*
202  * Parse the QUERY_STRING for key-value pairs
203  * and store the values into the query structure.
204  */
205 static void
206 parse_query_string(struct req *req, const char *qs)
207 {
208 	char		*key, *val;
209 	size_t		 keysz, valsz;
210 
211 	req->isquery	= 1;
212 	req->q.manpath	= NULL;
213 	req->q.arch	= NULL;
214 	req->q.sec	= NULL;
215 	req->q.query	= NULL;
216 	req->q.equal	= 1;
217 
218 	key = val = NULL;
219 	while (*qs != '\0') {
220 
221 		/* Parse one key. */
222 
223 		keysz = strcspn(qs, "=;&");
224 		key = mandoc_strndup(qs, keysz);
225 		qs += keysz;
226 		if (*qs != '=')
227 			goto next;
228 
229 		/* Parse one value. */
230 
231 		valsz = strcspn(++qs, ";&");
232 		val = mandoc_strndup(qs, valsz);
233 		qs += valsz;
234 
235 		/* Decode and catch encoding errors. */
236 
237 		if ( ! (http_decode(key) && http_decode(val)))
238 			goto next;
239 
240 		/* Handle key-value pairs. */
241 
242 		if ( ! strcmp(key, "query"))
243 			set_query_attr(&req->q.query, &val);
244 
245 		else if ( ! strcmp(key, "apropos"))
246 			req->q.equal = !strcmp(val, "0");
247 
248 		else if ( ! strcmp(key, "manpath")) {
249 #ifdef COMPAT_OLDURI
250 			if ( ! strncmp(val, "OpenBSD ", 8)) {
251 				val[7] = '-';
252 				if ('C' == val[8])
253 					val[8] = 'c';
254 			}
255 #endif
256 			set_query_attr(&req->q.manpath, &val);
257 		}
258 
259 		else if ( ! (strcmp(key, "sec")
260 #ifdef COMPAT_OLDURI
261 		    && strcmp(key, "sektion")
262 #endif
263 		    )) {
264 			if ( ! strcmp(val, "0"))
265 				*val = '\0';
266 			set_query_attr(&req->q.sec, &val);
267 		}
268 
269 		else if ( ! strcmp(key, "arch")) {
270 			if ( ! strcmp(val, "default"))
271 				*val = '\0';
272 			set_query_attr(&req->q.arch, &val);
273 		}
274 
275 		/*
276 		 * The key must be freed in any case.
277 		 * The val may have been handed over to the query
278 		 * structure, in which case it is now NULL.
279 		 */
280 next:
281 		free(key);
282 		key = NULL;
283 		free(val);
284 		val = NULL;
285 
286 		if (*qs != '\0')
287 			qs++;
288 	}
289 }
290 
291 /*
292  * HTTP-decode a string.  The standard explanation is that this turns
293  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
294  * over the allocated string.
295  */
296 static int
297 http_decode(char *p)
298 {
299 	char             hex[3];
300 	char		*q;
301 	int              c;
302 
303 	hex[2] = '\0';
304 
305 	q = p;
306 	for ( ; '\0' != *p; p++, q++) {
307 		if ('%' == *p) {
308 			if ('\0' == (hex[0] = *(p + 1)))
309 				return 0;
310 			if ('\0' == (hex[1] = *(p + 2)))
311 				return 0;
312 			if (1 != sscanf(hex, "%x", &c))
313 				return 0;
314 			if ('\0' == c)
315 				return 0;
316 
317 			*q = (char)c;
318 			p += 2;
319 		} else
320 			*q = '+' == *p ? ' ' : *p;
321 	}
322 
323 	*q = '\0';
324 	return 1;
325 }
326 
/*
 * Write a string to standard output in percent-encoded form.
 * Alphanumeric characters and the characters "-._~" are copied
 * unchanged; every other byte is written as a percent sign
 * followed by two uppercase hexadecimal digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
338 
/*
 * Write the HTTP response headers, followed by the empty line
 * ending the header section, and flush standard output.
 * A Status: line is only written for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	     "Cache-Control: no-cache\r\n"
	     "Content-Security-Policy: default-src 'none'; "
	     "style-src 'self' 'unsafe-inline'\r\n"
	     "Pragma: no-cache\r\n"
	     "\r\n", stdout);

	fflush(stdout);
}
355 
/*
 * Copy the content of a file verbatim to standard output.
 * If element is not NULL, an opening tag for that HTML element
 * is printed first; the caller is responsible for closing it.
 *
 * Return 0 without printing anything if the file cannot be opened,
 * or 1 if it was opened, even if copying was cut short by an error.
 *
 * Since write(2) may transfer less than requested and both read(2)
 * and write(2) may be interrupted by a signal, interrupted calls
 * are retried and each chunk is written until it is complete.
 */
static int
resp_copy(const char *element, const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return 0;

	if (element != NULL)
		printf("<%s>\n", element);

	/* The copy bypasses stdio, so push out buffered output first. */

	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) != 0) {
		if (nr == -1) {
			if (errno == EINTR)
				continue;
			break;
		}
		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			if (nw > 0)
				continue;
			if (nw == -1 && errno == EINTR) {
				nw = 0;
				continue;
			}
			/* Unrecoverable write error: stop copying. */
			goto done;
		}
	}
done:
	close(fd);
	return 1;
}
374 
375 static int
376 resp_begin_html(int code, const char *msg, const char *file)
377 {
378 	const char	*name, *sec, *cp;
379 	int		 namesz, secsz;
380 
381 	resp_begin_http(code, msg);
382 
383 	printf("<!DOCTYPE html>\n"
384 	       "<html>\n"
385 	       "<head>\n"
386 	       "  <meta charset=\"UTF-8\"/>\n"
387 	       "  <meta name=\"viewport\""
388 		      " content=\"width=device-width, initial-scale=1.0\">\n"
389 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
390 	       " type=\"text/css\" media=\"all\">\n"
391 	       "  <title>",
392 	       CSS_DIR);
393 	if (file != NULL) {
394 		cp = strrchr(file, '/');
395 		name = cp == NULL ? file : cp + 1;
396 		cp = strrchr(name, '.');
397 		namesz = cp == NULL ? strlen(name) : cp - name;
398 		sec = NULL;
399 		if (cp != NULL && cp[1] != '0') {
400 			sec = cp + 1;
401 			secsz = strlen(sec);
402 		} else if (name - file > 1) {
403 			for (cp = name - 2; cp >= file; cp--) {
404 				if (*cp < '1' || *cp > '9')
405 					continue;
406 				sec = cp;
407 				secsz = name - cp - 1;
408 				break;
409 			}
410 		}
411 		printf("%.*s", namesz, name);
412 		if (sec != NULL)
413 			printf("(%.*s)", secsz, sec);
414 		fputs(" - ", stdout);
415 	}
416 	printf("%s</title>\n"
417 	       "</head>\n"
418 	       "<body>\n",
419 	       CUSTOMIZE_TITLE);
420 
421 	return resp_copy("header", MAN_DIR "/header.html");
422 }
423 
424 static void
425 resp_end_html(void)
426 {
427 	if (resp_copy("footer", MAN_DIR "/footer.html"))
428 		puts("</footer>");
429 
430 	puts("</body>\n"
431 	     "</html>");
432 }
433 
434 static void
435 resp_searchform(const struct req *req, enum focus focus)
436 {
437 	int		 i;
438 
439 	printf("<form role=\"search\" action=\"/%s\" method=\"get\" "
440 	       "autocomplete=\"off\" autocapitalize=\"none\">\n"
441 	       "  <fieldset>\n"
442 	       "    <legend>Manual Page Search Parameters</legend>\n",
443 	       scriptname);
444 
445 	/* Write query input box. */
446 
447 	printf("    <label>Search query:\n"
448 	       "      <input type=\"search\" name=\"query\" value=\"");
449 	if (req->q.query != NULL)
450 		html_print(req->q.query);
451 	printf("\" size=\"40\"");
452 	if (focus == FOCUS_QUERY)
453 		printf(" autofocus");
454 	puts(">\n    </label>");
455 
456 	/* Write submission buttons. */
457 
458 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
459 		"man</button>\n"
460 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
461 		"apropos</button>\n"
462 		"    <br/>\n");
463 
464 	/* Write section selector. */
465 
466 	puts("    <select name=\"sec\" aria-label=\"Manual section\">");
467 	for (i = 0; i < sec_MAX; i++) {
468 		printf("      <option value=\"%s\"", sec_numbers[i]);
469 		if (NULL != req->q.sec &&
470 		    0 == strcmp(sec_numbers[i], req->q.sec))
471 			printf(" selected=\"selected\"");
472 		printf(">%s</option>\n", sec_names[i]);
473 	}
474 	puts("    </select>");
475 
476 	/* Write architecture selector. */
477 
478 	printf(	"    <select name=\"arch\" aria-label=\"CPU architecture\">\n"
479 		"      <option value=\"default\"");
480 	if (NULL == req->q.arch)
481 		printf(" selected=\"selected\"");
482 	puts(">All Architectures</option>");
483 	for (i = 0; i < arch_MAX; i++) {
484 		printf("      <option");
485 		if (NULL != req->q.arch &&
486 		    0 == strcmp(arch_names[i], req->q.arch))
487 			printf(" selected=\"selected\"");
488 		printf(">%s</option>\n", arch_names[i]);
489 	}
490 	puts("    </select>");
491 
492 	/* Write manpath selector. */
493 
494 	if (req->psz > 1) {
495 		puts("    <select name=\"manpath\""
496 		     " aria-label=\"Manual path\">");
497 		for (i = 0; i < (int)req->psz; i++) {
498 			printf("      <option");
499 			if (strcmp(req->q.manpath, req->p[i]) == 0)
500 				printf(" selected=\"selected\"");
501 			printf(">");
502 			html_print(req->p[i]);
503 			puts("</option>");
504 		}
505 		puts("    </select>");
506 	}
507 
508 	puts("  </fieldset>\n"
509 	     "</form>");
510 }
511 
/*
 * Check that a string can safely be used as part of a URI:
 * it must consist of alphanumeric characters, hyphens, dots,
 * slashes, and underscores only.  The empty string is valid.
 * Return 1 if the string is valid or 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			return 0;
		}
	}
	return 1;
}
525 
526 static int
527 validate_manpath(const struct req *req, const char* manpath)
528 {
529 	size_t	 i;
530 
531 	for (i = 0; i < req->psz; i++)
532 		if ( ! strcmp(manpath, req->p[i]))
533 			return 1;
534 
535 	return 0;
536 }
537 
538 static int
539 validate_arch(const char *arch)
540 {
541 	int	 i;
542 
543 	for (i = 0; i < arch_MAX; i++)
544 		if (strcmp(arch, arch_names[i]) == 0)
545 			return 1;
546 
547 	return 0;
548 }
549 
/*
 * Check a file name relative to a manpath: after an optional
 * leading "./", it must start with "man" or "cat" and must
 * contain neither "../" nor "/..".
 * Return 1 if the name is acceptable or 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Never allow climbing out of the manpath. */

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0)
		return 1;
	return strncmp(file, "cat", 3) == 0;
}
560 
561 static void
562 pg_index(const struct req *req)
563 {
564 	if (resp_begin_html(200, NULL, NULL) == 0)
565 		puts("<header>");
566 	resp_searchform(req, FOCUS_QUERY);
567 	printf("</header>\n"
568 	       "<main>\n"
569 	       "<p role=\"doc-notice\" aria-label=\"Usage\">\n"
570 	       "This web interface is documented in the\n"
571 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\""
572 	       " aria-label=\"man dot CGI, section 8\">man.cgi(8)</a>\n"
573 	       "manual, and the\n"
574 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\""
575 	       " aria-label=\"apropos, section 1\">apropos(1)</a>\n"
576 	       "manual explains the query syntax.\n"
577 	       "</p>\n"
578 	       "</main>\n",
579 	       scriptname, *scriptname == '\0' ? "" : "/",
580 	       scriptname, *scriptname == '\0' ? "" : "/");
581 	resp_end_html();
582 }
583 
584 static void
585 pg_noresult(const struct req *req, int code, const char *http_msg,
586     const char *user_msg)
587 {
588 	if (resp_begin_html(code, http_msg, NULL) == 0)
589 		puts("<header>");
590 	resp_searchform(req, FOCUS_QUERY);
591 	puts("</header>");
592 	puts("<main>");
593 	puts("<p role=\"doc-notice\" aria-label=\"No result\">");
594 	puts(user_msg);
595 	puts("</p>");
596 	puts("</main>");
597 	resp_end_html();
598 }
599 
600 static void
601 pg_error_badrequest(const char *msg)
602 {
603 	if (resp_begin_html(400, "Bad Request", NULL))
604 		puts("</header>");
605 	puts("<main>\n"
606 	     "<h1>Bad Request</h1>\n"
607 	     "<p role=\"doc-notice\" aria-label=\"Bad Request\">");
608 	puts(msg);
609 	printf("Try again from the\n"
610 	       "<a href=\"/%s\">main page</a>.\n"
611 	       "</p>\n"
612 	       "</main>\n", scriptname);
613 	resp_end_html();
614 }
615 
/*
 * Send a "500 Internal Server Error" page.
 * No search form and no details are shown.
 */
static void
pg_error_internal(void)
{
	int	 have_header;

	have_header = resp_begin_html(500, "Internal Server Error", NULL);
	if (have_header != 0)
		puts("</header>");
	puts("<main><p role=\"doc-notice\">Internal Server Error</p></main>");
	resp_end_html();
}
624 
625 static void
626 pg_redirect(const struct req *req, const char *name)
627 {
628 	printf("Status: 303 See Other\r\n"
629 	    "Location: /");
630 	if (*scriptname != '\0')
631 		printf("%s/", scriptname);
632 	if (strcmp(req->q.manpath, req->p[0]))
633 		printf("%s/", req->q.manpath);
634 	if (req->q.arch != NULL)
635 		printf("%s/", req->q.arch);
636 	http_encode(name);
637 	if (req->q.sec != NULL) {
638 		putchar('.');
639 		http_encode(req->q.sec);
640 	}
641 	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
642 }
643 
644 static void
645 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
646 {
647 	char		*arch, *archend;
648 	const char	*sec;
649 	size_t		 i, iuse;
650 	int		 archprio, archpriouse;
651 	int		 prio, priouse;
652 	int		 have_header;
653 
654 	for (i = 0; i < sz; i++) {
655 		if (validate_filename(r[i].file))
656 			continue;
657 		warnx("invalid filename %s in %s database",
658 		    r[i].file, req->q.manpath);
659 		pg_error_internal();
660 		return;
661 	}
662 
663 	if (req->isquery && sz == 1) {
664 		/*
665 		 * If we have just one result, then jump there now
666 		 * without any delay.
667 		 */
668 		printf("Status: 303 See Other\r\n"
669 		    "Location: /");
670 		if (*scriptname != '\0')
671 			printf("%s/", scriptname);
672 		if (strcmp(req->q.manpath, req->p[0]))
673 			printf("%s/", req->q.manpath);
674 		printf("%s\r\n"
675 		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
676 		    r[0].file);
677 		return;
678 	}
679 
680 	/*
681 	 * In man(1) mode, show one of the pages
682 	 * even if more than one is found.
683 	 */
684 
685 	iuse = 0;
686 	if (req->q.equal || sz == 1) {
687 		priouse = 20;
688 		archpriouse = 3;
689 		for (i = 0; i < sz; i++) {
690 			sec = r[i].file;
691 			sec += strcspn(sec, "123456789");
692 			if (sec[0] == '\0')
693 				continue;
694 			prio = sec_prios[sec[0] - '1'];
695 			if (sec[1] != '/')
696 				prio += 10;
697 			if (req->q.arch == NULL) {
698 				archprio =
699 				    ((arch = strchr(sec + 1, '/'))
700 					== NULL) ? 3 :
701 				    ((archend = strchr(arch + 1, '/'))
702 					== NULL) ? 0 :
703 				    strncmp(arch, "amd64/",
704 					archend - arch) ? 2 : 1;
705 				if (archprio < archpriouse) {
706 					archpriouse = archprio;
707 					priouse = prio;
708 					iuse = i;
709 					continue;
710 				}
711 				if (archprio > archpriouse)
712 					continue;
713 			}
714 			if (prio >= priouse)
715 				continue;
716 			priouse = prio;
717 			iuse = i;
718 		}
719 		have_header = resp_begin_html(200, NULL, r[iuse].file);
720 	} else
721 		have_header = resp_begin_html(200, NULL, NULL);
722 
723 	if (have_header == 0)
724 		puts("<header>");
725 	resp_searchform(req,
726 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
727 	puts("</header>");
728 
729 	if (sz > 1) {
730 		puts("<nav>");
731 		puts("<table class=\"results\">");
732 		for (i = 0; i < sz; i++) {
733 			printf("  <tr>\n"
734 			       "    <td>"
735 			       "<a class=\"Xr\" href=\"/");
736 			if (*scriptname != '\0')
737 				printf("%s/", scriptname);
738 			if (strcmp(req->q.manpath, req->p[0]))
739 				printf("%s/", req->q.manpath);
740 			printf("%s\">", r[i].file);
741 			html_print(r[i].names);
742 			printf("</a></td>\n"
743 			       "    <td><span class=\"Nd\">");
744 			html_print(r[i].output);
745 			puts("</span></td>\n"
746 			     "  </tr>");
747 		}
748 		puts("</table>");
749 		puts("</nav>");
750 	}
751 
752 	if (req->q.equal || sz == 1) {
753 		puts("<hr>");
754 		resp_show(req, r[iuse].file);
755 	}
756 
757 	resp_end_html();
758 }
759 
/*
 * Print a preformatted manual page as HTML, inside a pre element.
 * Backspace overstrike sequences are translated: an underscore,
 * a backspace, and a character yield the character in italic;
 * some pairs of distinct overstruck characters yield an asterisk
 * or a plus sign; any other overstrike yields the second character
 * in bold.  All text is passed through html_putchar().
 * If the file cannot be opened, a notice is printed instead.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p role=\"doc-notice\">\n"
		     "  You specified an invalid manual file.\n"
		     "</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;

		/* The last character of the line is handled below. */

		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the character printed on top.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of a pair have to be looked
			 * at; testing p[i + 1] would be pointless
			 * because it is known to be the backspace.
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
898 
899 static void
900 resp_format(const struct req *req, const char *file)
901 {
902 	struct manoutput conf;
903 	struct mparse	*mp;
904 	struct roff_meta *meta;
905 	void		*vp;
906 	int		 fd;
907 	int		 usepath;
908 
909 	if (-1 == (fd = open(file, O_RDONLY))) {
910 		puts("<p role=\"doc-notice\">\n"
911 		     "  You specified an invalid manual file.\n"
912 		     "</p>");
913 		return;
914 	}
915 
916 	mchars_alloc();
917 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
918 	    MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
919 	mparse_readfd(mp, fd, file);
920 	close(fd);
921 	meta = mparse_result(mp);
922 
923 	memset(&conf, 0, sizeof(conf));
924 	conf.fragment = 1;
925 	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
926 	usepath = strcmp(req->q.manpath, req->p[0]);
927 	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
928 	    scriptname, *scriptname == '\0' ? "" : "/",
929 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
930 
931 	vp = html_alloc(&conf);
932 	if (meta->macroset == MACROSET_MDOC)
933 		html_mdoc(vp, meta);
934 	else
935 		html_man(vp, meta);
936 
937 	html_free(vp);
938 	mparse_free(mp);
939 	mchars_free();
940 	free(conf.man);
941 	free(conf.style);
942 }
943 
944 static void
945 resp_show(const struct req *req, const char *file)
946 {
947 
948 	if ('.' == file[0] && '/' == file[1])
949 		file += 2;
950 
951 	if ('c' == *file)
952 		resp_catman(req, file);
953 	else
954 		resp_format(req, file);
955 }
956 
957 static void
958 pg_show(struct req *req, const char *fullpath)
959 {
960 	char		*manpath;
961 	const char	*file;
962 
963 	if ((file = strchr(fullpath, '/')) == NULL) {
964 		pg_error_badrequest(
965 		    "You did not specify a page to show.");
966 		return;
967 	}
968 	manpath = mandoc_strndup(fullpath, file - fullpath);
969 	file++;
970 
971 	if ( ! validate_manpath(req, manpath)) {
972 		pg_error_badrequest(
973 		    "You specified an invalid manpath.");
974 		free(manpath);
975 		return;
976 	}
977 
978 	/*
979 	 * Begin by chdir()ing into the manpath.
980 	 * This way we can pick up the database files, which are
981 	 * relative to the manpath root.
982 	 */
983 
984 	if (chdir(manpath) == -1) {
985 		warn("chdir %s", manpath);
986 		pg_error_internal();
987 		free(manpath);
988 		return;
989 	}
990 	free(manpath);
991 
992 	if ( ! validate_filename(file)) {
993 		pg_error_badrequest(
994 		    "You specified an invalid manual file.");
995 		return;
996 	}
997 
998 	if (resp_begin_html(200, NULL, file) == 0)
999 		puts("<header>");
1000 	resp_searchform(req, FOCUS_NONE);
1001 	puts("</header>");
1002 	resp_show(req, file);
1003 	resp_end_html();
1004 }
1005 
1006 static void
1007 pg_search(const struct req *req)
1008 {
1009 	struct mansearch	  search;
1010 	struct manpaths		  paths;
1011 	struct manpage		 *res;
1012 	char			**argv;
1013 	char			 *query, *rp, *wp;
1014 	size_t			  ressz;
1015 	int			  argc;
1016 
1017 	/*
1018 	 * Begin by chdir()ing into the root of the manpath.
1019 	 * This way we can pick up the database files, which are
1020 	 * relative to the manpath root.
1021 	 */
1022 
1023 	if (chdir(req->q.manpath) == -1) {
1024 		warn("chdir %s", req->q.manpath);
1025 		pg_error_internal();
1026 		return;
1027 	}
1028 
1029 	search.arch = req->q.arch;
1030 	search.sec = req->q.sec;
1031 	search.outkey = "Nd";
1032 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
1033 	search.firstmatch = 1;
1034 
1035 	paths.sz = 1;
1036 	paths.paths = mandoc_malloc(sizeof(char *));
1037 	paths.paths[0] = mandoc_strdup(".");
1038 
1039 	/*
1040 	 * Break apart at spaces with backslash-escaping.
1041 	 */
1042 
1043 	argc = 0;
1044 	argv = NULL;
1045 	rp = query = mandoc_strdup(req->q.query);
1046 	for (;;) {
1047 		while (isspace((unsigned char)*rp))
1048 			rp++;
1049 		if (*rp == '\0')
1050 			break;
1051 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1052 		argv[argc++] = wp = rp;
1053 		for (;;) {
1054 			if (isspace((unsigned char)*rp)) {
1055 				*wp = '\0';
1056 				rp++;
1057 				break;
1058 			}
1059 			if (rp[0] == '\\' && rp[1] != '\0')
1060 				rp++;
1061 			if (wp != rp)
1062 				*wp = *rp;
1063 			if (*rp == '\0')
1064 				break;
1065 			wp++;
1066 			rp++;
1067 		}
1068 	}
1069 
1070 	res = NULL;
1071 	ressz = 0;
1072 	if (req->isquery && req->q.equal && argc == 1)
1073 		pg_redirect(req, argv[0]);
1074 	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1075 		pg_noresult(req, 400, "Bad Request",
1076 		    "You entered an invalid query.");
1077 	else if (ressz == 0)
1078 		pg_noresult(req, 404, "Not Found", "No results found.");
1079 	else
1080 		pg_searchres(req, res, ressz);
1081 
1082 	free(query);
1083 	mansearch_free(res, ressz);
1084 	free(paths.paths[0]);
1085 	free(paths.paths);
1086 }
1087 
1088 int
1089 main(void)
1090 {
1091 	struct req	 req;
1092 	struct itimerval itimer;
1093 	const char	*path;
1094 	const char	*querystring;
1095 	int		 i;
1096 
1097 #if HAVE_PLEDGE
1098 	/*
1099 	 * The "rpath" pledge could be revoked after mparse_readfd()
1100 	 * if the file descriptor to "/footer.html" would be opened
1101 	 * up front, but it's probably not worth the complication
1102 	 * of the code it would cause: it would require scattering
1103 	 * pledge() calls in multiple low-level resp_*() functions.
1104 	 */
1105 
1106 	if (pledge("stdio rpath", NULL) == -1) {
1107 		warn("pledge");
1108 		pg_error_internal();
1109 		return EXIT_FAILURE;
1110 	}
1111 #endif
1112 
1113 	/* Poor man's ReDoS mitigation. */
1114 
1115 	itimer.it_value.tv_sec = 2;
1116 	itimer.it_value.tv_usec = 0;
1117 	itimer.it_interval.tv_sec = 2;
1118 	itimer.it_interval.tv_usec = 0;
1119 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1120 		warn("setitimer");
1121 		pg_error_internal();
1122 		return EXIT_FAILURE;
1123 	}
1124 
1125 	/*
1126 	 * First we change directory into the MAN_DIR so that
1127 	 * subsequent scanning for manpath directories is rooted
1128 	 * relative to the same position.
1129 	 */
1130 
1131 	if (chdir(MAN_DIR) == -1) {
1132 		warn("MAN_DIR: %s", MAN_DIR);
1133 		pg_error_internal();
1134 		return EXIT_FAILURE;
1135 	}
1136 
1137 	memset(&req, 0, sizeof(struct req));
1138 	req.q.equal = 1;
1139 	parse_manpath_conf(&req);
1140 
1141 	/* Parse the path info and the query string. */
1142 
1143 	if ((path = getenv("PATH_INFO")) == NULL)
1144 		path = "";
1145 	else if (*path == '/')
1146 		path++;
1147 
1148 	if (*path != '\0') {
1149 		parse_path_info(&req, path);
1150 		if (req.q.manpath == NULL || req.q.sec == NULL ||
1151 		    *req.q.query == '\0' || access(path, F_OK) == -1)
1152 			path = "";
1153 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1154 		parse_query_string(&req, querystring);
1155 
1156 	/* Validate parsed data and add defaults. */
1157 
1158 	if (req.q.manpath == NULL)
1159 		req.q.manpath = mandoc_strdup(req.p[0]);
1160 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1161 		pg_error_badrequest(
1162 		    "You specified an invalid manpath.");
1163 		return EXIT_FAILURE;
1164 	}
1165 
1166 	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1167 		pg_error_badrequest(
1168 		    "You specified an invalid architecture.");
1169 		return EXIT_FAILURE;
1170 	}
1171 
1172 	/* Dispatch to the three different pages. */
1173 
1174 	if ('\0' != *path)
1175 		pg_show(&req, path);
1176 	else if (NULL != req.q.query)
1177 		pg_search(&req);
1178 	else
1179 		pg_index(&req);
1180 
1181 	free(req.q.manpath);
1182 	free(req.q.arch);
1183 	free(req.q.sec);
1184 	free(req.q.query);
1185 	for (i = 0; i < (int)req.psz; i++)
1186 		free(req.p[i]);
1187 	free(req.p);
1188 	return EXIT_SUCCESS;
1189 }
1190 
1191 /*
1192  * Translate PATH_INFO to a query.
1193  */
1194 static void
1195 parse_path_info(struct req *req, const char *path)
1196 {
1197 	const char	*name, *sec, *end;
1198 
1199 	req->isquery = 0;
1200 	req->q.equal = 1;
1201 	req->q.manpath = NULL;
1202 	req->q.arch = NULL;
1203 
1204 	/* Mandatory manual page name. */
1205 	if ((name = strrchr(path, '/')) == NULL)
1206 		name = path;
1207 	else
1208 		name++;
1209 
1210 	/* Optional trailing section. */
1211 	sec = strrchr(name, '.');
1212 	if (sec != NULL && isdigit((unsigned char)*++sec)) {
1213 		req->q.query = mandoc_strndup(name, sec - name - 1);
1214 		req->q.sec = mandoc_strdup(sec);
1215 	} else {
1216 		req->q.query = mandoc_strdup(name);
1217 		req->q.sec = NULL;
1218 	}
1219 
1220 	/* Handle the case of name[.section] only. */
1221 	if (name == path)
1222 		return;
1223 
1224 	/* Optional manpath. */
1225 	end = strchr(path, '/');
1226 	req->q.manpath = mandoc_strndup(path, end - path);
1227 	if (validate_manpath(req, req->q.manpath)) {
1228 		path = end + 1;
1229 		if (name == path)
1230 			return;
1231 	} else {
1232 		free(req->q.manpath);
1233 		req->q.manpath = NULL;
1234 	}
1235 
1236 	/* Optional section. */
1237 	if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1238 		path += 3;
1239 		end = strchr(path, '/');
1240 		free(req->q.sec);
1241 		req->q.sec = mandoc_strndup(path, end - path);
1242 		path = end + 1;
1243 		if (name == path)
1244 			return;
1245 	}
1246 
1247 	/* Optional architecture. */
1248 	end = strchr(path, '/');
1249 	if (end + 1 != name) {
1250 		pg_error_badrequest(
1251 		    "You specified too many directory components.");
1252 		exit(EXIT_FAILURE);
1253 	}
1254 	req->q.arch = mandoc_strndup(path, end - path);
1255 	if (validate_arch(req->q.arch) == 0) {
1256 		pg_error_badrequest(
1257 		    "You specified an invalid directory component.");
1258 		exit(EXIT_FAILURE);
1259 	}
1260 }
1261 
1262 /*
1263  * Scan for indexable paths.
1264  */
1265 static void
1266 parse_manpath_conf(struct req *req)
1267 {
1268 	FILE	*fp;
1269 	char	*dp;
1270 	size_t	 dpsz;
1271 	ssize_t	 len;
1272 
1273 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1274 		warn("%s/manpath.conf", MAN_DIR);
1275 		pg_error_internal();
1276 		exit(EXIT_FAILURE);
1277 	}
1278 
1279 	dp = NULL;
1280 	dpsz = 0;
1281 
1282 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1283 		if (dp[len - 1] == '\n')
1284 			dp[--len] = '\0';
1285 		req->p = mandoc_realloc(req->p,
1286 		    (req->psz + 1) * sizeof(char *));
1287 		if ( ! validate_urifrag(dp)) {
1288 			warnx("%s/manpath.conf contains "
1289 			    "unsafe path \"%s\"", MAN_DIR, dp);
1290 			pg_error_internal();
1291 			exit(EXIT_FAILURE);
1292 		}
1293 		if (strchr(dp, '/') != NULL) {
1294 			warnx("%s/manpath.conf contains "
1295 			    "path with slash \"%s\"", MAN_DIR, dp);
1296 			pg_error_internal();
1297 			exit(EXIT_FAILURE);
1298 		}
1299 		req->p[req->psz++] = dp;
1300 		dp = NULL;
1301 		dpsz = 0;
1302 	}
1303 	free(dp);
1304 
1305 	if (req->p == NULL) {
1306 		warnx("%s/manpath.conf is empty", MAN_DIR);
1307 		pg_error_internal();
1308 		exit(EXIT_FAILURE);
1309 	}
1310 }
1311