xref: /freebsd/contrib/mandoc/cgi.c (revision 6829dae12bb055451fa467da4589c43bd03b1e64)
1 /*	$Id: cgi.c,v 1.158 2018/05/29 20:32:45 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2016, 2017 Ingo Schwarze <schwarze@usta.de>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 #include <sys/time.h>
22 
23 #include <ctype.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <limits.h>
30 #include <stdint.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <unistd.h>
35 
36 #include "mandoc_aux.h"
37 #include "mandoc.h"
38 #include "roff.h"
39 #include "mdoc.h"
40 #include "man.h"
41 #include "main.h"
42 #include "manconf.h"
43 #include "mansearch.h"
44 #include "cgi.h"
45 
/*
 * The search parameters of one request,
 * in the form handed to the search function.
 */
struct	query {
	/* desired manual directory */
	char		*manpath;
	/* architecture */
	char		*arch;
	/* manual section */
	char		*sec;
	/* unparsed query expression */
	char		*query;
	/* match whole names, not substrings */
	int		 equal;
};
56 
57 struct	req {
58 	struct query	  q;
59 	char		**p; /* array of available manpaths */
60 	size_t		  psz; /* number of available manpaths */
61 	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
62 };
63 
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0,	/* do not request autofocus */
	FOCUS_QUERY	/* autofocus the query input box */
};
68 
69 static	void		 html_print(const char *);
70 static	void		 html_putchar(char);
71 static	int		 http_decode(char *);
72 static	void		 parse_manpath_conf(struct req *);
73 static	void		 parse_path_info(struct req *req, const char *path);
74 static	void		 parse_query_string(struct req *, const char *);
75 static	void		 pg_error_badrequest(const char *);
76 static	void		 pg_error_internal(void);
77 static	void		 pg_index(const struct req *);
78 static	void		 pg_noresult(const struct req *, const char *);
79 static	void		 pg_redirect(const struct req *, const char *);
80 static	void		 pg_search(const struct req *);
81 static	void		 pg_searchres(const struct req *,
82 				struct manpage *, size_t);
83 static	void		 pg_show(struct req *, const char *);
84 static	void		 resp_begin_html(int, const char *, const char *);
85 static	void		 resp_begin_http(int, const char *);
86 static	void		 resp_catman(const struct req *, const char *);
87 static	void		 resp_copy(const char *);
88 static	void		 resp_end_html(void);
89 static	void		 resp_format(const struct req *, const char *);
90 static	void		 resp_searchform(const struct req *, enum focus);
91 static	void		 resp_show(const struct req *, const char *);
92 static	void		 set_query_attr(char **, char **);
93 static	int		 validate_filename(const char *);
94 static	int		 validate_manpath(const struct req *, const char *);
95 static	int		 validate_urifrag(const char *);
96 
97 static	const char	 *scriptname = SCRIPT_NAME;
98 
/*
 * Section tables.
 * sec_prios is indexed by the section digit minus one ('1' to '9');
 * pg_searchres() prefers the page with the smallest value.
 * sec_numbers and sec_names are parallel arrays providing the
 * option values and labels of the section selector.
 */
static	const int sec_prios[] = {
    1, 4, 5, 8, 6, 3, 7, 2, 9
};
static	const char *const sec_numbers[] = {
    "0",
    "1",
    "2",
    "3",
    "3p",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager's Manual",
    "9 - Kernel Developer's Manual"
};
static	const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
117 
/*
 * Architecture names offered by the architecture selector
 * of the search form, in the order they are listed there.
 */
static	const char *const arch_names[] = {
    "amd64",     "alpha",     "armv7",     "arm64",     "hppa",
    "i386",      "landisk",   "loongson",  "luna88k",   "macppc",
    "mips64",    "octeon",    "sgi",       "socppc",    "sparc64",
    "amiga",     "arc",       "armish",    "arm32",     "atari",
    "aviion",    "beagle",    "cats",      "hppa64",    "hp300",
    "ia64",      "mac68k",    "mvme68k",   "mvme88k",   "mvmeppc",
    "palm",      "pc532",     "pegasos",   "pmax",      "powerpc",
    "solbourne", "sparc",     "sun3",      "vax",       "wgrisc",
    "x68k",      "zaurus"
};
static	const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
133 
/*
 * Write one character to standard output, replacing the
 * characters '"', '&', '>' and '<' by HTML entities.
 * Everything else, including non-ASCII bytes, is written unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	switch (c) {
	case '"':
		entity = "&quot;";
		break;
	case '&':
		entity = "&amp;";
		break;
	case '>':
		entity = "&gt;";
		break;
	case '<':
		entity = "&lt;";
		break;
	default:
		entity = NULL;
		break;
	}

	if (entity != NULL)
		fputs(entity, stdout);
	else
		putchar((unsigned char)c);
}
160 
/*
 * Write a string to standard output with HTML escaping,
 * one character at a time via html_putchar().
 * A NULL string prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
174 
/*
 * Move the allocated string *val into the query attribute *attr.
 * Any previous value of *attr is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL instead.
 * In both cases, *val is NULL on return, so the caller
 * no longer owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*str;

	free(*attr);

	str = *val;
	*val = NULL;

	if (*str != '\0') {
		*attr = str;
		return;
	}
	free(str);
	*attr = NULL;
}
191 
192 /*
193  * Parse the QUERY_STRING for key-value pairs
194  * and store the values into the query structure.
195  */
196 static void
197 parse_query_string(struct req *req, const char *qs)
198 {
199 	char		*key, *val;
200 	size_t		 keysz, valsz;
201 
202 	req->isquery	= 1;
203 	req->q.manpath	= NULL;
204 	req->q.arch	= NULL;
205 	req->q.sec	= NULL;
206 	req->q.query	= NULL;
207 	req->q.equal	= 1;
208 
209 	key = val = NULL;
210 	while (*qs != '\0') {
211 
212 		/* Parse one key. */
213 
214 		keysz = strcspn(qs, "=;&");
215 		key = mandoc_strndup(qs, keysz);
216 		qs += keysz;
217 		if (*qs != '=')
218 			goto next;
219 
220 		/* Parse one value. */
221 
222 		valsz = strcspn(++qs, ";&");
223 		val = mandoc_strndup(qs, valsz);
224 		qs += valsz;
225 
226 		/* Decode and catch encoding errors. */
227 
228 		if ( ! (http_decode(key) && http_decode(val)))
229 			goto next;
230 
231 		/* Handle key-value pairs. */
232 
233 		if ( ! strcmp(key, "query"))
234 			set_query_attr(&req->q.query, &val);
235 
236 		else if ( ! strcmp(key, "apropos"))
237 			req->q.equal = !strcmp(val, "0");
238 
239 		else if ( ! strcmp(key, "manpath")) {
240 #ifdef COMPAT_OLDURI
241 			if ( ! strncmp(val, "OpenBSD ", 8)) {
242 				val[7] = '-';
243 				if ('C' == val[8])
244 					val[8] = 'c';
245 			}
246 #endif
247 			set_query_attr(&req->q.manpath, &val);
248 		}
249 
250 		else if ( ! (strcmp(key, "sec")
251 #ifdef COMPAT_OLDURI
252 		    && strcmp(key, "sektion")
253 #endif
254 		    )) {
255 			if ( ! strcmp(val, "0"))
256 				*val = '\0';
257 			set_query_attr(&req->q.sec, &val);
258 		}
259 
260 		else if ( ! strcmp(key, "arch")) {
261 			if ( ! strcmp(val, "default"))
262 				*val = '\0';
263 			set_query_attr(&req->q.arch, &val);
264 		}
265 
266 		/*
267 		 * The key must be freed in any case.
268 		 * The val may have been handed over to the query
269 		 * structure, in which case it is now NULL.
270 		 */
271 next:
272 		free(key);
273 		key = NULL;
274 		free(val);
275 		val = NULL;
276 
277 		if (*qs != '\0')
278 			qs++;
279 	}
280 }
281 
282 /*
283  * HTTP-decode a string.  The standard explanation is that this turns
284  * "%4e+foo" into "n foo" in the regular way.  This is done in-place
285  * over the allocated string.
286  */
287 static int
288 http_decode(char *p)
289 {
290 	char             hex[3];
291 	char		*q;
292 	int              c;
293 
294 	hex[2] = '\0';
295 
296 	q = p;
297 	for ( ; '\0' != *p; p++, q++) {
298 		if ('%' == *p) {
299 			if ('\0' == (hex[0] = *(p + 1)))
300 				return 0;
301 			if ('\0' == (hex[1] = *(p + 2)))
302 				return 0;
303 			if (1 != sscanf(hex, "%x", &c))
304 				return 0;
305 			if ('\0' == c)
306 				return 0;
307 
308 			*q = (char)c;
309 			p += 2;
310 		} else
311 			*q = '+' == *p ? ' ' : *p;
312 	}
313 
314 	*q = '\0';
315 	return 1;
316 }
317 
/*
 * Write the HTTP response header and flush standard output.
 * A "Status:" line is only written for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);
	fputs("\r\n", stdout);

	fflush(stdout);
}
332 
/*
 * Copy the content of a file verbatim to standard output.
 * A file that cannot be opened is silently skipped.
 * Copying stops at the first read or write error.
 */
static void
resp_copy(const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return;

	/*
	 * The data below bypasses stdio, so anything still
	 * buffered in stdout has to go out first.
	 */

	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {

		/*
		 * write(2) may transfer fewer bytes than requested;
		 * keep writing until the whole chunk is out.
		 */

		off = 0;
		while (off < nr) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw > 0) {
				off += nw;
				continue;
			}
			if (nw == -1 && errno == EINTR)
				continue;
			close(fd);
			return;
		}
	}
	close(fd);
}
347 
348 static void
349 resp_begin_html(int code, const char *msg, const char *file)
350 {
351 	char	*cp;
352 
353 	resp_begin_http(code, msg);
354 
355 	printf("<!DOCTYPE html>\n"
356 	       "<html>\n"
357 	       "<head>\n"
358 	       "  <meta charset=\"UTF-8\"/>\n"
359 	       "  <meta name=\"viewport\""
360 		      " content=\"width=device-width, initial-scale=1.0\">\n"
361 	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
362 	       " type=\"text/css\" media=\"all\">\n"
363 	       "  <title>",
364 	       CSS_DIR);
365 	if (file != NULL) {
366 		if ((cp = strrchr(file, '/')) != NULL)
367 			file = cp + 1;
368 		if ((cp = strrchr(file, '.')) != NULL) {
369 			printf("%.*s(%s) - ", (int)(cp - file), file, cp + 1);
370 		} else
371 			printf("%s - ", file);
372 	}
373 	printf("%s</title>\n"
374 	       "</head>\n"
375 	       "<body>\n",
376 	       CUSTOMIZE_TITLE);
377 
378 	resp_copy(MAN_DIR "/header.html");
379 }
380 
381 static void
382 resp_end_html(void)
383 {
384 
385 	resp_copy(MAN_DIR "/footer.html");
386 
387 	puts("</body>\n"
388 	     "</html>");
389 }
390 
391 static void
392 resp_searchform(const struct req *req, enum focus focus)
393 {
394 	int		 i;
395 
396 	printf("<form action=\"/%s\" method=\"get\">\n"
397 	       "  <fieldset>\n"
398 	       "    <legend>Manual Page Search Parameters</legend>\n",
399 	       scriptname);
400 
401 	/* Write query input box. */
402 
403 	printf("    <input type=\"search\" name=\"query\" value=\"");
404 	if (req->q.query != NULL)
405 		html_print(req->q.query);
406 	printf( "\" size=\"40\"");
407 	if (focus == FOCUS_QUERY)
408 		printf(" autofocus");
409 	puts(">");
410 
411 	/* Write submission buttons. */
412 
413 	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
414 		"man</button>\n"
415 		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
416 		"apropos</button>\n"
417 		"    <br/>\n");
418 
419 	/* Write section selector. */
420 
421 	puts("    <select name=\"sec\">");
422 	for (i = 0; i < sec_MAX; i++) {
423 		printf("      <option value=\"%s\"", sec_numbers[i]);
424 		if (NULL != req->q.sec &&
425 		    0 == strcmp(sec_numbers[i], req->q.sec))
426 			printf(" selected=\"selected\"");
427 		printf(">%s</option>\n", sec_names[i]);
428 	}
429 	puts("    </select>");
430 
431 	/* Write architecture selector. */
432 
433 	printf(	"    <select name=\"arch\">\n"
434 		"      <option value=\"default\"");
435 	if (NULL == req->q.arch)
436 		printf(" selected=\"selected\"");
437 	puts(">All Architectures</option>");
438 	for (i = 0; i < arch_MAX; i++) {
439 		printf("      <option");
440 		if (NULL != req->q.arch &&
441 		    0 == strcmp(arch_names[i], req->q.arch))
442 			printf(" selected=\"selected\"");
443 		printf(">%s</option>\n", arch_names[i]);
444 	}
445 	puts("    </select>");
446 
447 	/* Write manpath selector. */
448 
449 	if (req->psz > 1) {
450 		puts("    <select name=\"manpath\">");
451 		for (i = 0; i < (int)req->psz; i++) {
452 			printf("      <option");
453 			if (strcmp(req->q.manpath, req->p[i]) == 0)
454 				printf(" selected=\"selected\"");
455 			printf(">");
456 			html_print(req->p[i]);
457 			puts("</option>");
458 		}
459 		puts("    </select>");
460 	}
461 
462 	puts("  </fieldset>\n"
463 	     "</form>");
464 }
465 
/*
 * Check that a string consists only of alphanumeric characters
 * and the characters '-', '.', '/' and '_'.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const unsigned char	*cp;

	for (cp = (const unsigned char *)frag; *cp != '\0'; cp++) {
		if (isalnum(*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			break;
		default:
			return 0;
		}
	}
	return 1;
}
479 
480 static int
481 validate_manpath(const struct req *req, const char* manpath)
482 {
483 	size_t	 i;
484 
485 	for (i = 0; i < req->psz; i++)
486 		if ( ! strcmp(manpath, req->p[i]))
487 			return 1;
488 
489 	return 0;
490 }
491 
/*
 * Check a file name relative to the root of a manpath.
 * After skipping an optional leading "./", the name must not
 * contain "../" or "/.." and must start with "man" or "cat".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* No climbing out of the manpath. */
	if (strstr(file, "../") != NULL)
		return 0;
	if (strstr(file, "/..") != NULL)
		return 0;

	/* Only manual and catpage directories. */
	if (strncmp(file, "man", 3) == 0)
		return 1;
	return strncmp(file, "cat", 3) == 0;
}
502 
503 static void
504 pg_index(const struct req *req)
505 {
506 
507 	resp_begin_html(200, NULL, NULL);
508 	resp_searchform(req, FOCUS_QUERY);
509 	printf("<p>\n"
510 	       "This web interface is documented in the\n"
511 	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\">man.cgi(8)</a>\n"
512 	       "manual, and the\n"
513 	       "<a class=\"Xr\" href=\"/%s%sapropos.1\">apropos(1)</a>\n"
514 	       "manual explains the query syntax.\n"
515 	       "</p>\n",
516 	       scriptname, *scriptname == '\0' ? "" : "/",
517 	       scriptname, *scriptname == '\0' ? "" : "/");
518 	resp_end_html();
519 }
520 
521 static void
522 pg_noresult(const struct req *req, const char *msg)
523 {
524 	resp_begin_html(200, NULL, NULL);
525 	resp_searchform(req, FOCUS_QUERY);
526 	puts("<p>");
527 	puts(msg);
528 	puts("</p>");
529 	resp_end_html();
530 }
531 
532 static void
533 pg_error_badrequest(const char *msg)
534 {
535 
536 	resp_begin_html(400, "Bad Request", NULL);
537 	puts("<h1>Bad Request</h1>\n"
538 	     "<p>\n");
539 	puts(msg);
540 	printf("Try again from the\n"
541 	       "<a href=\"/%s\">main page</a>.\n"
542 	       "</p>", scriptname);
543 	resp_end_html();
544 }
545 
/*
 * Write a complete "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	resp_begin_html(500, "Internal Server Error", NULL);
	fputs("<p>Internal Server Error</p>\n", stdout);
	resp_end_html();
}
553 
554 static void
555 pg_redirect(const struct req *req, const char *name)
556 {
557 	printf("Status: 303 See Other\r\n"
558 	    "Location: /");
559 	if (*scriptname != '\0')
560 		printf("%s/", scriptname);
561 	if (strcmp(req->q.manpath, req->p[0]))
562 		printf("%s/", req->q.manpath);
563 	if (req->q.arch != NULL)
564 		printf("%s/", req->q.arch);
565 	printf("%s", name);
566 	if (req->q.sec != NULL)
567 		printf(".%s", req->q.sec);
568 	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
569 }
570 
571 static void
572 pg_searchres(const struct req *req, struct manpage *r, size_t sz)
573 {
574 	char		*arch, *archend;
575 	const char	*sec;
576 	size_t		 i, iuse;
577 	int		 archprio, archpriouse;
578 	int		 prio, priouse;
579 
580 	for (i = 0; i < sz; i++) {
581 		if (validate_filename(r[i].file))
582 			continue;
583 		warnx("invalid filename %s in %s database",
584 		    r[i].file, req->q.manpath);
585 		pg_error_internal();
586 		return;
587 	}
588 
589 	if (req->isquery && sz == 1) {
590 		/*
591 		 * If we have just one result, then jump there now
592 		 * without any delay.
593 		 */
594 		printf("Status: 303 See Other\r\n"
595 		    "Location: /");
596 		if (*scriptname != '\0')
597 			printf("%s/", scriptname);
598 		if (strcmp(req->q.manpath, req->p[0]))
599 			printf("%s/", req->q.manpath);
600 		printf("%s\r\n"
601 		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
602 		    r[0].file);
603 		return;
604 	}
605 
606 	/*
607 	 * In man(1) mode, show one of the pages
608 	 * even if more than one is found.
609 	 */
610 
611 	iuse = 0;
612 	if (req->q.equal || sz == 1) {
613 		priouse = 20;
614 		archpriouse = 3;
615 		for (i = 0; i < sz; i++) {
616 			sec = r[i].file;
617 			sec += strcspn(sec, "123456789");
618 			if (sec[0] == '\0')
619 				continue;
620 			prio = sec_prios[sec[0] - '1'];
621 			if (sec[1] != '/')
622 				prio += 10;
623 			if (req->q.arch == NULL) {
624 				archprio =
625 				    ((arch = strchr(sec + 1, '/'))
626 					== NULL) ? 3 :
627 				    ((archend = strchr(arch + 1, '/'))
628 					== NULL) ? 0 :
629 				    strncmp(arch, "amd64/",
630 					archend - arch) ? 2 : 1;
631 				if (archprio < archpriouse) {
632 					archpriouse = archprio;
633 					priouse = prio;
634 					iuse = i;
635 					continue;
636 				}
637 				if (archprio > archpriouse)
638 					continue;
639 			}
640 			if (prio >= priouse)
641 				continue;
642 			priouse = prio;
643 			iuse = i;
644 		}
645 		resp_begin_html(200, NULL, r[iuse].file);
646 	} else
647 		resp_begin_html(200, NULL, NULL);
648 
649 	resp_searchform(req,
650 	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
651 
652 	if (sz > 1) {
653 		puts("<table class=\"results\">");
654 		for (i = 0; i < sz; i++) {
655 			printf("  <tr>\n"
656 			       "    <td>"
657 			       "<a class=\"Xr\" href=\"/");
658 			if (*scriptname != '\0')
659 				printf("%s/", scriptname);
660 			if (strcmp(req->q.manpath, req->p[0]))
661 				printf("%s/", req->q.manpath);
662 			printf("%s\">", r[i].file);
663 			html_print(r[i].names);
664 			printf("</a></td>\n"
665 			       "    <td><span class=\"Nd\">");
666 			html_print(r[i].output);
667 			puts("</span></td>\n"
668 			     "  </tr>");
669 		}
670 		puts("</table>");
671 	}
672 
673 	if (req->q.equal || sz == 1) {
674 		puts("<hr>");
675 		resp_show(req, r[iuse].file);
676 	}
677 
678 	resp_end_html();
679 }
680 
/*
 * Write a preformatted manual page (catpage) as HTML.
 * The file is read line by line.  Backspace overstrike sequences
 * are translated: "_\bX" becomes italic X, certain pairs of
 * different characters become '*' or '+', and any other "Y\bX"
 * becomes bold X.  All page text is HTML-escaped.
 * If the file cannot be opened, an error paragraph is written.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p>You specified an invalid manual file.</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace and
			 * p[i + 2] is the overstriking character.
			 */

			/* Italic mode. */

			if ('_' == p[i]) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of each pair have to be
			 * compared across the backspace, that is,
			 * p[i] against p[i + 2].
			 */

			if (('+' == p[i] && 'o' == p[i + 2]) ||
					('o' == p[i] && '+' == p[i + 2]) ||
					('|' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '|' == p[i + 2]) ||
					('*' == p[i] && '=' == p[i + 2]) ||
					('=' == p[i] && '*' == p[i + 2]) ||
					('*' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '*' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '|' == p[i + 2]) ||
					('+' == p[i] && '-' == p[i + 2]) ||
					('-' == p[i] && '+' == p[i + 2]) ||
					('+' == p[i] && '|' == p[i + 2]) ||
					('|' == p[i] && '+' == p[i + 2]))  {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
817 
818 static void
819 resp_format(const struct req *req, const char *file)
820 {
821 	struct manoutput conf;
822 	struct mparse	*mp;
823 	struct roff_man	*man;
824 	void		*vp;
825 	int		 fd;
826 	int		 usepath;
827 
828 	if (-1 == (fd = open(file, O_RDONLY, 0))) {
829 		puts("<p>You specified an invalid manual file.</p>");
830 		return;
831 	}
832 
833 	mchars_alloc();
834 	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1,
835 	    MANDOCERR_MAX, NULL, MANDOC_OS_OTHER, req->q.manpath);
836 	mparse_readfd(mp, fd, file);
837 	close(fd);
838 
839 	memset(&conf, 0, sizeof(conf));
840 	conf.fragment = 1;
841 	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
842 	usepath = strcmp(req->q.manpath, req->p[0]);
843 	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
844 	    scriptname, *scriptname == '\0' ? "" : "/",
845 	    usepath ? req->q.manpath : "", usepath ? "/" : "");
846 
847 	mparse_result(mp, &man, NULL);
848 	if (man == NULL) {
849 		warnx("fatal mandoc error: %s/%s", req->q.manpath, file);
850 		pg_error_internal();
851 		mparse_free(mp);
852 		mchars_free();
853 		return;
854 	}
855 
856 	vp = html_alloc(&conf);
857 
858 	if (man->macroset == MACROSET_MDOC) {
859 		mdoc_validate(man);
860 		html_mdoc(vp, man);
861 	} else {
862 		man_validate(man);
863 		html_man(vp, man);
864 	}
865 
866 	html_free(vp);
867 	mparse_free(mp);
868 	mchars_free();
869 	free(conf.man);
870 	free(conf.style);
871 }
872 
/*
 * Write the content of one manual page file.
 * After skipping an optional leading "./", names starting with
 * 'c' are treated as preformatted pages and passed to
 * resp_catman(); all others are passed to resp_format().
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (rel[0] == '.' && rel[1] == '/')
		rel += 2;

	if (*rel != 'c') {
		resp_format(req, rel);
		return;
	}
	resp_catman(req, rel);
}
885 
886 static void
887 pg_show(struct req *req, const char *fullpath)
888 {
889 	char		*manpath;
890 	const char	*file;
891 
892 	if ((file = strchr(fullpath, '/')) == NULL) {
893 		pg_error_badrequest(
894 		    "You did not specify a page to show.");
895 		return;
896 	}
897 	manpath = mandoc_strndup(fullpath, file - fullpath);
898 	file++;
899 
900 	if ( ! validate_manpath(req, manpath)) {
901 		pg_error_badrequest(
902 		    "You specified an invalid manpath.");
903 		free(manpath);
904 		return;
905 	}
906 
907 	/*
908 	 * Begin by chdir()ing into the manpath.
909 	 * This way we can pick up the database files, which are
910 	 * relative to the manpath root.
911 	 */
912 
913 	if (chdir(manpath) == -1) {
914 		warn("chdir %s", manpath);
915 		pg_error_internal();
916 		free(manpath);
917 		return;
918 	}
919 	free(manpath);
920 
921 	if ( ! validate_filename(file)) {
922 		pg_error_badrequest(
923 		    "You specified an invalid manual file.");
924 		return;
925 	}
926 
927 	resp_begin_html(200, NULL, file);
928 	resp_searchform(req, FOCUS_NONE);
929 	resp_show(req, file);
930 	resp_end_html();
931 }
932 
933 static void
934 pg_search(const struct req *req)
935 {
936 	struct mansearch	  search;
937 	struct manpaths		  paths;
938 	struct manpage		 *res;
939 	char			**argv;
940 	char			 *query, *rp, *wp;
941 	size_t			  ressz;
942 	int			  argc;
943 
944 	/*
945 	 * Begin by chdir()ing into the root of the manpath.
946 	 * This way we can pick up the database files, which are
947 	 * relative to the manpath root.
948 	 */
949 
950 	if (chdir(req->q.manpath) == -1) {
951 		warn("chdir %s", req->q.manpath);
952 		pg_error_internal();
953 		return;
954 	}
955 
956 	search.arch = req->q.arch;
957 	search.sec = req->q.sec;
958 	search.outkey = "Nd";
959 	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
960 	search.firstmatch = 1;
961 
962 	paths.sz = 1;
963 	paths.paths = mandoc_malloc(sizeof(char *));
964 	paths.paths[0] = mandoc_strdup(".");
965 
966 	/*
967 	 * Break apart at spaces with backslash-escaping.
968 	 */
969 
970 	argc = 0;
971 	argv = NULL;
972 	rp = query = mandoc_strdup(req->q.query);
973 	for (;;) {
974 		while (isspace((unsigned char)*rp))
975 			rp++;
976 		if (*rp == '\0')
977 			break;
978 		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
979 		argv[argc++] = wp = rp;
980 		for (;;) {
981 			if (isspace((unsigned char)*rp)) {
982 				*wp = '\0';
983 				rp++;
984 				break;
985 			}
986 			if (rp[0] == '\\' && rp[1] != '\0')
987 				rp++;
988 			if (wp != rp)
989 				*wp = *rp;
990 			if (*rp == '\0')
991 				break;
992 			wp++;
993 			rp++;
994 		}
995 	}
996 
997 	res = NULL;
998 	ressz = 0;
999 	if (req->isquery && req->q.equal && argc == 1)
1000 		pg_redirect(req, argv[0]);
1001 	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1002 		pg_noresult(req, "You entered an invalid query.");
1003 	else if (ressz == 0)
1004 		pg_noresult(req, "No results found.");
1005 	else
1006 		pg_searchres(req, res, ressz);
1007 
1008 	free(query);
1009 	mansearch_free(res, ressz);
1010 	free(paths.paths[0]);
1011 	free(paths.paths);
1012 }
1013 
1014 int
1015 main(void)
1016 {
1017 	struct req	 req;
1018 	struct itimerval itimer;
1019 	const char	*path;
1020 	const char	*querystring;
1021 	int		 i;
1022 
1023 #if HAVE_PLEDGE
1024 	/*
1025 	 * The "rpath" pledge could be revoked after mparse_readfd()
1026 	 * if the file desciptor to "/footer.html" would be opened
1027 	 * up front, but it's probably not worth the complication
1028 	 * of the code it would cause: it would require scattering
1029 	 * pledge() calls in multiple low-level resp_*() functions.
1030 	 */
1031 
1032 	if (pledge("stdio rpath", NULL) == -1) {
1033 		warn("pledge");
1034 		pg_error_internal();
1035 		return EXIT_FAILURE;
1036 	}
1037 #endif
1038 
1039 	/* Poor man's ReDoS mitigation. */
1040 
1041 	itimer.it_value.tv_sec = 2;
1042 	itimer.it_value.tv_usec = 0;
1043 	itimer.it_interval.tv_sec = 2;
1044 	itimer.it_interval.tv_usec = 0;
1045 	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1046 		warn("setitimer");
1047 		pg_error_internal();
1048 		return EXIT_FAILURE;
1049 	}
1050 
1051 	/*
1052 	 * First we change directory into the MAN_DIR so that
1053 	 * subsequent scanning for manpath directories is rooted
1054 	 * relative to the same position.
1055 	 */
1056 
1057 	if (chdir(MAN_DIR) == -1) {
1058 		warn("MAN_DIR: %s", MAN_DIR);
1059 		pg_error_internal();
1060 		return EXIT_FAILURE;
1061 	}
1062 
1063 	memset(&req, 0, sizeof(struct req));
1064 	req.q.equal = 1;
1065 	parse_manpath_conf(&req);
1066 
1067 	/* Parse the path info and the query string. */
1068 
1069 	if ((path = getenv("PATH_INFO")) == NULL)
1070 		path = "";
1071 	else if (*path == '/')
1072 		path++;
1073 
1074 	if (*path != '\0') {
1075 		parse_path_info(&req, path);
1076 		if (req.q.manpath == NULL || req.q.sec == NULL ||
1077 		    *req.q.query == '\0' || access(path, F_OK) == -1)
1078 			path = "";
1079 	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1080 		parse_query_string(&req, querystring);
1081 
1082 	/* Validate parsed data and add defaults. */
1083 
1084 	if (req.q.manpath == NULL)
1085 		req.q.manpath = mandoc_strdup(req.p[0]);
1086 	else if ( ! validate_manpath(&req, req.q.manpath)) {
1087 		pg_error_badrequest(
1088 		    "You specified an invalid manpath.");
1089 		return EXIT_FAILURE;
1090 	}
1091 
1092 	if ( ! (NULL == req.q.arch || validate_urifrag(req.q.arch))) {
1093 		pg_error_badrequest(
1094 		    "You specified an invalid architecture.");
1095 		return EXIT_FAILURE;
1096 	}
1097 
1098 	/* Dispatch to the three different pages. */
1099 
1100 	if ('\0' != *path)
1101 		pg_show(&req, path);
1102 	else if (NULL != req.q.query)
1103 		pg_search(&req);
1104 	else
1105 		pg_index(&req);
1106 
1107 	free(req.q.manpath);
1108 	free(req.q.arch);
1109 	free(req.q.sec);
1110 	free(req.q.query);
1111 	for (i = 0; i < (int)req.psz; i++)
1112 		free(req.p[i]);
1113 	free(req.p);
1114 	return EXIT_SUCCESS;
1115 }
1116 
1117 /*
1118  * If PATH_INFO is not a file name, translate it to a query.
1119  */
1120 static void
1121 parse_path_info(struct req *req, const char *path)
1122 {
1123 	char	*dir[4];
1124 	int	 i;
1125 
1126 	req->isquery = 0;
1127 	req->q.equal = 1;
1128 	req->q.manpath = mandoc_strdup(path);
1129 	req->q.arch = NULL;
1130 
1131 	/* Mandatory manual page name. */
1132 	if ((req->q.query = strrchr(req->q.manpath, '/')) == NULL) {
1133 		req->q.query = req->q.manpath;
1134 		req->q.manpath = NULL;
1135 	} else
1136 		*req->q.query++ = '\0';
1137 
1138 	/* Optional trailing section. */
1139 	if ((req->q.sec = strrchr(req->q.query, '.')) != NULL) {
1140 		if(isdigit((unsigned char)req->q.sec[1])) {
1141 			*req->q.sec++ = '\0';
1142 			req->q.sec = mandoc_strdup(req->q.sec);
1143 		} else
1144 			req->q.sec = NULL;
1145 	}
1146 
1147 	/* Handle the case of name[.section] only. */
1148 	if (req->q.manpath == NULL)
1149 		return;
1150 	req->q.query = mandoc_strdup(req->q.query);
1151 
1152 	/* Split directory components. */
1153 	dir[i = 0] = req->q.manpath;
1154 	while ((dir[i + 1] = strchr(dir[i], '/')) != NULL) {
1155 		if (++i == 3) {
1156 			pg_error_badrequest(
1157 			    "You specified too many directory components.");
1158 			exit(EXIT_FAILURE);
1159 		}
1160 		*dir[i]++ = '\0';
1161 	}
1162 
1163 	/* Optional manpath. */
1164 	if ((i = validate_manpath(req, req->q.manpath)) == 0)
1165 		req->q.manpath = NULL;
1166 	else if (dir[1] == NULL)
1167 		return;
1168 
1169 	/* Optional section. */
1170 	if (strncmp(dir[i], "man", 3) == 0) {
1171 		free(req->q.sec);
1172 		req->q.sec = mandoc_strdup(dir[i++] + 3);
1173 	}
1174 	if (dir[i] == NULL) {
1175 		if (req->q.manpath == NULL)
1176 			free(dir[0]);
1177 		return;
1178 	}
1179 	if (dir[i + 1] != NULL) {
1180 		pg_error_badrequest(
1181 		    "You specified an invalid directory component.");
1182 		exit(EXIT_FAILURE);
1183 	}
1184 
1185 	/* Optional architecture. */
1186 	if (i) {
1187 		req->q.arch = mandoc_strdup(dir[i]);
1188 		if (req->q.manpath == NULL)
1189 			free(dir[0]);
1190 	} else
1191 		req->q.arch = dir[0];
1192 }
1193 
1194 /*
1195  * Scan for indexable paths.
1196  */
1197 static void
1198 parse_manpath_conf(struct req *req)
1199 {
1200 	FILE	*fp;
1201 	char	*dp;
1202 	size_t	 dpsz;
1203 	ssize_t	 len;
1204 
1205 	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1206 		warn("%s/manpath.conf", MAN_DIR);
1207 		pg_error_internal();
1208 		exit(EXIT_FAILURE);
1209 	}
1210 
1211 	dp = NULL;
1212 	dpsz = 0;
1213 
1214 	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1215 		if (dp[len - 1] == '\n')
1216 			dp[--len] = '\0';
1217 		req->p = mandoc_realloc(req->p,
1218 		    (req->psz + 1) * sizeof(char *));
1219 		if ( ! validate_urifrag(dp)) {
1220 			warnx("%s/manpath.conf contains "
1221 			    "unsafe path \"%s\"", MAN_DIR, dp);
1222 			pg_error_internal();
1223 			exit(EXIT_FAILURE);
1224 		}
1225 		if (strchr(dp, '/') != NULL) {
1226 			warnx("%s/manpath.conf contains "
1227 			    "path with slash \"%s\"", MAN_DIR, dp);
1228 			pg_error_internal();
1229 			exit(EXIT_FAILURE);
1230 		}
1231 		req->p[req->psz++] = dp;
1232 		dp = NULL;
1233 		dpsz = 0;
1234 	}
1235 	free(dp);
1236 
1237 	if (req->p == NULL) {
1238 		warnx("%s/manpath.conf is empty", MAN_DIR);
1239 		pg_error_internal();
1240 		exit(EXIT_FAILURE);
1241 	}
1242 }
1243