xref: /freebsd/contrib/mandoc/cgi.c (revision c1c95add8c80843ba15d784f95c361d795b1f593)
1  /* $Id: cgi.c,v 1.181 2023/04/28 19:11:03 schwarze Exp $ */
2  /*
3   * Copyright (c) 2014-2019, 2021, 2022 Ingo Schwarze <schwarze@usta.de>
4   * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5   * Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in>
6   *
7   * Permission to use, copy, modify, and distribute this software for any
8   * purpose with or without fee is hereby granted, provided that the above
9   * copyright notice and this permission notice appear in all copies.
10   *
11   * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12   * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13   * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14   * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15   * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16   * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18   *
19   * Implementation of the man.cgi(8) program.
20   */
21  #include "config.h"
22  
23  #include <sys/types.h>
24  #include <sys/time.h>
25  
26  #include <ctype.h>
27  #if HAVE_ERR
28  #include <err.h>
29  #endif
30  #include <errno.h>
31  #include <fcntl.h>
32  #include <limits.h>
33  #include <stdint.h>
34  #include <stdio.h>
35  #include <stdlib.h>
36  #include <string.h>
37  #include <unistd.h>
38  
39  #include "mandoc_aux.h"
40  #include "mandoc.h"
41  #include "roff.h"
42  #include "mdoc.h"
43  #include "man.h"
44  #include "mandoc_parse.h"
45  #include "main.h"
46  #include "manconf.h"
47  #include "mansearch.h"
48  #include "cgi.h"
49  
/*
 * A query as passed to the search function.
 * The string members are either NULL (not specified) or point to
 * allocated strings handed over by set_query_attr().
 */
struct	query {
	char		*manpath; /* desired manual directory */
	char		*arch; /* architecture */
	char		*sec; /* manual section */
	char		*query; /* unparsed query expression */
	int		 equal; /* match whole names, not substrings */
};
60  
/*
 * State of one HTTP request: the parsed query plus the list of
 * manpaths that may be served.  The first manpath, p[0], is the
 * one used when the request does not name a manpath.
 */
struct	req {
	struct query	  q;
	char		**p; /* array of available manpaths */
	size_t		  psz; /* number of available manpaths */
	int		  isquery; /* QUERY_STRING used, not PATH_INFO */
};
67  
/*
 * Tells resp_searchform() whether the query input box
 * gets the "autofocus" attribute.
 */
enum	focus {
	FOCUS_NONE = 0,	/* no element is focused automatically */
	FOCUS_QUERY	/* focus the search query input box */
};
72  
/*
 * Forward declarations of the file-local functions,
 * kept in alphabetical order.
 */
static	void		 html_print(const char *);
static	void		 html_putchar(char);
static	int		 http_decode(char *);
static	void		 http_encode(const char *);
static	void		 parse_manpath_conf(struct req *);
static	void		 parse_path_info(struct req *, const char *);
static	void		 parse_query_string(struct req *, const char *);
static	void		 pg_error_badrequest(const char *);
static	void		 pg_error_internal(void);
static	void		 pg_index(const struct req *);
static	void		 pg_noresult(const struct req *, int, const char *,
				const char *);
static	void		 pg_redirect(const struct req *, const char *);
static	void		 pg_search(const struct req *);
static	void		 pg_searchres(const struct req *,
				struct manpage *, size_t);
static	void		 pg_show(struct req *, const char *);
static	int		 resp_begin_html(int, const char *, const char *);
static	void		 resp_begin_http(int, const char *);
static	void		 resp_catman(const struct req *, const char *);
static	int		 resp_copy(const char *, const char *);
static	void		 resp_end_html(void);
static	void		 resp_format(const struct req *, const char *);
static	void		 resp_searchform(const struct req *, enum focus);
static	void		 resp_show(const struct req *, const char *);
static	void		 set_query_attr(char **, char **);
static	int		 validate_arch(const char *);
static	int		 validate_filename(const char *);
static	int		 validate_manpath(const struct req *, const char *);
static	int		 validate_urifrag(const char *);
103  
/*
 * URI path of this script relative to the server root.
 * It is printed after a leading "/" and may be the empty string.
 */
static	const char	 *scriptname = SCRIPT_NAME;

/*
 * Priority of each manual section, indexed by the section digit
 * minus '1'.  When several pages match in man(1) mode,
 * pg_searchres() shows the one with the smallest value.
 */
static	const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};
/*
 * Values and labels of the section selector in the search form.
 * The two arrays are parallel and must have the same length.
 */
static	const char *const sec_numbers[] = {
    "0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static	const char *const sec_names[] = {
    "All Sections",
    "1 - General Commands",
    "2 - System Calls",
    "3 - Library Functions",
    "3p - Perl Library",
    "4 - Device Drivers",
    "5 - File Formats",
    "6 - Games",
    "7 - Miscellaneous Information",
    "8 - System Manager\'s Manual",
    "9 - Kernel Developer\'s Manual"
};
/* Number of entries in sec_names[]. */
static	const int sec_MAX = sizeof(sec_names) / sizeof(char *);
124  
/*
 * Architecture names accepted by validate_arch() and offered
 * in the architecture selector of the search form.
 */
static	const char *const arch_names[] = {
    "amd64",       "alpha",       "armv7",       "arm64",
    "hppa",        "i386",        "landisk",     "loongson",
    "luna88k",     "macppc",      "mips64",      "octeon",
    "powerpc64",   "riscv64",     "sparc64",

    "amiga",       "arc",         "armish",      "arm32",
    "atari",       "aviion",      "beagle",      "cats",
    "hppa64",      "hp300",
    "ia64",        "mac68k",      "mvme68k",     "mvme88k",
    "mvmeppc",     "palm",        "pc532",       "pegasos",
    "pmax",        "powerpc",     "sgi",         "socppc",
    "solbourne",   "sparc",
    "sun3",        "vax",         "wgrisc",      "x68k",
    "zaurus"
};
/* Number of entries in arch_names[]. */
static	const int arch_MAX = sizeof(arch_names) / sizeof(char *);
142  
/*
 * Write one character to standard output.
 * The four characters ", &, > and < are replaced by the matching
 * HTML entity; every other byte, non-ASCII included, is written
 * unchanged.
 */
static void
html_putchar(char c)
{
	const char	*entity;

	if (c == '"')
		entity = "&quot;";
	else if (c == '&')
		entity = "&amp;";
	else if (c == '>')
		entity = "&gt;";
	else if (c == '<')
		entity = "&lt;";
	else
		entity = NULL;

	if (entity != NULL)
		fputs(entity, stdout);
	else
		putchar((unsigned char)c);
}
169  
/*
 * Write a string to standard output, escaping each character
 * with html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
183  
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous attribute value is freed.  An empty string is not
 * stored: it is freed and the attribute becomes NULL instead.
 * In both cases, *val is NULL on return, so the caller no longer
 * owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	newval = *val;
	free(*attr);
	if (*newval != '\0')
		*attr = newval;
	else {
		free(newval);
		*attr = NULL;
	}
	*val = NULL;
}
200  
201  /*
202   * Parse the QUERY_STRING for key-value pairs
203   * and store the values into the query structure.
204   */
205  static void
parse_query_string(struct req * req,const char * qs)206  parse_query_string(struct req *req, const char *qs)
207  {
208  	char		*key, *val;
209  	size_t		 keysz, valsz;
210  
211  	req->isquery	= 1;
212  	req->q.manpath	= NULL;
213  	req->q.arch	= NULL;
214  	req->q.sec	= NULL;
215  	req->q.query	= NULL;
216  	req->q.equal	= 1;
217  
218  	key = val = NULL;
219  	while (*qs != '\0') {
220  
221  		/* Parse one key. */
222  
223  		keysz = strcspn(qs, "=;&");
224  		key = mandoc_strndup(qs, keysz);
225  		qs += keysz;
226  		if (*qs != '=')
227  			goto next;
228  
229  		/* Parse one value. */
230  
231  		valsz = strcspn(++qs, ";&");
232  		val = mandoc_strndup(qs, valsz);
233  		qs += valsz;
234  
235  		/* Decode and catch encoding errors. */
236  
237  		if ( ! (http_decode(key) && http_decode(val)))
238  			goto next;
239  
240  		/* Handle key-value pairs. */
241  
242  		if ( ! strcmp(key, "query"))
243  			set_query_attr(&req->q.query, &val);
244  
245  		else if ( ! strcmp(key, "apropos"))
246  			req->q.equal = !strcmp(val, "0");
247  
248  		else if ( ! strcmp(key, "manpath")) {
249  #ifdef COMPAT_OLDURI
250  			if ( ! strncmp(val, "OpenBSD ", 8)) {
251  				val[7] = '-';
252  				if ('C' == val[8])
253  					val[8] = 'c';
254  			}
255  #endif
256  			set_query_attr(&req->q.manpath, &val);
257  		}
258  
259  		else if ( ! (strcmp(key, "sec")
260  #ifdef COMPAT_OLDURI
261  		    && strcmp(key, "sektion")
262  #endif
263  		    )) {
264  			if ( ! strcmp(val, "0"))
265  				*val = '\0';
266  			set_query_attr(&req->q.sec, &val);
267  		}
268  
269  		else if ( ! strcmp(key, "arch")) {
270  			if ( ! strcmp(val, "default"))
271  				*val = '\0';
272  			set_query_attr(&req->q.arch, &val);
273  		}
274  
275  		/*
276  		 * The key must be freed in any case.
277  		 * The val may have been handed over to the query
278  		 * structure, in which case it is now NULL.
279  		 */
280  next:
281  		free(key);
282  		key = NULL;
283  		free(val);
284  		val = NULL;
285  
286  		if (*qs != '\0')
287  			qs++;
288  	}
289  }
290  
291  /*
292   * HTTP-decode a string.  The standard explanation is that this turns
293   * "%4e+foo" into "n foo" in the regular way.  This is done in-place
294   * over the allocated string.
295   */
296  static int
http_decode(char * p)297  http_decode(char *p)
298  {
299  	char             hex[3];
300  	char		*q;
301  	int              c;
302  
303  	hex[2] = '\0';
304  
305  	q = p;
306  	for ( ; '\0' != *p; p++, q++) {
307  		if ('%' == *p) {
308  			if ('\0' == (hex[0] = *(p + 1)))
309  				return 0;
310  			if ('\0' == (hex[1] = *(p + 2)))
311  				return 0;
312  			if (1 != sscanf(hex, "%x", &c))
313  				return 0;
314  			if ('\0' == c)
315  				return 0;
316  
317  			*q = (char)c;
318  			p += 2;
319  		} else
320  			*q = '+' == *p ? ' ' : *p;
321  	}
322  
323  	*q = '\0';
324  	return 1;
325  }
326  
/*
 * Write a string to standard output, percent-encoding every byte
 * except alphanumeric characters and the four characters "-._~".
 */
static void
http_encode(const char *p)
{
	const char	*cp;
	int		 unreserved;

	for (cp = p; *cp != '\0'; cp++) {
		unreserved = isalnum((unsigned char)*cp) != 0 ||
		    strchr("-._~", *cp) != NULL;
		if (unreserved)
			putchar(*cp);
		else
			printf("%%%2.2X", (unsigned char)*cp);
	}
}
338  
/*
 * Write the HTTP response headers and the empty line ending them,
 * then flush standard output.
 * A "Status:" line is only written for codes other than 200.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n", stdout);
	fputs("Cache-Control: no-cache\r\n", stdout);
	fputs("Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n", stdout);
	fputs("Pragma: no-cache\r\n", stdout);
	fputs("\r\n", stdout);

	fflush(stdout);
}
355  
/*
 * Copy the content of a file verbatim to standard output.
 * If element is not NULL, an opening tag <element> is printed
 * first; closing it is left to the caller.
 *
 * Returns 0 if the file cannot be opened, in which case nothing
 * is printed, or 1 if it was opened.  Read and write errors after
 * a successful open end the copy early but still return 1, because
 * the opening tag has already been sent and the caller has to
 * close it.
 */
static int
resp_copy(const char *element, const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return 0;

	if (element != NULL)
		printf("<%s>\n", element);

	/* The copy bypasses stdio, so push out buffered data first. */

	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {

		/*
		 * write(2) may transfer less than requested, for
		 * example on a pipe to the web server; keep going
		 * until the whole chunk is out.
		 */

		off = 0;
		while (off < nr) {
			nw = write(STDOUT_FILENO, buf + off,
			    (size_t)(nr - off));
			if (nw > 0)
				off += nw;
			else if (nw == 0 || errno != EINTR)
				goto done;
		}
	}
done:
	close(fd);
	return 1;
}
374  
375  static int
resp_begin_html(int code,const char * msg,const char * file)376  resp_begin_html(int code, const char *msg, const char *file)
377  {
378  	const char	*name, *sec, *cp;
379  	int		 namesz, secsz;
380  
381  	resp_begin_http(code, msg);
382  
383  	printf("<!DOCTYPE html>\n"
384  	       "<html>\n"
385  	       "<head>\n"
386  	       "  <meta charset=\"UTF-8\"/>\n"
387  	       "  <meta name=\"viewport\""
388  		      " content=\"width=device-width, initial-scale=1.0\">\n"
389  	       "  <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
390  	       " type=\"text/css\" media=\"all\">\n"
391  	       "  <title>",
392  	       CSS_DIR);
393  	if (file != NULL) {
394  		cp = strrchr(file, '/');
395  		name = cp == NULL ? file : cp + 1;
396  		cp = strrchr(name, '.');
397  		namesz = cp == NULL ? strlen(name) : cp - name;
398  		sec = NULL;
399  		if (cp != NULL && cp[1] != '0') {
400  			sec = cp + 1;
401  			secsz = strlen(sec);
402  		} else if (name - file > 1) {
403  			for (cp = name - 2; cp >= file; cp--) {
404  				if (*cp < '1' || *cp > '9')
405  					continue;
406  				sec = cp;
407  				secsz = name - cp - 1;
408  				break;
409  			}
410  		}
411  		printf("%.*s", namesz, name);
412  		if (sec != NULL)
413  			printf("(%.*s)", secsz, sec);
414  		fputs(" - ", stdout);
415  	}
416  	printf("%s</title>\n"
417  	       "</head>\n"
418  	       "<body>\n",
419  	       CUSTOMIZE_TITLE);
420  
421  	return resp_copy("header", MAN_DIR "/header.html");
422  }
423  
424  static void
resp_end_html(void)425  resp_end_html(void)
426  {
427  	if (resp_copy("footer", MAN_DIR "/footer.html"))
428  		puts("</footer>");
429  
430  	puts("</body>\n"
431  	     "</html>");
432  }
433  
434  static void
resp_searchform(const struct req * req,enum focus focus)435  resp_searchform(const struct req *req, enum focus focus)
436  {
437  	int		 i;
438  
439  	printf("<form role=\"search\" action=\"/%s\" method=\"get\" "
440  	       "autocomplete=\"off\" autocapitalize=\"none\">\n"
441  	       "  <fieldset>\n"
442  	       "    <legend>Manual Page Search Parameters</legend>\n",
443  	       scriptname);
444  
445  	/* Write query input box. */
446  
447  	printf("    <label>Search query:\n"
448  	       "      <input type=\"search\" name=\"query\" value=\"");
449  	if (req->q.query != NULL)
450  		html_print(req->q.query);
451  	printf("\" size=\"40\"");
452  	if (focus == FOCUS_QUERY)
453  		printf(" autofocus");
454  	puts(">\n    </label>");
455  
456  	/* Write submission buttons. */
457  
458  	printf(	"    <button type=\"submit\" name=\"apropos\" value=\"0\">"
459  		"man</button>\n"
460  		"    <button type=\"submit\" name=\"apropos\" value=\"1\">"
461  		"apropos</button>\n"
462  		"    <br/>\n");
463  
464  	/* Write section selector. */
465  
466  	puts("    <select name=\"sec\" aria-label=\"Manual section\">");
467  	for (i = 0; i < sec_MAX; i++) {
468  		printf("      <option value=\"%s\"", sec_numbers[i]);
469  		if (NULL != req->q.sec &&
470  		    0 == strcmp(sec_numbers[i], req->q.sec))
471  			printf(" selected=\"selected\"");
472  		printf(">%s</option>\n", sec_names[i]);
473  	}
474  	puts("    </select>");
475  
476  	/* Write architecture selector. */
477  
478  	printf(	"    <select name=\"arch\" aria-label=\"CPU architecture\">\n"
479  		"      <option value=\"default\"");
480  	if (NULL == req->q.arch)
481  		printf(" selected=\"selected\"");
482  	puts(">All Architectures</option>");
483  	for (i = 0; i < arch_MAX; i++) {
484  		printf("      <option");
485  		if (NULL != req->q.arch &&
486  		    0 == strcmp(arch_names[i], req->q.arch))
487  			printf(" selected=\"selected\"");
488  		printf(">%s</option>\n", arch_names[i]);
489  	}
490  	puts("    </select>");
491  
492  	/* Write manpath selector. */
493  
494  	if (req->psz > 1) {
495  		puts("    <select name=\"manpath\""
496  		     " aria-label=\"Manual path\">");
497  		for (i = 0; i < (int)req->psz; i++) {
498  			printf("      <option");
499  			if (strcmp(req->q.manpath, req->p[i]) == 0)
500  				printf(" selected=\"selected\"");
501  			printf(">");
502  			html_print(req->p[i]);
503  			puts("</option>");
504  		}
505  		puts("    </select>");
506  	}
507  
508  	puts("  </fieldset>\n"
509  	     "</form>");
510  }
511  
/*
 * Check that a URI fragment consists only of alphanumeric
 * characters and the characters '-', '.', '/', and '_'.
 * Returns 1 if so (also for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		switch (*cp) {
		case '-':
		case '.':
		case '/':
		case '_':
			continue;
		default:
			return 0;
		}
	}
	return 1;
}
525  
526  static int
validate_manpath(const struct req * req,const char * manpath)527  validate_manpath(const struct req *req, const char* manpath)
528  {
529  	size_t	 i;
530  
531  	for (i = 0; i < req->psz; i++)
532  		if ( ! strcmp(manpath, req->p[i]))
533  			return 1;
534  
535  	return 0;
536  }
537  
538  static int
validate_arch(const char * arch)539  validate_arch(const char *arch)
540  {
541  	int	 i;
542  
543  	for (i = 0; i < arch_MAX; i++)
544  		if (strcmp(arch, arch_names[i]) == 0)
545  			return 1;
546  
547  	return 0;
548  }
549  
/*
 * Check a file name relative to the manpath root.
 * After an optional leading "./", the name must start with "man"
 * or "cat" and must not contain "../" or "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (strncmp(file, "./", 2) == 0)
		file += 2;

	/* Refuse any attempt to climb out of the manpath. */

	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	if (strncmp(file, "man", 3) == 0 || strncmp(file, "cat", 3) == 0)
		return 1;

	return 0;
}
560  
561  static void
pg_index(const struct req * req)562  pg_index(const struct req *req)
563  {
564  	if (resp_begin_html(200, NULL, NULL) == 0)
565  		puts("<header>");
566  	resp_searchform(req, FOCUS_QUERY);
567  	printf("</header>\n"
568  	       "<main>\n"
569  	       "<p role=\"doc-notice\" aria-label=\"Usage\">\n"
570  	       "This web interface is documented in the\n"
571  	       "<a class=\"Xr\" href=\"/%s%sman.cgi.8\""
572  	       " aria-label=\"man dot CGI, section 8\">man.cgi(8)</a>\n"
573  	       "manual, and the\n"
574  	       "<a class=\"Xr\" href=\"/%s%sapropos.1\""
575  	       " aria-label=\"apropos, section 1\">apropos(1)</a>\n"
576  	       "manual explains the query syntax.\n"
577  	       "</p>\n"
578  	       "</main>\n",
579  	       scriptname, *scriptname == '\0' ? "" : "/",
580  	       scriptname, *scriptname == '\0' ? "" : "/");
581  	resp_end_html();
582  }
583  
584  static void
pg_noresult(const struct req * req,int code,const char * http_msg,const char * user_msg)585  pg_noresult(const struct req *req, int code, const char *http_msg,
586      const char *user_msg)
587  {
588  	if (resp_begin_html(code, http_msg, NULL) == 0)
589  		puts("<header>");
590  	resp_searchform(req, FOCUS_QUERY);
591  	puts("</header>");
592  	puts("<main>");
593  	puts("<p role=\"doc-notice\" aria-label=\"No result\">");
594  	puts(user_msg);
595  	puts("</p>");
596  	puts("</main>");
597  	resp_end_html();
598  }
599  
600  static void
pg_error_badrequest(const char * msg)601  pg_error_badrequest(const char *msg)
602  {
603  	if (resp_begin_html(400, "Bad Request", NULL))
604  		puts("</header>");
605  	puts("<main>\n"
606  	     "<h1>Bad Request</h1>\n"
607  	     "<p role=\"doc-notice\" aria-label=\"Bad Request\">");
608  	puts(msg);
609  	printf("Try again from the\n"
610  	       "<a href=\"/%s\">main page</a>.\n"
611  	       "</p>\n"
612  	       "</main>\n", scriptname);
613  	resp_end_html();
614  }
615  
/*
 * Write a minimal "500 Internal Server Error" page.
 */
static void
pg_error_internal(void)
{
	int	 have_header;

	/* Close the <header> only if resp_begin_html() opened one. */

	have_header = resp_begin_html(500, "Internal Server Error", NULL);
	if (have_header)
		fputs("</header>\n", stdout);

	fputs("<main><p role=\"doc-notice\">Internal Server Error</p></main>\n",
	    stdout);
	resp_end_html();
}
624  
625  static void
pg_redirect(const struct req * req,const char * name)626  pg_redirect(const struct req *req, const char *name)
627  {
628  	printf("Status: 303 See Other\r\n"
629  	    "Location: /");
630  	if (*scriptname != '\0')
631  		printf("%s/", scriptname);
632  	if (strcmp(req->q.manpath, req->p[0]))
633  		printf("%s/", req->q.manpath);
634  	if (req->q.arch != NULL)
635  		printf("%s/", req->q.arch);
636  	http_encode(name);
637  	if (req->q.sec != NULL) {
638  		putchar('.');
639  		http_encode(req->q.sec);
640  	}
641  	printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
642  }
643  
644  static void
pg_searchres(const struct req * req,struct manpage * r,size_t sz)645  pg_searchres(const struct req *req, struct manpage *r, size_t sz)
646  {
647  	char		*arch, *archend;
648  	const char	*sec;
649  	size_t		 i, iuse;
650  	int		 archprio, archpriouse;
651  	int		 prio, priouse;
652  	int		 have_header;
653  
654  	for (i = 0; i < sz; i++) {
655  		if (validate_filename(r[i].file))
656  			continue;
657  		warnx("invalid filename %s in %s database",
658  		    r[i].file, req->q.manpath);
659  		pg_error_internal();
660  		return;
661  	}
662  
663  	if (req->isquery && sz == 1) {
664  		/*
665  		 * If we have just one result, then jump there now
666  		 * without any delay.
667  		 */
668  		printf("Status: 303 See Other\r\n"
669  		    "Location: /");
670  		if (*scriptname != '\0')
671  			printf("%s/", scriptname);
672  		if (strcmp(req->q.manpath, req->p[0]))
673  			printf("%s/", req->q.manpath);
674  		printf("%s\r\n"
675  		    "Content-Type: text/html; charset=utf-8\r\n\r\n",
676  		    r[0].file);
677  		return;
678  	}
679  
680  	/*
681  	 * In man(1) mode, show one of the pages
682  	 * even if more than one is found.
683  	 */
684  
685  	iuse = 0;
686  	if (req->q.equal || sz == 1) {
687  		priouse = 20;
688  		archpriouse = 3;
689  		for (i = 0; i < sz; i++) {
690  			sec = r[i].file;
691  			sec += strcspn(sec, "123456789");
692  			if (sec[0] == '\0')
693  				continue;
694  			prio = sec_prios[sec[0] - '1'];
695  			if (sec[1] != '/')
696  				prio += 10;
697  			if (req->q.arch == NULL) {
698  				archprio =
699  				    ((arch = strchr(sec + 1, '/'))
700  					== NULL) ? 3 :
701  				    ((archend = strchr(arch + 1, '/'))
702  					== NULL) ? 0 :
703  				    strncmp(arch, "amd64/",
704  					archend - arch) ? 2 : 1;
705  				if (archprio < archpriouse) {
706  					archpriouse = archprio;
707  					priouse = prio;
708  					iuse = i;
709  					continue;
710  				}
711  				if (archprio > archpriouse)
712  					continue;
713  			}
714  			if (prio >= priouse)
715  				continue;
716  			priouse = prio;
717  			iuse = i;
718  		}
719  		have_header = resp_begin_html(200, NULL, r[iuse].file);
720  	} else
721  		have_header = resp_begin_html(200, NULL, NULL);
722  
723  	if (have_header == 0)
724  		puts("<header>");
725  	resp_searchform(req,
726  	    req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
727  	puts("</header>");
728  
729  	if (sz > 1) {
730  		puts("<nav>");
731  		puts("<table class=\"results\">");
732  		for (i = 0; i < sz; i++) {
733  			printf("  <tr>\n"
734  			       "    <td>"
735  			       "<a class=\"Xr\" href=\"/");
736  			if (*scriptname != '\0')
737  				printf("%s/", scriptname);
738  			if (strcmp(req->q.manpath, req->p[0]))
739  				printf("%s/", req->q.manpath);
740  			printf("%s\">", r[i].file);
741  			html_print(r[i].names);
742  			printf("</a></td>\n"
743  			       "    <td><span class=\"Nd\">");
744  			html_print(r[i].output);
745  			puts("</span></td>\n"
746  			     "  </tr>");
747  		}
748  		puts("</table>");
749  		puts("</nav>");
750  	}
751  
752  	if (req->q.equal || sz == 1) {
753  		puts("<hr>");
754  		resp_show(req, r[iuse].file);
755  	}
756  
757  	resp_end_html();
758  }
759  
/*
 * Write a preformatted manual page (cat page) as HTML inside
 * <div class="catman"><pre>.
 *
 * The file is read line by line.  A character followed by a
 * backspace and another character is an overstrike sequence:
 *  - '_' overstruck by anything prints the second character in italics,
 *  - some pairs of punctuation print as a bullet '*' or as a '+',
 *  - any other pair prints the second character in bold.
 * All text is HTML-escaped.  If the file cannot be opened, a notice
 * is written instead.  The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE		*f;
	char		*p;
	size_t		 sz;
	ssize_t		 len;
	int		 i;
	int		 italic, bold;
	char		 under, over;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p role=\"doc-notice\">\n"
		     "  You specified an invalid manual file.\n"
		     "</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	     "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if ('\b' == p[i] || '\n' == p[i])
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 * If we're in back-space mode, make sure we'll
			 * have something to enter when we backspace.
			 */

			if ('\b' != p[i + 1]) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				html_putchar(p[i]);
				continue;
			} else if (i + 2 >= len)
				continue;

			/*
			 * From here on, p[i + 1] is a backspace:
			 * "under" is overstruck by "over".
			 */

			under = p[i];
			over = p[i + 2];

			/* Italic mode. */

			if ('_' == under) {
				if (bold)
					printf("</b>");
				if ( ! italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of each pair have to be
			 * compared across the backspace.
			 */

			if (('+' == under && 'o' == over) ||
			    ('o' == under && '+' == over) ||
			    ('|' == under && '=' == over) ||
			    ('=' == under && '|' == over) ||
			    ('*' == under && '=' == over) ||
			    ('=' == under && '*' == over) ||
			    ('*' == under && '|' == over) ||
			    ('|' == under && '*' == over)) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('*');
				i += 2;
				continue;
			} else if (('|' == under && '-' == over) ||
			    ('-' == under && '|' == over) ||
			    ('+' == under && '-' == over) ||
			    ('-' == under && '+' == over) ||
			    ('+' == under && '|' == over) ||
			    ('|' == under && '+' == over)) {
				if (italic)
					printf("</i>");
				if (bold)
					printf("</b>");
				italic = bold = 0;
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if ( ! bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		if (italic)
			printf("</i>");
		if (bold)
			printf("</b>");

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	     "</div>");

	fclose(f);
}
898  
/*
 * Parse a manual page source file and format it as HTML with the
 * mandoc HTML formatter.
 * If the file cannot be opened, a notice is written instead.
 */
static void
resp_format(const struct req *req, const char *file)
{
	struct manoutput conf;
	struct mparse	*mp;
	struct roff_meta *meta;
	void		*vp;
	int		 fd;
	int		 usepath;

	if (-1 == (fd = open(file, O_RDONLY))) {
		puts("<p role=\"doc-notice\">\n"
		     "  You specified an invalid manual file.\n"
		     "</p>");
		return;
	}

	/* Parse the file; the descriptor is not needed afterwards. */

	mchars_alloc();
	mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
	    MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
	mparse_readfd(mp, fd, file);
	close(fd);
	meta = mparse_result(mp);

	/*
	 * Set up the formatter options.  The "man" option becomes
	 * "/script/manpath/%N.%S", where the script name is left out
	 * if empty and the manpath if it is the first one available.
	 * NOTE(review): %N and %S are presumably replaced by the
	 * formatter with the name and section of each cross
	 * reference, and "fragment" presumably suppresses the
	 * document frame, which resp_begin_html() and
	 * resp_end_html() already provide - confirm in the
	 * formatter.
	 */

	memset(&conf, 0, sizeof(conf));
	conf.fragment = 1;
	conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
	usepath = strcmp(req->q.manpath, req->p[0]);
	mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
	    scriptname, *scriptname == '\0' ? "" : "/",
	    usepath ? req->q.manpath : "", usepath ? "/" : "");

	/* Format with the function matching the macro language. */

	vp = html_alloc(&conf);
	if (meta->macroset == MACROSET_MDOC)
		html_mdoc(vp, meta);
	else
		html_man(vp, meta);

	/* Release the formatter, the parser, and the option strings. */

	html_free(vp);
	mparse_free(mp);
	mchars_free();
	free(conf.man);
	free(conf.style);
}
943  
/*
 * Write the content of a manual page file.
 * After stripping an optional leading "./", names starting with
 * 'c' (the cat* directories) are treated as preformatted pages;
 * all other files are parsed and formatted as manual sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	const char	*rel;

	rel = file;
	if (strncmp(rel, "./", 2) == 0)
		rel += 2;

	if (*rel != 'c') {
		resp_format(req, rel);
		return;
	}
	resp_catman(req, rel);
}
956  
957  static void
pg_show(struct req * req,const char * fullpath)958  pg_show(struct req *req, const char *fullpath)
959  {
960  	char		*manpath;
961  	const char	*file;
962  
963  	if ((file = strchr(fullpath, '/')) == NULL) {
964  		pg_error_badrequest(
965  		    "You did not specify a page to show.");
966  		return;
967  	}
968  	manpath = mandoc_strndup(fullpath, file - fullpath);
969  	file++;
970  
971  	if ( ! validate_manpath(req, manpath)) {
972  		pg_error_badrequest(
973  		    "You specified an invalid manpath.");
974  		free(manpath);
975  		return;
976  	}
977  
978  	/*
979  	 * Begin by chdir()ing into the manpath.
980  	 * This way we can pick up the database files, which are
981  	 * relative to the manpath root.
982  	 */
983  
984  	if (chdir(manpath) == -1) {
985  		warn("chdir %s", manpath);
986  		pg_error_internal();
987  		free(manpath);
988  		return;
989  	}
990  	free(manpath);
991  
992  	if ( ! validate_filename(file)) {
993  		pg_error_badrequest(
994  		    "You specified an invalid manual file.");
995  		return;
996  	}
997  
998  	if (resp_begin_html(200, NULL, file) == 0)
999  		puts("<header>");
1000  	resp_searchform(req, FOCUS_NONE);
1001  	puts("</header>");
1002  	resp_show(req, file);
1003  	resp_end_html();
1004  }
1005  
1006  static void
pg_search(const struct req * req)1007  pg_search(const struct req *req)
1008  {
1009  	struct mansearch	  search;
1010  	struct manpaths		  paths;
1011  	struct manpage		 *res;
1012  	char			**argv;
1013  	char			 *query, *rp, *wp;
1014  	size_t			  ressz;
1015  	int			  argc;
1016  
1017  	/*
1018  	 * Begin by chdir()ing into the root of the manpath.
1019  	 * This way we can pick up the database files, which are
1020  	 * relative to the manpath root.
1021  	 */
1022  
1023  	if (chdir(req->q.manpath) == -1) {
1024  		warn("chdir %s", req->q.manpath);
1025  		pg_error_internal();
1026  		return;
1027  	}
1028  
1029  	search.arch = req->q.arch;
1030  	search.sec = req->q.sec;
1031  	search.outkey = "Nd";
1032  	search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
1033  	search.firstmatch = 1;
1034  
1035  	paths.sz = 1;
1036  	paths.paths = mandoc_malloc(sizeof(char *));
1037  	paths.paths[0] = mandoc_strdup(".");
1038  
1039  	/*
1040  	 * Break apart at spaces with backslash-escaping.
1041  	 */
1042  
1043  	argc = 0;
1044  	argv = NULL;
1045  	rp = query = mandoc_strdup(req->q.query);
1046  	for (;;) {
1047  		while (isspace((unsigned char)*rp))
1048  			rp++;
1049  		if (*rp == '\0')
1050  			break;
1051  		argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1052  		argv[argc++] = wp = rp;
1053  		for (;;) {
1054  			if (isspace((unsigned char)*rp)) {
1055  				*wp = '\0';
1056  				rp++;
1057  				break;
1058  			}
1059  			if (rp[0] == '\\' && rp[1] != '\0')
1060  				rp++;
1061  			if (wp != rp)
1062  				*wp = *rp;
1063  			if (*rp == '\0')
1064  				break;
1065  			wp++;
1066  			rp++;
1067  		}
1068  	}
1069  
1070  	res = NULL;
1071  	ressz = 0;
1072  	if (req->isquery && req->q.equal && argc == 1)
1073  		pg_redirect(req, argv[0]);
1074  	else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1075  		pg_noresult(req, 400, "Bad Request",
1076  		    "You entered an invalid query.");
1077  	else if (ressz == 0)
1078  		pg_noresult(req, 404, "Not Found", "No results found.");
1079  	else
1080  		pg_searchres(req, res, ressz);
1081  
1082  	free(query);
1083  	mansearch_free(res, ressz);
1084  	free(paths.paths[0]);
1085  	free(paths.paths);
1086  }
1087  
1088  int
main(void)1089  main(void)
1090  {
1091  	struct req	 req;
1092  	struct itimerval itimer;
1093  	const char	*path;
1094  	const char	*querystring;
1095  	int		 i;
1096  
1097  #if HAVE_PLEDGE
1098  	/*
1099  	 * The "rpath" pledge could be revoked after mparse_readfd()
1100  	 * if the file descriptor to "/footer.html" would be opened
1101  	 * up front, but it's probably not worth the complication
1102  	 * of the code it would cause: it would require scattering
1103  	 * pledge() calls in multiple low-level resp_*() functions.
1104  	 */
1105  
1106  	if (pledge("stdio rpath", NULL) == -1) {
1107  		warn("pledge");
1108  		pg_error_internal();
1109  		return EXIT_FAILURE;
1110  	}
1111  #endif
1112  
1113  	/* Poor man's ReDoS mitigation. */
1114  
1115  	itimer.it_value.tv_sec = 2;
1116  	itimer.it_value.tv_usec = 0;
1117  	itimer.it_interval.tv_sec = 2;
1118  	itimer.it_interval.tv_usec = 0;
1119  	if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1120  		warn("setitimer");
1121  		pg_error_internal();
1122  		return EXIT_FAILURE;
1123  	}
1124  
1125  	/*
1126  	 * First we change directory into the MAN_DIR so that
1127  	 * subsequent scanning for manpath directories is rooted
1128  	 * relative to the same position.
1129  	 */
1130  
1131  	if (chdir(MAN_DIR) == -1) {
1132  		warn("MAN_DIR: %s", MAN_DIR);
1133  		pg_error_internal();
1134  		return EXIT_FAILURE;
1135  	}
1136  
1137  	memset(&req, 0, sizeof(struct req));
1138  	req.q.equal = 1;
1139  	parse_manpath_conf(&req);
1140  
1141  	/* Parse the path info and the query string. */
1142  
1143  	if ((path = getenv("PATH_INFO")) == NULL)
1144  		path = "";
1145  	else if (*path == '/')
1146  		path++;
1147  
1148  	if (*path != '\0') {
1149  		parse_path_info(&req, path);
1150  		if (req.q.manpath == NULL || req.q.sec == NULL ||
1151  		    *req.q.query == '\0' || access(path, F_OK) == -1)
1152  			path = "";
1153  	} else if ((querystring = getenv("QUERY_STRING")) != NULL)
1154  		parse_query_string(&req, querystring);
1155  
1156  	/* Validate parsed data and add defaults. */
1157  
1158  	if (req.q.manpath == NULL)
1159  		req.q.manpath = mandoc_strdup(req.p[0]);
1160  	else if ( ! validate_manpath(&req, req.q.manpath)) {
1161  		pg_error_badrequest(
1162  		    "You specified an invalid manpath.");
1163  		return EXIT_FAILURE;
1164  	}
1165  
1166  	if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1167  		pg_error_badrequest(
1168  		    "You specified an invalid architecture.");
1169  		return EXIT_FAILURE;
1170  	}
1171  
1172  	/* Dispatch to the three different pages. */
1173  
1174  	if ('\0' != *path)
1175  		pg_show(&req, path);
1176  	else if (NULL != req.q.query)
1177  		pg_search(&req);
1178  	else
1179  		pg_index(&req);
1180  
1181  	free(req.q.manpath);
1182  	free(req.q.arch);
1183  	free(req.q.sec);
1184  	free(req.q.query);
1185  	for (i = 0; i < (int)req.psz; i++)
1186  		free(req.p[i]);
1187  	free(req.p);
1188  	return EXIT_SUCCESS;
1189  }
1190  
1191  /*
1192   * Translate PATH_INFO to a query.
1193   */
1194  static void
parse_path_info(struct req * req,const char * path)1195  parse_path_info(struct req *req, const char *path)
1196  {
1197  	const char	*name, *sec, *end;
1198  
1199  	req->isquery = 0;
1200  	req->q.equal = 1;
1201  	req->q.manpath = NULL;
1202  	req->q.arch = NULL;
1203  
1204  	/* Mandatory manual page name. */
1205  	if ((name = strrchr(path, '/')) == NULL)
1206  		name = path;
1207  	else
1208  		name++;
1209  
1210  	/* Optional trailing section. */
1211  	sec = strrchr(name, '.');
1212  	if (sec != NULL && isdigit((unsigned char)*++sec)) {
1213  		req->q.query = mandoc_strndup(name, sec - name - 1);
1214  		req->q.sec = mandoc_strdup(sec);
1215  	} else {
1216  		req->q.query = mandoc_strdup(name);
1217  		req->q.sec = NULL;
1218  	}
1219  
1220  	/* Handle the case of name[.section] only. */
1221  	if (name == path)
1222  		return;
1223  
1224  	/* Optional manpath. */
1225  	end = strchr(path, '/');
1226  	req->q.manpath = mandoc_strndup(path, end - path);
1227  	if (validate_manpath(req, req->q.manpath)) {
1228  		path = end + 1;
1229  		if (name == path)
1230  			return;
1231  	} else {
1232  		free(req->q.manpath);
1233  		req->q.manpath = NULL;
1234  	}
1235  
1236  	/* Optional section. */
1237  	if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1238  		path += 3;
1239  		end = strchr(path, '/');
1240  		free(req->q.sec);
1241  		req->q.sec = mandoc_strndup(path, end - path);
1242  		path = end + 1;
1243  		if (name == path)
1244  			return;
1245  	}
1246  
1247  	/* Optional architecture. */
1248  	end = strchr(path, '/');
1249  	if (end + 1 != name) {
1250  		pg_error_badrequest(
1251  		    "You specified too many directory components.");
1252  		exit(EXIT_FAILURE);
1253  	}
1254  	req->q.arch = mandoc_strndup(path, end - path);
1255  	if (validate_arch(req->q.arch) == 0) {
1256  		pg_error_badrequest(
1257  		    "You specified an invalid directory component.");
1258  		exit(EXIT_FAILURE);
1259  	}
1260  }
1261  
1262  /*
1263   * Scan for indexable paths.
1264   */
1265  static void
parse_manpath_conf(struct req * req)1266  parse_manpath_conf(struct req *req)
1267  {
1268  	FILE	*fp;
1269  	char	*dp;
1270  	size_t	 dpsz;
1271  	ssize_t	 len;
1272  
1273  	if ((fp = fopen("manpath.conf", "r")) == NULL) {
1274  		warn("%s/manpath.conf", MAN_DIR);
1275  		pg_error_internal();
1276  		exit(EXIT_FAILURE);
1277  	}
1278  
1279  	dp = NULL;
1280  	dpsz = 0;
1281  
1282  	while ((len = getline(&dp, &dpsz, fp)) != -1) {
1283  		if (dp[len - 1] == '\n')
1284  			dp[--len] = '\0';
1285  		req->p = mandoc_realloc(req->p,
1286  		    (req->psz + 1) * sizeof(char *));
1287  		if ( ! validate_urifrag(dp)) {
1288  			warnx("%s/manpath.conf contains "
1289  			    "unsafe path \"%s\"", MAN_DIR, dp);
1290  			pg_error_internal();
1291  			exit(EXIT_FAILURE);
1292  		}
1293  		if (strchr(dp, '/') != NULL) {
1294  			warnx("%s/manpath.conf contains "
1295  			    "path with slash \"%s\"", MAN_DIR, dp);
1296  			pg_error_internal();
1297  			exit(EXIT_FAILURE);
1298  		}
1299  		req->p[req->psz++] = dp;
1300  		dp = NULL;
1301  		dpsz = 0;
1302  	}
1303  	free(dp);
1304  
1305  	if (req->p == NULL) {
1306  		warnx("%s/manpath.conf is empty", MAN_DIR);
1307  		pg_error_internal();
1308  		exit(EXIT_FAILURE);
1309  	}
1310  }
1311