xref: /freebsd/contrib/mandoc/mansearch.c (revision a63915c2d7ff177ce364488f86eff99949402051)
1 /*	$Id: mansearch.c,v 1.82 2019/07/01 22:56:24 schwarze Exp $ */
2 /*
3  * Copyright (c) 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2013-2018 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/mman.h>
21 #include <sys/types.h>
22 
23 #include <assert.h>
24 #if HAVE_ERR
25 #include <err.h>
26 #endif
27 #include <errno.h>
28 #include <fcntl.h>
29 #include <glob.h>
30 #include <limits.h>
31 #include <regex.h>
32 #include <stdio.h>
33 #include <stdint.h>
34 #include <stddef.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <unistd.h>
38 
39 #include "mandoc_aux.h"
40 #include "mandoc_ohash.h"
41 #include "manconf.h"
42 #include "mansearch.h"
43 #include "dbm.h"
44 
45 struct	expr {
46 	/* Used for terms: */
47 	struct dbm_match match;   /* Match type and expression. */
48 	uint64_t	 bits;    /* Type mask. */
49 	/* Used for OR and AND groups: */
50 	struct expr	*next;    /* Next child in the parent group. */
51 	struct expr	*child;   /* First child in this group. */
52 	enum { EXPR_TERM, EXPR_OR, EXPR_AND } type;
53 };
54 
55 const char *const mansearch_keynames[KEY_MAX] = {
56 	"arch",	"sec",	"Xr",	"Ar",	"Fa",	"Fl",	"Dv",	"Fn",
57 	"Ic",	"Pa",	"Cm",	"Li",	"Em",	"Cd",	"Va",	"Ft",
58 	"Tn",	"Er",	"Ev",	"Sy",	"Sh",	"In",	"Ss",	"Ox",
59 	"An",	"Mt",	"St",	"Bx",	"At",	"Nx",	"Fx",	"Lk",
60 	"Ms",	"Bsx",	"Dx",	"Rs",	"Vt",	"Lb",	"Nm",	"Nd"
61 };
62 
63 
/* Evaluation of a compiled expression against the open database. */
static	struct ohash	*manmerge(struct expr *e, struct ohash *htab);
static	struct ohash	*manmerge_term(struct expr *e, struct ohash *htab);
static	struct ohash	*manmerge_or(struct expr *e, struct ohash *htab);
static	struct ohash	*manmerge_and(struct expr *e, struct ohash *htab);

/* Formatting of result fields. */
static	char		*buildnames(const struct dbm_page *page);
static	char		*buildoutput(size_t im, struct dbm_page *page);
static	size_t		 lstlen(const char *cp, size_t sep);
static	void		 lstcat(char *buf, size_t *i, const char *cp,
				const char *sep);
static	int		 lstmatch(const char *want, const char *have);

/* Compilation and disposal of expressions. */
static	struct expr	*exprcomp(const struct mansearch *search,
				int argc, char *argv[], int *argi);
static	struct expr	*expr_and(const struct mansearch *search,
				int argc, char *argv[], int *argi);
static	struct expr	*exprterm(const struct mansearch *search,
				int argc, char *argv[], int *argi);
static	void		 exprfree(struct expr *e);

/* Ordering of results. */
static	int		 manpage_compare(const void *vp1, const void *vp2);
81 
82 
83 int
mansearch(const struct mansearch * search,const struct manpaths * paths,int argc,char * argv[],struct manpage ** res,size_t * sz)84 mansearch(const struct mansearch *search,
85 		const struct manpaths *paths,
86 		int argc, char *argv[],
87 		struct manpage **res, size_t *sz)
88 {
89 	char		 buf[PATH_MAX];
90 	struct dbm_res	*rp;
91 	struct expr	*e;
92 	struct dbm_page	*page;
93 	struct manpage	*mpage;
94 	struct ohash	*htab;
95 	size_t		 cur, i, maxres, outkey;
96 	unsigned int	 slot;
97 	int		 argi, chdir_status, getcwd_status, im;
98 
99 	argi = 0;
100 	if ((e = exprcomp(search, argc, argv, &argi)) == NULL) {
101 		*sz = 0;
102 		return 0;
103 	}
104 
105 	cur = maxres = 0;
106 	if (res != NULL)
107 		*res = NULL;
108 
109 	outkey = KEY_Nd;
110 	if (search->outkey != NULL)
111 		for (im = 0; im < KEY_MAX; im++)
112 			if (0 == strcasecmp(search->outkey,
113 			    mansearch_keynames[im])) {
114 				outkey = im;
115 				break;
116 			}
117 
118 	/*
119 	 * Remember the original working directory, if possible.
120 	 * This will be needed if the second or a later directory
121 	 * is given as a relative path.
122 	 * Do not error out if the current directory is not
123 	 * searchable: Maybe it won't be needed after all.
124 	 */
125 
126 	if (getcwd(buf, PATH_MAX) == NULL) {
127 		getcwd_status = 0;
128 		(void)strlcpy(buf, strerror(errno), sizeof(buf));
129 	} else
130 		getcwd_status = 1;
131 
132 	/*
133 	 * Loop over the directories (containing databases) for us to
134 	 * search.
135 	 * Don't let missing/bad databases/directories phase us.
136 	 * In each, try to open the resident database and, if it opens,
137 	 * scan it for our match expression.
138 	 */
139 
140 	chdir_status = 0;
141 	for (i = 0; i < paths->sz; i++) {
142 		if (chdir_status && paths->paths[i][0] != '/') {
143 			if ( ! getcwd_status) {
144 				warnx("%s: getcwd: %s", paths->paths[i], buf);
145 				continue;
146 			} else if (chdir(buf) == -1) {
147 				warn("%s", buf);
148 				continue;
149 			}
150 		}
151 		if (chdir(paths->paths[i]) == -1) {
152 			warn("%s", paths->paths[i]);
153 			continue;
154 		}
155 		chdir_status = 1;
156 
157 		if (dbm_open(MANDOC_DB) == -1) {
158 			if (errno != ENOENT)
159 				warn("%s/%s", paths->paths[i], MANDOC_DB);
160 			continue;
161 		}
162 
163 		if ((htab = manmerge(e, NULL)) == NULL) {
164 			dbm_close();
165 			continue;
166 		}
167 
168 		for (rp = ohash_first(htab, &slot); rp != NULL;
169 		    rp = ohash_next(htab, &slot)) {
170 			page = dbm_page_get(rp->page);
171 
172 			if (lstmatch(search->sec, page->sect) == 0 ||
173 			    lstmatch(search->arch, page->arch) == 0 ||
174 			    (search->argmode == ARG_NAME &&
175 			     rp->bits <= (int32_t)(NAME_SYN & NAME_MASK)))
176 				continue;
177 
178 			if (res == NULL) {
179 				cur = 1;
180 				break;
181 			}
182 			if (cur + 1 > maxres) {
183 				maxres += 1024;
184 				*res = mandoc_reallocarray(*res,
185 				    maxres, sizeof(**res));
186 			}
187 			mpage = *res + cur;
188 			mandoc_asprintf(&mpage->file, "%s/%s",
189 			    paths->paths[i], page->file + 1);
190 			if (access(chdir_status ? page->file + 1 :
191 			    mpage->file, R_OK) == -1) {
192 				warn("%s", mpage->file);
193 				warnx("outdated mandoc.db contains "
194 				    "bogus %s entry, run makewhatis %s",
195 				    page->file + 1, paths->paths[i]);
196 				free(mpage->file);
197 				free(rp);
198 				continue;
199 			}
200 			mpage->names = buildnames(page);
201 			mpage->output = buildoutput(outkey, page);
202 			mpage->bits = search->firstmatch ? rp->bits : 0;
203 			mpage->ipath = i;
204 			mpage->sec = *page->sect - '0';
205 			if (mpage->sec < 0 || mpage->sec > 9)
206 				mpage->sec = 10;
207 			mpage->form = *page->file;
208 			free(rp);
209 			cur++;
210 		}
211 		ohash_delete(htab);
212 		free(htab);
213 		dbm_close();
214 
215 		/*
216 		 * In man(1) mode, prefer matches in earlier trees
217 		 * over matches in later trees.
218 		 */
219 
220 		if (cur && search->firstmatch)
221 			break;
222 	}
223 	if (res != NULL)
224 		qsort(*res, cur, sizeof(struct manpage), manpage_compare);
225 	if (chdir_status && getcwd_status && chdir(buf) == -1)
226 		warn("%s", buf);
227 	exprfree(e);
228 	*sz = cur;
229 	return res != NULL || cur;
230 }
231 
232 /*
233  * Merge the results for the expression tree rooted at e
234  * into the the result list htab.
235  */
236 static struct ohash *
manmerge(struct expr * e,struct ohash * htab)237 manmerge(struct expr *e, struct ohash *htab)
238 {
239 	switch (e->type) {
240 	case EXPR_TERM:
241 		return manmerge_term(e, htab);
242 	case EXPR_OR:
243 		return manmerge_or(e->child, htab);
244 	case EXPR_AND:
245 		return manmerge_and(e->child, htab);
246 	default:
247 		abort();
248 	}
249 }
250 
251 static struct ohash *
manmerge_term(struct expr * e,struct ohash * htab)252 manmerge_term(struct expr *e, struct ohash *htab)
253 {
254 	struct dbm_res	 res, *rp;
255 	uint64_t	 ib;
256 	unsigned int	 slot;
257 	int		 im;
258 
259 	if (htab == NULL) {
260 		htab = mandoc_malloc(sizeof(*htab));
261 		mandoc_ohash_init(htab, 4, offsetof(struct dbm_res, page));
262 	}
263 
264 	for (im = 0, ib = 1; im < KEY_MAX; im++, ib <<= 1) {
265 		if ((e->bits & ib) == 0)
266 			continue;
267 
268 		switch (ib) {
269 		case TYPE_arch:
270 			dbm_page_byarch(&e->match);
271 			break;
272 		case TYPE_sec:
273 			dbm_page_bysect(&e->match);
274 			break;
275 		case TYPE_Nm:
276 			dbm_page_byname(&e->match);
277 			break;
278 		case TYPE_Nd:
279 			dbm_page_bydesc(&e->match);
280 			break;
281 		default:
282 			dbm_page_bymacro(im - 2, &e->match);
283 			break;
284 		}
285 
286 		/*
287 		 * When hashing for deduplication, use the unique
288 		 * page ID itself instead of a hash function;
289 		 * that is quite efficient.
290 		 */
291 
292 		for (;;) {
293 			res = dbm_page_next();
294 			if (res.page == -1)
295 				break;
296 			slot = ohash_lookup_memory(htab,
297 			    (char *)&res, sizeof(res.page), res.page);
298 			if ((rp = ohash_find(htab, slot)) != NULL) {
299 				rp->bits |= res.bits;
300 				continue;
301 			}
302 			rp = mandoc_malloc(sizeof(*rp));
303 			*rp = res;
304 			ohash_insert(htab, slot, rp);
305 		}
306 	}
307 	return htab;
308 }
309 
310 static struct ohash *
manmerge_or(struct expr * e,struct ohash * htab)311 manmerge_or(struct expr *e, struct ohash *htab)
312 {
313 	while (e != NULL) {
314 		htab = manmerge(e, htab);
315 		e = e->next;
316 	}
317 	return htab;
318 }
319 
320 static struct ohash *
manmerge_and(struct expr * e,struct ohash * htab)321 manmerge_and(struct expr *e, struct ohash *htab)
322 {
323 	struct ohash	*hand, *h1, *h2;
324 	struct dbm_res	*res;
325 	unsigned int	 slot1, slot2;
326 
327 	/* Evaluate the first term of the AND clause. */
328 
329 	hand = manmerge(e, NULL);
330 
331 	while ((e = e->next) != NULL) {
332 
333 		/* Evaluate the next term and prepare for ANDing. */
334 
335 		h2 = manmerge(e, NULL);
336 		if (ohash_entries(h2) < ohash_entries(hand)) {
337 			h1 = h2;
338 			h2 = hand;
339 		} else
340 			h1 = hand;
341 		hand = mandoc_malloc(sizeof(*hand));
342 		mandoc_ohash_init(hand, 4, offsetof(struct dbm_res, page));
343 
344 		/* Keep all pages that are in both result sets. */
345 
346 		for (res = ohash_first(h1, &slot1); res != NULL;
347 		    res = ohash_next(h1, &slot1)) {
348 			if (ohash_find(h2, ohash_lookup_memory(h2,
349 			    (char *)res, sizeof(res->page),
350 			    res->page)) == NULL)
351 				free(res);
352 			else
353 				ohash_insert(hand, ohash_lookup_memory(hand,
354 				    (char *)res, sizeof(res->page),
355 				    res->page), res);
356 		}
357 
358 		/* Discard the merged results. */
359 
360 		for (res = ohash_first(h2, &slot2); res != NULL;
361 		    res = ohash_next(h2, &slot2))
362 			free(res);
363 		ohash_delete(h2);
364 		free(h2);
365 		ohash_delete(h1);
366 		free(h1);
367 	}
368 
369 	/* Merge the result of the AND into htab. */
370 
371 	if (htab == NULL)
372 		return hand;
373 
374 	for (res = ohash_first(hand, &slot1); res != NULL;
375 	    res = ohash_next(hand, &slot1)) {
376 		slot2 = ohash_lookup_memory(htab,
377 		    (char *)res, sizeof(res->page), res->page);
378 		if (ohash_find(htab, slot2) == NULL)
379 			ohash_insert(htab, slot2, res);
380 		else
381 			free(res);
382 	}
383 
384 	/* Discard the merged result. */
385 
386 	ohash_delete(hand);
387 	free(hand);
388 	return htab;
389 }
390 
391 void
mansearch_free(struct manpage * res,size_t sz)392 mansearch_free(struct manpage *res, size_t sz)
393 {
394 	size_t	 i;
395 
396 	for (i = 0; i < sz; i++) {
397 		free(res[i].file);
398 		free(res[i].names);
399 		free(res[i].output);
400 	}
401 	free(res);
402 }
403 
404 static int
manpage_compare(const void * vp1,const void * vp2)405 manpage_compare(const void *vp1, const void *vp2)
406 {
407 	const struct manpage	*mp1, *mp2;
408 	const char		*cp1, *cp2;
409 	size_t			 sz1, sz2;
410 	int			 diff;
411 
412 	mp1 = vp1;
413 	mp2 = vp2;
414 	if ((diff = mp2->bits - mp1->bits) ||
415 	    (diff = mp1->sec - mp2->sec))
416 		return diff;
417 
418 	/* Fall back to alphabetic ordering of names. */
419 	sz1 = strcspn(mp1->names, "(");
420 	sz2 = strcspn(mp2->names, "(");
421 	if (sz1 < sz2)
422 		sz1 = sz2;
423 	if ((diff = strncasecmp(mp1->names, mp2->names, sz1)))
424 		return diff;
425 
426 	/* For identical names and sections, prefer arch-dependent. */
427 	cp1 = strchr(mp1->names + sz1, '/');
428 	cp2 = strchr(mp2->names + sz2, '/');
429 	return cp1 != NULL && cp2 != NULL ? strcasecmp(cp1, cp2) :
430 	    cp1 != NULL ? -1 : cp2 != NULL ? 1 : 0;
431 }
432 
433 static char *
buildnames(const struct dbm_page * page)434 buildnames(const struct dbm_page *page)
435 {
436 	char	*buf;
437 	size_t	 i, sz;
438 
439 	sz = lstlen(page->name, 2) + 1 + lstlen(page->sect, 2) +
440 	    (page->arch == NULL ? 0 : 1 + lstlen(page->arch, 2)) + 2;
441 	buf = mandoc_malloc(sz);
442 	i = 0;
443 	lstcat(buf, &i, page->name, ", ");
444 	buf[i++] = '(';
445 	lstcat(buf, &i, page->sect, ", ");
446 	if (page->arch != NULL) {
447 		buf[i++] = '/';
448 		lstcat(buf, &i, page->arch, ", ");
449 	}
450 	buf[i++] = ')';
451 	buf[i++] = '\0';
452 	assert(i == sz);
453 	return buf;
454 }
455 
456 /*
457  * Count the buffer space needed to print the NUL-terminated
458  * list of NUL-terminated strings, when printing sep separator
459  * characters between strings.
460  */
461 static size_t
lstlen(const char * cp,size_t sep)462 lstlen(const char *cp, size_t sep)
463 {
464 	size_t	 sz;
465 
466 	for (sz = 0; *cp != '\0'; cp++) {
467 
468 		/* Skip names appearing only in the SYNOPSIS. */
469 		if (*cp <= (char)(NAME_SYN & NAME_MASK)) {
470 			while (*cp != '\0')
471 				cp++;
472 			continue;
473 		}
474 
475 		/* Skip name class markers. */
476 		if (*cp < ' ')
477 			cp++;
478 
479 		/* Print a separator before each but the first string. */
480 		if (sz)
481 			sz += sep;
482 
483 		/* Copy one string. */
484 		while (*cp != '\0') {
485 			sz++;
486 			cp++;
487 		}
488 	}
489 	return sz;
490 }
491 
492 /*
493  * Print the NUL-terminated list of NUL-terminated strings
494  * into the buffer, seperating strings with sep.
495  */
496 static void
lstcat(char * buf,size_t * i,const char * cp,const char * sep)497 lstcat(char *buf, size_t *i, const char *cp, const char *sep)
498 {
499 	const char	*s;
500 	size_t		 i_start;
501 
502 	for (i_start = *i; *cp != '\0'; cp++) {
503 
504 		/* Skip names appearing only in the SYNOPSIS. */
505 		if (*cp <= (char)(NAME_SYN & NAME_MASK)) {
506 			while (*cp != '\0')
507 				cp++;
508 			continue;
509 		}
510 
511 		/* Skip name class markers. */
512 		if (*cp < ' ')
513 			cp++;
514 
515 		/* Print a separator before each but the first string. */
516 		if (*i > i_start) {
517 			s = sep;
518 			while (*s != '\0')
519 				buf[(*i)++] = *s++;
520 		}
521 
522 		/* Copy one string. */
523 		while (*cp != '\0')
524 			buf[(*i)++] = *cp++;
525 	}
526 
527 }
528 
/*
 * Check whether the string want occurs, ignoring case, as a
 * substring of any string in the NUL-terminated string list have.
 * Return 1 if it does, or 0 otherwise.
 * If want is NULL, or if have is NULL or an empty list, assume
 * that no filtering is desired and return 1.
 */
static int
lstmatch(const char *want, const char *have)
{
	const char	*item;

	if (want == NULL || have == NULL || *have == '\0')
		return 1;

	/* Each string in the list ends right before the next one. */
	for (item = have; *item != '\0'; item += strlen(item) + 1)
		if (strcasestr(item, want) != NULL)
			return 1;
	return 0;
}
547 
548 /*
549  * Build a list of values taken by the macro im in the manual page.
550  */
551 static char *
buildoutput(size_t im,struct dbm_page * page)552 buildoutput(size_t im, struct dbm_page *page)
553 {
554 	const char	*oldoutput, *sep, *input;
555 	char		*output, *newoutput, *value;
556 	size_t		 sz, i;
557 
558 	switch (im) {
559 	case KEY_Nd:
560 		return mandoc_strdup(page->desc);
561 	case KEY_Nm:
562 		input = page->name;
563 		break;
564 	case KEY_sec:
565 		input = page->sect;
566 		break;
567 	case KEY_arch:
568 		input = page->arch;
569 		if (input == NULL)
570 			input = "all\0";
571 		break;
572 	default:
573 		input = NULL;
574 		break;
575 	}
576 
577 	if (input != NULL) {
578 		sz = lstlen(input, 3) + 1;
579 		output = mandoc_malloc(sz);
580 		i = 0;
581 		lstcat(output, &i, input, " # ");
582 		output[i++] = '\0';
583 		assert(i == sz);
584 		return output;
585 	}
586 
587 	output = NULL;
588 	dbm_macro_bypage(im - 2, page->addr);
589 	while ((value = dbm_macro_next()) != NULL) {
590 		if (output == NULL) {
591 			oldoutput = "";
592 			sep = "";
593 		} else {
594 			oldoutput = output;
595 			sep = " # ";
596 		}
597 		mandoc_asprintf(&newoutput, "%s%s%s", oldoutput, sep, value);
598 		free(output);
599 		output = newoutput;
600 	}
601 	return output;
602 }
603 
604 /*
605  * Compile a set of string tokens into an expression.
606  * Tokens in "argv" are assumed to be individual expression atoms (e.g.,
607  * "(", "foo=bar", etc.).
608  */
609 static struct expr *
exprcomp(const struct mansearch * search,int argc,char * argv[],int * argi)610 exprcomp(const struct mansearch *search, int argc, char *argv[], int *argi)
611 {
612 	struct expr	*parent, *child;
613 	int		 needterm, nested;
614 
615 	if ((nested = *argi) == argc)
616 		return NULL;
617 	needterm = 1;
618 	parent = child = NULL;
619 	while (*argi < argc) {
620 		if (strcmp(")", argv[*argi]) == 0) {
621 			if (needterm)
622 				warnx("missing term "
623 				    "before closing parenthesis");
624 			needterm = 0;
625 			if (nested)
626 				break;
627 			warnx("ignoring unmatched right parenthesis");
628 			++*argi;
629 			continue;
630 		}
631 		if (strcmp("-o", argv[*argi]) == 0) {
632 			if (needterm) {
633 				if (*argi > 0)
634 					warnx("ignoring -o after %s",
635 					    argv[*argi - 1]);
636 				else
637 					warnx("ignoring initial -o");
638 			}
639 			needterm = 1;
640 			++*argi;
641 			continue;
642 		}
643 		needterm = 0;
644 		if (child == NULL) {
645 			child = expr_and(search, argc, argv, argi);
646 			continue;
647 		}
648 		if (parent == NULL) {
649 			parent = mandoc_calloc(1, sizeof(*parent));
650 			parent->type = EXPR_OR;
651 			parent->next = NULL;
652 			parent->child = child;
653 		}
654 		child->next = expr_and(search, argc, argv, argi);
655 		child = child->next;
656 	}
657 	if (needterm && *argi)
658 		warnx("ignoring trailing %s", argv[*argi - 1]);
659 	return parent == NULL ? child : parent;
660 }
661 
662 static struct expr *
expr_and(const struct mansearch * search,int argc,char * argv[],int * argi)663 expr_and(const struct mansearch *search, int argc, char *argv[], int *argi)
664 {
665 	struct expr	*parent, *child;
666 	int		 needterm;
667 
668 	needterm = 1;
669 	parent = child = NULL;
670 	while (*argi < argc) {
671 		if (strcmp(")", argv[*argi]) == 0) {
672 			if (needterm)
673 				warnx("missing term "
674 				    "before closing parenthesis");
675 			needterm = 0;
676 			break;
677 		}
678 		if (strcmp("-o", argv[*argi]) == 0)
679 			break;
680 		if (strcmp("-a", argv[*argi]) == 0) {
681 			if (needterm) {
682 				if (*argi > 0)
683 					warnx("ignoring -a after %s",
684 					    argv[*argi - 1]);
685 				else
686 					warnx("ignoring initial -a");
687 			}
688 			needterm = 1;
689 			++*argi;
690 			continue;
691 		}
692 		if (needterm == 0)
693 			break;
694 		if (child == NULL) {
695 			child = exprterm(search, argc, argv, argi);
696 			if (child != NULL)
697 				needterm = 0;
698 			continue;
699 		}
700 		needterm = 0;
701 		if (parent == NULL) {
702 			parent = mandoc_calloc(1, sizeof(*parent));
703 			parent->type = EXPR_AND;
704 			parent->next = NULL;
705 			parent->child = child;
706 		}
707 		child->next = exprterm(search, argc, argv, argi);
708 		if (child->next != NULL) {
709 			child = child->next;
710 			needterm = 0;
711 		}
712 	}
713 	if (needterm && *argi)
714 		warnx("ignoring trailing %s", argv[*argi - 1]);
715 	return parent == NULL ? child : parent;
716 }
717 
718 static struct expr *
exprterm(const struct mansearch * search,int argc,char * argv[],int * argi)719 exprterm(const struct mansearch *search, int argc, char *argv[], int *argi)
720 {
721 	char		 errbuf[BUFSIZ];
722 	struct expr	*e;
723 	char		*key, *val;
724 	uint64_t	 iterbit;
725 	int		 cs, i, irc;
726 
727 	if (strcmp("(", argv[*argi]) == 0) {
728 		++*argi;
729 		e = exprcomp(search, argc, argv, argi);
730 		if (*argi < argc) {
731 			assert(strcmp(")", argv[*argi]) == 0);
732 			++*argi;
733 		} else
734 			warnx("unclosed parenthesis");
735 		return e;
736 	}
737 
738 	if (strcmp("-i", argv[*argi]) == 0 && *argi + 1 < argc) {
739 		cs = 0;
740 		++*argi;
741 	} else
742 		cs = 1;
743 
744 	e = mandoc_calloc(1, sizeof(*e));
745 	e->type = EXPR_TERM;
746 	e->bits = 0;
747 	e->next = NULL;
748 	e->child = NULL;
749 
750 	if (search->argmode == ARG_NAME) {
751 		e->bits = TYPE_Nm;
752 		e->match.type = DBM_EXACT;
753 		e->match.str = argv[(*argi)++];
754 		return e;
755 	}
756 
757 	/*
758 	 * Separate macro keys from search string.
759 	 * If needed, request regular expression handling.
760 	 */
761 
762 	if (search->argmode == ARG_WORD) {
763 		e->bits = TYPE_Nm;
764 		e->match.type = DBM_REGEX;
765 #if HAVE_REWB_BSD
766 		mandoc_asprintf(&val, "[[:<:]]%s[[:>:]]", argv[*argi]);
767 #elif HAVE_REWB_SYSV
768 		mandoc_asprintf(&val, "\\<%s\\>", argv[*argi]);
769 #else
770 		mandoc_asprintf(&val,
771 		    "(^|[^a-zA-Z01-9_])%s([^a-zA-Z01-9_]|$)", argv[*argi]);
772 #endif
773 		cs = 0;
774 	} else if ((val = strpbrk(argv[*argi], "=~")) == NULL) {
775 		e->bits = TYPE_Nm | TYPE_Nd;
776 		e->match.type = DBM_REGEX;
777 		val = argv[*argi];
778 		cs = 0;
779 	} else {
780 		if (val == argv[*argi])
781 			e->bits = TYPE_Nm | TYPE_Nd;
782 		if (*val == '=') {
783 			e->match.type = DBM_SUB;
784 			e->match.str = val + 1;
785 		} else
786 			e->match.type = DBM_REGEX;
787 		*val++ = '\0';
788 		if (strstr(argv[*argi], "arch") != NULL)
789 			cs = 0;
790 	}
791 
792 	/* Compile regular expressions. */
793 
794 	if (e->match.type == DBM_REGEX) {
795 		e->match.re = mandoc_malloc(sizeof(*e->match.re));
796 		irc = regcomp(e->match.re, val,
797 		    REG_EXTENDED | REG_NOSUB | (cs ? 0 : REG_ICASE));
798 		if (irc) {
799 			regerror(irc, e->match.re, errbuf, sizeof(errbuf));
800 			warnx("regcomp /%s/: %s", val, errbuf);
801 		}
802 		if (search->argmode == ARG_WORD)
803 			free(val);
804 		if (irc) {
805 			free(e->match.re);
806 			free(e);
807 			++*argi;
808 			return NULL;
809 		}
810 	}
811 
812 	if (e->bits) {
813 		++*argi;
814 		return e;
815 	}
816 
817 	/*
818 	 * Parse out all possible fields.
819 	 * If the field doesn't resolve, bail.
820 	 */
821 
822 	while (NULL != (key = strsep(&argv[*argi], ","))) {
823 		if ('\0' == *key)
824 			continue;
825 		for (i = 0, iterbit = 1; i < KEY_MAX; i++, iterbit <<= 1) {
826 			if (0 == strcasecmp(key, mansearch_keynames[i])) {
827 				e->bits |= iterbit;
828 				break;
829 			}
830 		}
831 		if (i == KEY_MAX) {
832 			if (strcasecmp(key, "any"))
833 				warnx("treating unknown key "
834 				    "\"%s\" as \"any\"", key);
835 			e->bits |= ~0ULL;
836 		}
837 	}
838 
839 	++*argi;
840 	return e;
841 }
842 
843 static void
exprfree(struct expr * e)844 exprfree(struct expr *e)
845 {
846 	if (e->next != NULL)
847 		exprfree(e->next);
848 	if (e->child != NULL)
849 		exprfree(e->child);
850 	free(e);
851 }
852