xref: /freebsd/usr.bin/whereis/whereis.c (revision 29fc4075e69fd27de0cded313ac6000165d99f8b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright © 2002, Jörg Wunsch
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT,
19  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
23  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
24  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25  * POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * 4.3BSD UI-compatible whereis(1) utility.  Rewritten from scratch
30  * since the original 4.3BSD version suffers legal problems that
31  * prevent it from being redistributed, and since the 4.4BSD version
32  * was pretty inferior in functionality.
33  */
34 
35 #include <sys/types.h>
36 
37 __FBSDID("$FreeBSD$");
38 
39 #include <sys/stat.h>
40 #include <sys/sysctl.h>
41 
42 #include <dirent.h>
43 #include <err.h>
44 #include <errno.h>
45 #include <locale.h>
46 #include <regex.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <sysexits.h>
51 #include <unistd.h>
52 
53 #include "pathnames.h"
54 
55 #define	NO_BIN_FOUND	1
56 #define	NO_MAN_FOUND	2
57 #define	NO_SRC_FOUND	4
58 
59 typedef const char *ccharp;
60 
61 static int opt_a, opt_b, opt_m, opt_q, opt_s, opt_u, opt_x;
62 static ccharp *bindirs, *mandirs, *sourcedirs;
63 static char **query;
64 
65 static const char *sourcepath = PATH_SOURCES;
66 
67 static char	*colonify(ccharp *);
68 static int	 contains(ccharp *, const char *);
69 static void	 decolonify(char *, ccharp **, int *);
70 static void	 defaults(void);
71 static void	 scanopts(int, char **);
72 static void	 usage(void);
73 
74 /*
75  * Throughout this program, a number of strings are dynamically
76  * allocated but never freed.  Their memory is written to when
77  * splitting the strings into string lists which will later be
78  * processed.  Since it's important that those string lists remain
79  * valid even after the functions allocating the memory returned,
80  * those functions cannot free them.  They could be freed only at end
81  * of main(), which is pretty pointless anyway.
82  *
83  * The overall amount of memory to be allocated for processing the
84  * strings is not expected to exceed a few kilobytes.  For that
 * reason, allocation can generally be assumed to succeed (within a
 * virtual memory environment), so we simply bail out using abort(3)
 * in case of an allocation failure.
88  */
89 
/*
 * Write the command-line synopsis to stderr and terminate the
 * program with exit status EX_USAGE.  Does not return.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: whereis [-abmqsux] [-BMS dir ... -f] program ...\n";

	(void)fputs(synopsis, stderr);
	exit(EX_USAGE);
}
97 
98 /*
99  * Scan options passed to program.
100  *
101  * Note that the -B/-M/-S options expect a list of directory
102  * names that must be terminated with -f.
103  */
104 static void
105 scanopts(int argc, char **argv)
106 {
107 	int c, i;
108 	ccharp **dirlist;
109 
110 	while ((c = getopt(argc, argv, "BMSabfmqsux")) != -1)
111 		switch (c) {
112 		case 'B':
113 			dirlist = &bindirs;
114 			goto dolist;
115 
116 		case 'M':
117 			dirlist = &mandirs;
118 			goto dolist;
119 
120 		case 'S':
121 			dirlist = &sourcedirs;
122 		  dolist:
123 			i = 0;
124 			*dirlist = realloc(*dirlist, (i + 1) * sizeof(char *));
125 			(*dirlist)[i] = NULL;
126 			while (optind < argc &&
127 			       strcmp(argv[optind], "-f") != 0 &&
128 			       strcmp(argv[optind], "-B") != 0 &&
129 			       strcmp(argv[optind], "-M") != 0 &&
130 			       strcmp(argv[optind], "-S") != 0) {
131 				decolonify(argv[optind], dirlist, &i);
132 				optind++;
133 			}
134 			break;
135 
136 		case 'a':
137 			opt_a = 1;
138 			break;
139 
140 		case 'b':
141 			opt_b = 1;
142 			break;
143 
144 		case 'f':
145 			goto breakout;
146 
147 		case 'm':
148 			opt_m = 1;
149 			break;
150 
151 		case 'q':
152 			opt_q = 1;
153 			break;
154 
155 		case 's':
156 			opt_s = 1;
157 			break;
158 
159 		case 'u':
160 			opt_u = 1;
161 			break;
162 
163 		case 'x':
164 			opt_x = 1;
165 			break;
166 
167 		default:
168 			usage();
169 		}
170   breakout:
171 	if (optind == argc)
172 		usage();
173 	query = argv + optind;
174 }
175 
/*
 * Report whether the NULL-terminated string list `cpp' holds an
 * entry equal to `s'.  A NULL list is treated as empty.  Returns 1
 * on a match and 0 otherwise.
 */
static int
contains(ccharp *cpp, const char *s)
{
	size_t idx;

	if (cpp == NULL)
		return (0);

	for (idx = 0; cpp[idx] != NULL; idx++)
		if (strcmp(cpp[idx], s) == 0)
			return (1);

	return (0);
}
194 
/*
 * Break string `s' apart at its colons and append the pieces to the
 * NULL-terminated string list `*cppp', whose current element count
 * is `*ip'.  The string is split in place: each colon is overwritten
 * with a NUL byte and the list entries point into `s', so `s' must
 * stay valid for as long as the list is used.  Empty pieces and
 * pieces already present in the list are skipped.  Both `*cppp' and
 * `*ip' are updated for every piece added; an allocation failure
 * results in abort(3).
 */
static void
decolonify(char *s, ccharp **cppp, int *ip)
{
	char *colon;
	ccharp *list;

	for (; *s != '\0'; s = colon + 1) {
		colon = strchr(s, ':');
		if (colon != NULL)
			*colon = '\0';

		if (*s != '\0' && !contains(*cppp, s)) {
			/* room for the new entry and the terminator */
			list = realloc(*cppp, (*ip + 2) * sizeof(char *));
			if (list == NULL)
				abort();
			list[*ip] = s;
			list[*ip + 1] = NULL;
			*cppp = list;
			(*ip)++;
		}

		/* no further colon: this was the last piece */
		if (colon == NULL)
			break;
	}
}
224 
/*
 * Join string list `cpp' into a colon-separated string.
 *
 * `cpp' is a NULL-terminated list.  Returns NULL if `cpp' itself is
 * NULL; otherwise returns a newly allocated string the caller may
 * free(3).  A list without elements yields an empty string.  Aborts
 * on allocation failure.
 */
static char *
colonify(ccharp *cpp)
{
	size_t len, total;
	char *cp, *out;
	int i;

	if (cpp == NULL)
		return (NULL);

	/* each element needs its own length plus a separator */
	for (total = 0, i = 0; cpp[i] != NULL; i++)
		total += strlen(cpp[i]) + 1;
	/* + 1 so that even an empty list has room for the NUL */
	if ((cp = malloc(total + 1)) == NULL)
		abort();

	/*
	 * Write the separator in front of every element but the
	 * first; this avoids having to strip a trailing colon, which
	 * would index before the buffer for an empty list.
	 */
	out = cp;
	for (i = 0; cpp[i] != NULL; i++) {
		if (i > 0)
			*out++ = ':';
		len = strlen(cpp[i]);
		memcpy(out, cpp[i], len);
		out += len;
	}
	*out = '\0';

	return (cp);
}
250 
/*
 * Provide defaults for all options and directory lists.
 *
 * If none of -b, -m and -s was given, all three are enabled.  Each
 * directory list that has not been set up from the command line is
 * then filled in:
 *
 *  - bindirs from the user.cs_path sysctl, PATH_LIBEXEC, and the
 *    PATH environment variable (if set);
 *  - mandirs from the first output line of MANPATHCMD;
 *  - sourcedirs from `sourcepath' plus the eligible subdirectories
 *    of PATH_PORTS.
 *
 * The strings backing the lists are deliberately never freed, since
 * the list entries point into them.  Failing system calls terminate
 * the program through err(3), allocation failures through abort(3).
 */
static void
defaults(void)
{
	size_t s;
	char *b, buf[BUFSIZ], *cp;
	int nele;
	FILE *p;
	DIR *dir;
	struct stat sb;
	struct dirent *dirp;
	const int oid[2] = {CTL_USER, USER_CS_PATH};

	/* default to -bms if none has been specified */
	if (!opt_b && !opt_m && !opt_s)
		opt_b = opt_m = opt_s = 1;

	/* -b defaults to default path + PATH_LIBEXEC +
	 * user's path */
	if (!bindirs) {
		/* first call only asks for the required buffer size */
		if (sysctl(oid, 2, NULL, &s, NULL, 0) == -1)
			err(EX_OSERR, "sysctl(\"user.cs_path\")");
		if ((b = malloc(s + 1)) == NULL)
			abort();
		if (sysctl(oid, 2, b, &s, NULL, 0) == -1)
			err(EX_OSERR, "sysctl(\"user.cs_path\")");
		nele = 0;
		decolonify(b, &bindirs, &nele);
		/*
		 * decolonify() may not have added anything, so make
		 * sure there is room for one more entry plus the
		 * terminator before appending by hand.
		 */
		bindirs = realloc(bindirs, (nele + 2) * sizeof(char *));
		if (bindirs == NULL)
			abort();
		bindirs[nele++] = PATH_LIBEXEC;
		bindirs[nele] = NULL;
		if ((cp = getenv("PATH")) != NULL) {
			/* don't destroy the original environment... */
			b = strdup(cp);
			if (b == NULL)
				abort();
			decolonify(b, &bindirs, &nele);
		}
	}

	/* -m defaults to the output of MANPATHCMD */
	if (!mandirs) {
		if ((p = popen(MANPATHCMD, "r")) == NULL)
			err(EX_OSERR, "cannot execute manpath command");
		if (fgets(buf, BUFSIZ - 1, p) == NULL ||
		    pclose(p))
			err(EX_OSERR, "error processing manpath results");
		if ((b = strchr(buf, '\n')) != NULL)
			*b = '\0';
		/* buf is on the stack, the list needs a permanent copy */
		b = strdup(buf);
		if (b == NULL)
			abort();
		nele = 0;
		decolonify(b, &mandirs, &nele);
	}

	/* -s defaults to precompiled list, plus subdirs of PATH_PORTS */
	if (!sourcedirs) {
		b = strdup(sourcepath);
		if (b == NULL)
			abort();
		nele = 0;
		decolonify(b, &sourcedirs, &nele);

		if (stat(PATH_PORTS, &sb) == -1) {
			if (errno == ENOENT)
				/* no ports directory, we are done */
				return;
			err(EX_OSERR, "stat(" PATH_PORTS ")");
		}
		if ((sb.st_mode & S_IFMT) != S_IFDIR)
			/* PATH_PORTS is not a directory, ignore */
			return;
		if (access(PATH_PORTS, R_OK | X_OK) != 0)
			/* not allowed to look inside, ignore as well */
			return;
		if ((dir = opendir(PATH_PORTS)) == NULL)
			err(EX_OSERR, "opendir(" PATH_PORTS ")");
		while ((dirp = readdir(dir)) != NULL) {
			/*
			 * Not everything below PATH_PORTS is of
			 * interest.  First, all dot files and
			 * directories (e. g. .snap) can be ignored.
			 * Also, all subdirectories starting with a
			 * capital letter are not going to be
			 * examined, as they are used for internal
			 * purposes (Mk, Tools, ...).  This also
			 * matches a possible CVS subdirectory.
			 * Finally, the distfiles subdirectory is also
			 * special, and should not be considered to
			 * avoid false matches.
			 */
			if (dirp->d_name[0] == '.' ||
			    /*
			     * isupper() not used on purpose: the
			     * check is supposed to default to the C
			     * locale instead of the current user's
			     * locale.
			     */
			    (dirp->d_name[0] >= 'A' && dirp->d_name[0] <= 'Z') ||
			    strcmp(dirp->d_name, "distfiles") == 0)
				continue;
			/*
			 * sizeof PATH_PORTS already accounts for the
			 * terminating NUL; the extra byte is for the
			 * slash.
			 */
			if ((b = malloc(sizeof PATH_PORTS + 1 + dirp->d_namlen))
			    == NULL)
				abort();
			strcpy(b, PATH_PORTS);
			strcat(b, "/");
			strcat(b, dirp->d_name);
			/* only accessible directories are worth searching */
			if (stat(b, &sb) == -1 ||
			    (sb.st_mode & S_IFMT) != S_IFDIR ||
			    access(b, R_OK | X_OK) != 0) {
				free(b);
				continue;
			}
			sourcedirs = realloc(sourcedirs,
					     (nele + 2) * sizeof(char *));
			if (sourcedirs == NULL)
				abort();
			sourcedirs[nele++] = b;
			sourcedirs[nele] = NULL;
		}
		closedir(dir);
	}
}
378 
379 int
380 main(int argc, char **argv)
381 {
382 	int unusual, i, printed;
383 	char *bin, buf[BUFSIZ], *cp, *cp2, *man, *name, *src;
384 	ccharp *dp;
385 	size_t nlen, olen, s;
386 	struct stat sb;
387 	regex_t re, re2;
388 	regmatch_t matches[2];
389 	regoff_t rlen;
390 	FILE *p;
391 
392 	setlocale(LC_ALL, "");
393 
394 	scanopts(argc, argv);
395 	defaults();
396 
397 	if (mandirs == NULL)
398 		opt_m = 0;
399 	if (bindirs == NULL)
400 		opt_b = 0;
401 	if (sourcedirs == NULL)
402 		opt_s = 0;
403 	if (opt_m + opt_b + opt_s == 0)
404 		errx(EX_DATAERR, "no directories to search");
405 
406 	if (opt_m) {
407 		setenv("MANPATH", colonify(mandirs), 1);
408 		if ((i = regcomp(&re, MANWHEREISMATCH, REG_EXTENDED)) != 0) {
409 			regerror(i, &re, buf, BUFSIZ - 1);
410 			errx(EX_UNAVAILABLE, "regcomp(%s) failed: %s",
411 			     MANWHEREISMATCH, buf);
412 		}
413 	}
414 
415 	for (; (name = *query) != NULL; query++) {
416 		/* strip leading path name component */
417 		if ((cp = strrchr(name, '/')) != NULL)
418 			name = cp + 1;
419 		/* strip SCCS or RCS suffix/prefix */
420 		if (strlen(name) > 2 && strncmp(name, "s.", 2) == 0)
421 			name += 2;
422 		if ((s = strlen(name)) > 2 && strcmp(name + s - 2, ",v") == 0)
423 			name[s - 2] = '\0';
424 		/* compression suffix */
425 		s = strlen(name);
426 		if (s > 2 &&
427 		    (strcmp(name + s - 2, ".z") == 0 ||
428 		     strcmp(name + s - 2, ".Z") == 0))
429 			name[s - 2] = '\0';
430 		else if (s > 3 &&
431 			 strcmp(name + s - 3, ".gz") == 0)
432 			name[s - 3] = '\0';
433 		else if (s > 4 &&
434 			 strcmp(name + s - 4, ".bz2") == 0)
435 			name[s - 4] = '\0';
436 
437 		unusual = 0;
438 		bin = man = src = NULL;
439 		s = strlen(name);
440 
441 		if (opt_b) {
442 			/*
443 			 * Binaries have to match exactly, and must be regular
444 			 * executable files.
445 			 */
446 			unusual = unusual | NO_BIN_FOUND;
447 			for (dp = bindirs; *dp != NULL; dp++) {
448 				cp = malloc(strlen(*dp) + 1 + s + 1);
449 				if (cp == NULL)
450 					abort();
451 				strcpy(cp, *dp);
452 				strcat(cp, "/");
453 				strcat(cp, name);
454 				if (stat(cp, &sb) == 0 &&
455 				    (sb.st_mode & S_IFMT) == S_IFREG &&
456 				    (sb.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))
457 				    != 0) {
458 					unusual = unusual & ~NO_BIN_FOUND;
459 					if (bin == NULL) {
460 						bin = strdup(cp);
461 					} else {
462 						olen = strlen(bin);
463 						nlen = strlen(cp);
464 						bin = realloc(bin,
465 							      olen + nlen + 2);
466 						if (bin == NULL)
467 							abort();
468 						strcat(bin, " ");
469 						strcat(bin, cp);
470 					}
471 					if (!opt_a) {
472 						free(cp);
473 						break;
474 					}
475 				}
476 				free(cp);
477 			}
478 		}
479 
480 		if (opt_m) {
481 			/*
482 			 * Ask the man command to perform the search for us.
483 			 */
484 			unusual = unusual | NO_MAN_FOUND;
485 			if (opt_a)
486 				cp = malloc(sizeof MANWHEREISALLCMD - 2 + s);
487 			else
488 				cp = malloc(sizeof MANWHEREISCMD - 2 + s);
489 
490 			if (cp == NULL)
491 				abort();
492 
493 			if (opt_a)
494 				sprintf(cp, MANWHEREISALLCMD, name);
495 			else
496 				sprintf(cp, MANWHEREISCMD, name);
497 
498 			if ((p = popen(cp, "r")) != NULL) {
499 
500 				while (fgets(buf, BUFSIZ - 1, p) != NULL) {
501 					unusual = unusual & ~NO_MAN_FOUND;
502 
503 					if ((cp2 = strchr(buf, '\n')) != NULL)
504 						*cp2 = '\0';
505 					if (regexec(&re, buf, 2,
506 						    matches, 0) == 0 &&
507 					    (rlen = matches[1].rm_eo -
508 					     matches[1].rm_so) > 0) {
509 						/*
510 						 * man -w found formatted
511 						 * page, need to pick up
512 						 * source page name.
513 						 */
514 						cp2 = malloc(rlen + 1);
515 						if (cp2 == NULL)
516 							abort();
517 						memcpy(cp2,
518 						       buf + matches[1].rm_so,
519 						       rlen);
520 						cp2[rlen] = '\0';
521 					} else {
522 						/*
523 						 * man -w found plain source
524 						 * page, use it.
525 						 */
526 						cp2 = strdup(buf);
527 						if (cp2 == NULL)
528 							abort();
529 					}
530 
531 					if (man == NULL) {
532 						man = strdup(cp2);
533 					} else {
534 						olen = strlen(man);
535 						nlen = strlen(cp2);
536 						man = realloc(man,
537 							      olen + nlen + 2);
538 						if (man == NULL)
539 							abort();
540 						strcat(man, " ");
541 						strcat(man, cp2);
542 					}
543 
544 					free(cp2);
545 
546 					if (!opt_a)
547 						break;
548 				}
549 				pclose(p);
550 				free(cp);
551 			}
552 		}
553 
554 		if (opt_s) {
555 			/*
556 			 * Sources match if a subdir with the exact
557 			 * name is found.
558 			 */
559 			unusual = unusual | NO_SRC_FOUND;
560 			for (dp = sourcedirs; *dp != NULL; dp++) {
561 				cp = malloc(strlen(*dp) + 1 + s + 1);
562 				if (cp == NULL)
563 					abort();
564 				strcpy(cp, *dp);
565 				strcat(cp, "/");
566 				strcat(cp, name);
567 				if (stat(cp, &sb) == 0 &&
568 				    (sb.st_mode & S_IFMT) == S_IFDIR) {
569 					unusual = unusual & ~NO_SRC_FOUND;
570 					if (src == NULL) {
571 						src = strdup(cp);
572 					} else {
573 						olen = strlen(src);
574 						nlen = strlen(cp);
575 						src = realloc(src,
576 							      olen + nlen + 2);
577 						if (src == NULL)
578 							abort();
579 						strcat(src, " ");
580 						strcat(src, cp);
581 					}
582 					if (!opt_a) {
583 						free(cp);
584 						break;
585 					}
586 				}
587 				free(cp);
588 			}
589 			/*
590 			 * If still not found, ask locate to search it
591 			 * for us.  This will find sources for things
592 			 * like lpr that are well hidden in the
593 			 * /usr/src tree, but takes a lot longer.
594 			 * Thus, option -x (`expensive') prevents this
595 			 * search.
596 			 *
597 			 * Do only match locate output that starts
598 			 * with one of our source directories, and at
599 			 * least one further level of subdirectories.
600 			 */
601 			if (opt_x || (src && !opt_a))
602 				goto done_sources;
603 
604 			cp = malloc(sizeof LOCATECMD - 2 + s);
605 			if (cp == NULL)
606 				abort();
607 			sprintf(cp, LOCATECMD, name);
608 			if ((p = popen(cp, "r")) == NULL)
609 				goto done_sources;
610 			while ((src == NULL || opt_a) &&
611 			       (fgets(buf, BUFSIZ - 1, p)) != NULL) {
612 				if ((cp2 = strchr(buf, '\n')) != NULL)
613 					*cp2 = '\0';
614 				for (dp = sourcedirs;
615 				     (src == NULL || opt_a) && *dp != NULL;
616 				     dp++) {
617 					cp2 = malloc(strlen(*dp) + 9);
618 					if (cp2 == NULL)
619 						abort();
620 					strcpy(cp2, "^");
621 					strcat(cp2, *dp);
622 					strcat(cp2, "/[^/]+/");
623 					if ((i = regcomp(&re2, cp2,
624 							 REG_EXTENDED|REG_NOSUB))
625 					    != 0) {
626 						regerror(i, &re, buf,
627 							 BUFSIZ - 1);
628 						errx(EX_UNAVAILABLE,
629 						     "regcomp(%s) failed: %s",
630 						     cp2, buf);
631 					}
632 					free(cp2);
633 					if (regexec(&re2, buf, 0,
634 						    (regmatch_t *)NULL, 0)
635 					    == 0) {
636 						unusual = unusual &
637 						          ~NO_SRC_FOUND;
638 						if (src == NULL) {
639 							src = strdup(buf);
640 						} else {
641 							olen = strlen(src);
642 							nlen = strlen(buf);
643 							src = realloc(src,
644 								      olen +
645 								      nlen + 2);
646 							if (src == NULL)
647 								abort();
648 							strcat(src, " ");
649 							strcat(src, buf);
650 						}
651 					}
652 					regfree(&re2);
653 				}
654 			}
655 			pclose(p);
656 			free(cp);
657 		}
658 	  done_sources:
659 
660 		if (opt_u && !unusual)
661 			continue;
662 
663 		printed = 0;
664 		if (!opt_q) {
665 			printf("%s:", name);
666 			printed++;
667 		}
668 		if (bin) {
669 			if (printed++)
670 				putchar(' ');
671 			fputs(bin, stdout);
672 		}
673 		if (man) {
674 			if (printed++)
675 				putchar(' ');
676 			fputs(man, stdout);
677 		}
678 		if (src) {
679 			if (printed++)
680 				putchar(' ');
681 			fputs(src, stdout);
682 		}
683 		if (printed)
684 			putchar('\n');
685 	}
686 
687 	if (opt_m)
688 		regfree(&re);
689 
690 	return (0);
691 }
692