xref: /freebsd/usr.bin/whereis/whereis.c (revision 884a2a699669ec61e2366e3e358342dbc94be24a)
1 /*
 * Copyright © 2002, Jörg Wunsch
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
14  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
15  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
16  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT,
17  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
18  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
19  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
21  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
22  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
23  * POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 /*
27  * 4.3BSD UI-compatible whereis(1) utility.  Rewritten from scratch
28  * since the original 4.3BSD version suffers legal problems that
29  * prevent it from being redistributed, and since the 4.4BSD version
30  * was pretty inferior in functionality.
31  */
32 
33 #include <sys/types.h>
34 
35 __FBSDID("$FreeBSD$");
36 
37 #include <sys/stat.h>
38 #include <sys/sysctl.h>
39 
40 #include <dirent.h>
41 #include <err.h>
42 #include <errno.h>
43 #include <locale.h>
44 #include <regex.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <sysexits.h>
49 #include <unistd.h>
50 
51 #include "pathnames.h"
52 
/*
 * Bits of the `unusual' mask kept by main() for each queried name.  A
 * bit is set when the corresponding search is started and cleared
 * again as soon as that search yields a result.
 */
#define	NO_BIN_FOUND	1
#define	NO_MAN_FOUND	2
#define	NO_SRC_FOUND	4

/* Element type of the NULL-terminated string lists used below. */
typedef const char *ccharp;

/* Option flags; scanopts() sets each to 1 when its letter is given. */
int opt_a, opt_b, opt_m, opt_q, opt_s, opt_u, opt_x;
/*
 * NULL-terminated directory lists for binaries, manual pages and
 * sources.  A NULL pointer means the list has not been set up.
 */
ccharp *bindirs, *mandirs, *sourcedirs;
/* Arguments left over after option parsing; set by scanopts(). */
char **query;

/* Default source directories; defaults() splits a copy at the colons. */
const char *sourcepath = PATH_SOURCES;

char	*colonify(ccharp *);
int	 contains(ccharp *, const char *);
void	 decolonify(char *, ccharp **, int *);
void	 defaults(void);
void	 scanopts(int, char **);
void	 usage(void);
71 
/*
 * Throughout this program, a number of strings are dynamically
 * allocated but never freed.  Their memory is written to when
 * splitting the strings into string lists which will later be
 * processed.  Since it's important that those string lists remain
 * valid even after the functions allocating the memory have returned,
 * those functions cannot free them.  They could be freed only at the
 * end of main(), which is pretty pointless anyway.
 *
 * The overall amount of memory to be allocated for processing the
 * strings is not expected to exceed a few kilobytes.  For that
 * reason, allocation can generally be assumed to succeed (within
 * a virtual memory environment), thus we simply bail out using
 * abort(3) in case of an allocation failure.
 */
87 
/*
 * Print the command synopsis on stderr and terminate the program
 * with exit status EX_USAGE.  Does not return.
 */
void
usage(void)
{
	static const char synopsis[] =
	    "usage: whereis [-abmqsux] [-BMS dir ... -f] program ...\n";

	(void)fputs(synopsis, stderr);
	exit(EX_USAGE);
}
95 
96 /*
97  * Scan options passed to program.
98  *
99  * Note that the -B/-M/-S options expect a list of directory
100  * names that must be terminated with -f.
101  */
102 void
103 scanopts(int argc, char **argv)
104 {
105 	int c, i;
106 	ccharp **dirlist;
107 
108 	while ((c = getopt(argc, argv, "BMSabfmqsux")) != -1)
109 		switch (c) {
110 		case 'B':
111 			dirlist = &bindirs;
112 			goto dolist;
113 
114 		case 'M':
115 			dirlist = &mandirs;
116 			goto dolist;
117 
118 		case 'S':
119 			dirlist = &sourcedirs;
120 		  dolist:
121 			i = 0;
122 			*dirlist = realloc(*dirlist, (i + 1) * sizeof(char *));
123 			(*dirlist)[i] = NULL;
124 			while (optind < argc &&
125 			       strcmp(argv[optind], "-f") != 0 &&
126 			       strcmp(argv[optind], "-B") != 0 &&
127 			       strcmp(argv[optind], "-M") != 0 &&
128 			       strcmp(argv[optind], "-S") != 0) {
129 				decolonify(argv[optind], dirlist, &i);
130 				optind++;
131 			}
132 			break;
133 
134 		case 'a':
135 			opt_a = 1;
136 			break;
137 
138 		case 'b':
139 			opt_b = 1;
140 			break;
141 
142 		case 'f':
143 			goto breakout;
144 
145 		case 'm':
146 			opt_m = 1;
147 			break;
148 
149 		case 'q':
150 			opt_q = 1;
151 			break;
152 
153 		case 's':
154 			opt_s = 1;
155 			break;
156 
157 		case 'u':
158 			opt_u = 1;
159 			break;
160 
161 		case 'x':
162 			opt_x = 1;
163 			break;
164 
165 		default:
166 			usage();
167 		}
168   breakout:
169 	if (optind == argc)
170 		usage();
171 	query = argv + optind;
172 }
173 
174 /*
175  * Find out whether string `s' is contained in list `cpp'.
176  */
177 int
178 contains(ccharp *cpp, const char *s)
179 {
180 	ccharp cp;
181 
182 	if (cpp == NULL)
183 		return (0);
184 
185 	while ((cp = *cpp) != NULL) {
186 		if (strcmp(cp, s) == 0)
187 			return (1);
188 		cpp++;
189 	}
190 	return (0);
191 }
192 
193 /*
194  * Split string `s' at colons, and pass it to the string list pointed
195  * to by `cppp' (which has `*ip' elements).  Note that the original
196  * string is modified by replacing the colon with a NUL byte.  The
197  * partial string is only added if it has a length greater than 0, and
198  * if it's not already contained in the string list.
199  */
200 void
201 decolonify(char *s, ccharp **cppp, int *ip)
202 {
203 	char *cp;
204 
205 	while ((cp = strchr(s, ':')), *s != '\0') {
206 		if (cp)
207 			*cp = '\0';
208 		if (strlen(s) && !contains(*cppp, s)) {
209 			*cppp = realloc(*cppp, (*ip + 2) * sizeof(char *));
210 			if (cppp == NULL)
211 				abort();
212 			(*cppp)[*ip] = s;
213 			(*cppp)[*ip + 1] = NULL;
214 			(*ip)++;
215 		}
216 		if (cp)
217 			s = cp + 1;
218 		else
219 			break;
220 	}
221 }
222 
/*
 * Join the NULL-terminated string list `cpp' into a single
 * colon-separated string.
 *
 * Returns NULL if `cpp' is NULL.  Otherwise returns a freshly
 * malloc(3)ed string which the caller may free; an empty list yields
 * an empty string.  Aborts on allocation failure.
 */
char *
colonify(ccharp *cpp)
{
	size_t len, total;
	char *cp, *out;
	int i;

	if (cpp == NULL)
		return (NULL);

	/*
	 * Each element needs room for itself plus one byte, which holds
	 * either the separating colon or the terminating NUL.
	 */
	for (total = 0, i = 0; cpp[i] != NULL; i++)
		total += strlen(cpp[i]) + 1;
	/* an empty list still needs a byte for the terminating NUL */
	if (total == 0)
		total = 1;
	if ((cp = malloc(total)) == NULL)
		abort();

	out = cp;
	for (i = 0; cpp[i] != NULL; i++) {
		if (i > 0)
			*out++ = ':';
		len = strlen(cpp[i]);
		memcpy(out, cpp[i], len);
		out += len;
	}
	*out = '\0';

	return (cp);
}
248 
249 /*
250  * Provide defaults for all options and directory lists.
251  */
252 void
253 defaults(void)
254 {
255 	size_t s;
256 	char *b, buf[BUFSIZ], *cp;
257 	int nele;
258 	FILE *p;
259 	DIR *dir;
260 	struct stat sb;
261 	struct dirent *dirp;
262 
263 	/* default to -bms if none has been specified */
264 	if (!opt_b && !opt_m && !opt_s)
265 		opt_b = opt_m = opt_s = 1;
266 
267 	/* -b defaults to default path + /usr/libexec +
268 	 * /usr/games + user's path */
269 	if (!bindirs) {
270 		if (sysctlbyname("user.cs_path", (void *)NULL, &s,
271 				 (void *)NULL, 0) == -1)
272 			err(EX_OSERR, "sysctlbyname(\"user.cs_path\")");
273 		if ((b = malloc(s + 1)) == NULL)
274 			abort();
275 		if (sysctlbyname("user.cs_path", b, &s, (void *)NULL, 0) == -1)
276 			err(EX_OSERR, "sysctlbyname(\"user.cs_path\")");
277 		nele = 0;
278 		decolonify(b, &bindirs, &nele);
279 		bindirs = realloc(bindirs, (nele + 3) * sizeof(char *));
280 		if (bindirs == NULL)
281 			abort();
282 		bindirs[nele++] = PATH_LIBEXEC;
283 		bindirs[nele++] = PATH_GAMES;
284 		bindirs[nele] = NULL;
285 		if ((cp = getenv("PATH")) != NULL) {
286 			/* don't destroy the original environment... */
287 			if ((b = malloc(strlen(cp) + 1)) == NULL)
288 				abort();
289 			strcpy(b, cp);
290 			decolonify(b, &bindirs, &nele);
291 		}
292 	}
293 
294 	/* -m defaults to $(manpath) */
295 	if (!mandirs) {
296 		if ((p = popen(MANPATHCMD, "r")) == NULL)
297 			err(EX_OSERR, "cannot execute manpath command");
298 		if (fgets(buf, BUFSIZ - 1, p) == NULL ||
299 		    pclose(p))
300 			err(EX_OSERR, "error processing manpath results");
301 		if ((b = strchr(buf, '\n')) != NULL)
302 			*b = '\0';
303 		if ((b = malloc(strlen(buf) + 1)) == NULL)
304 			abort();
305 		strcpy(b, buf);
306 		nele = 0;
307 		decolonify(b, &mandirs, &nele);
308 	}
309 
310 	/* -s defaults to precompiled list, plus subdirs of /usr/ports */
311 	if (!sourcedirs) {
312 		if ((b = malloc(strlen(sourcepath) + 1)) == NULL)
313 			abort();
314 		strcpy(b, sourcepath);
315 		nele = 0;
316 		decolonify(b, &sourcedirs, &nele);
317 
318 		if (stat(PATH_PORTS, &sb) == -1) {
319 			if (errno == ENOENT)
320 				/* no /usr/ports, we are done */
321 				return;
322 			err(EX_OSERR, "stat(" PATH_PORTS ")");
323 		}
324 		if ((sb.st_mode & S_IFMT) != S_IFDIR)
325 			/* /usr/ports is not a directory, ignore */
326 			return;
327 		if (access(PATH_PORTS, R_OK | X_OK) != 0)
328 			return;
329 		if ((dir = opendir(PATH_PORTS)) == NULL)
330 			err(EX_OSERR, "opendir" PATH_PORTS ")");
331 		while ((dirp = readdir(dir)) != NULL) {
332 			/*
333 			 * Not everything below PATH_PORTS is of
334 			 * interest.  First, all dot files and
335 			 * directories (e. g. .snap) can be ignored.
336 			 * Also, all subdirectories starting with a
337 			 * capital letter are not going to be
338 			 * examined, as they are used for internal
339 			 * purposes (Mk, Tools, ...).  This also
340 			 * matches a possible CVS subdirectory.
341 			 * Finally, the distfiles subdirectory is also
342 			 * special, and should not be considered to
343 			 * avoid false matches.
344 			 */
345 			if (dirp->d_name[0] == '.' ||
346 			    /*
347 			     * isupper() not used on purpose: the
348 			     * check is supposed to default to the C
349 			     * locale instead of the current user's
350 			     * locale.
351 			     */
352 			    (dirp->d_name[0] >= 'A' && dirp->d_name[0] <= 'Z') ||
353 			    strcmp(dirp->d_name, "distfiles") == 0)
354 				continue;
355 			if ((b = malloc(sizeof PATH_PORTS + 1 + dirp->d_namlen))
356 			    == NULL)
357 				abort();
358 			strcpy(b, PATH_PORTS);
359 			strcat(b, "/");
360 			strcat(b, dirp->d_name);
361 			if (stat(b, &sb) == -1 ||
362 			    (sb.st_mode & S_IFMT) != S_IFDIR ||
363 			    access(b, R_OK | X_OK) != 0) {
364 				free(b);
365 				continue;
366 			}
367 			sourcedirs = realloc(sourcedirs,
368 					     (nele + 2) * sizeof(char *));
369 			if (sourcedirs == NULL)
370 				abort();
371 			sourcedirs[nele++] = b;
372 			sourcedirs[nele] = NULL;
373 		}
374 		closedir(dir);
375 	}
376 }
377 
378 int
379 main(int argc, char **argv)
380 {
381 	int unusual, i, printed;
382 	char *bin, buf[BUFSIZ], *cp, *cp2, *man, *name, *src;
383 	ccharp *dp;
384 	size_t nlen, olen, s;
385 	struct stat sb;
386 	regex_t re, re2;
387 	regmatch_t matches[2];
388 	regoff_t rlen;
389 	FILE *p;
390 
391 	setlocale(LC_ALL, "");
392 
393 	scanopts(argc, argv);
394 	defaults();
395 
396 	if (mandirs == NULL)
397 		opt_m = 0;
398 	if (bindirs == NULL)
399 		opt_b = 0;
400 	if (sourcedirs == NULL)
401 		opt_s = 0;
402 	if (opt_m + opt_b + opt_s == 0)
403 		errx(EX_DATAERR, "no directories to search");
404 
405 	if (opt_m) {
406 		setenv("MANPATH", colonify(mandirs), 1);
407 		if ((i = regcomp(&re, MANWHEREISMATCH, REG_EXTENDED)) != 0) {
408 			regerror(i, &re, buf, BUFSIZ - 1);
409 			errx(EX_UNAVAILABLE, "regcomp(%s) failed: %s",
410 			     MANWHEREISMATCH, buf);
411 		}
412 	}
413 
414 	for (; (name = *query) != NULL; query++) {
415 		/* strip leading path name component */
416 		if ((cp = strrchr(name, '/')) != NULL)
417 			name = cp + 1;
418 		/* strip SCCS or RCS suffix/prefix */
419 		if (strlen(name) > 2 && strncmp(name, "s.", 2) == 0)
420 			name += 2;
421 		if ((s = strlen(name)) > 2 && strcmp(name + s - 2, ",v") == 0)
422 			name[s - 2] = '\0';
423 		/* compression suffix */
424 		s = strlen(name);
425 		if (s > 2 &&
426 		    (strcmp(name + s - 2, ".z") == 0 ||
427 		     strcmp(name + s - 2, ".Z") == 0))
428 			name[s - 2] = '\0';
429 		else if (s > 3 &&
430 			 strcmp(name + s - 3, ".gz") == 0)
431 			name[s - 3] = '\0';
432 		else if (s > 4 &&
433 			 strcmp(name + s - 4, ".bz2") == 0)
434 			name[s - 4] = '\0';
435 
436 		unusual = 0;
437 		bin = man = src = NULL;
438 		s = strlen(name);
439 
440 		if (opt_b) {
441 			/*
442 			 * Binaries have to match exactly, and must be regular
443 			 * executable files.
444 			 */
445 			unusual = unusual | NO_BIN_FOUND;
446 			for (dp = bindirs; *dp != NULL; dp++) {
447 				cp = malloc(strlen(*dp) + 1 + s + 1);
448 				if (cp == NULL)
449 					abort();
450 				strcpy(cp, *dp);
451 				strcat(cp, "/");
452 				strcat(cp, name);
453 				if (stat(cp, &sb) == 0 &&
454 				    (sb.st_mode & S_IFMT) == S_IFREG &&
455 				    (sb.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))
456 				    != 0) {
457 					unusual = unusual & ~NO_BIN_FOUND;
458 					if (bin == NULL) {
459 						bin = strdup(cp);
460 					} else {
461 						olen = strlen(bin);
462 						nlen = strlen(cp);
463 						bin = realloc(bin,
464 							      olen + nlen + 2);
465 						if (bin == 0)
466 							abort();
467 						strcat(bin, " ");
468 						strcat(bin, cp);
469 					}
470 					if (!opt_a) {
471 						free(cp);
472 						break;
473 					}
474 				}
475 				free(cp);
476 			}
477 		}
478 
479 		if (opt_m) {
480 			/*
481 			 * Ask the man command to perform the search for us.
482 			 */
483 			unusual = unusual | NO_MAN_FOUND;
484 			if (opt_a)
485 				cp = malloc(sizeof MANWHEREISALLCMD - 2 + s);
486 			else
487 				cp = malloc(sizeof MANWHEREISCMD - 2 + s);
488 
489 			if (cp == NULL)
490 				abort();
491 
492 			if (opt_a)
493 				sprintf(cp, MANWHEREISALLCMD, name);
494 			else
495 				sprintf(cp, MANWHEREISCMD, name);
496 
497 			if ((p = popen(cp, "r")) != NULL) {
498 
499 				while (fgets(buf, BUFSIZ - 1, p) != NULL) {
500 					unusual = unusual & ~NO_MAN_FOUND;
501 
502 					if ((cp2 = strchr(buf, '\n')) != NULL)
503 						*cp2 = '\0';
504 					if (regexec(&re, buf, 2,
505 						    matches, 0) == 0 &&
506 					    (rlen = matches[1].rm_eo -
507 					     matches[1].rm_so) > 0) {
508 						/*
509 						 * man -w found formated
510 						 * page, need to pick up
511 						 * source page name.
512 						 */
513 						cp2 = malloc(rlen + 1);
514 						if (cp2 == NULL)
515 							abort();
516 						memcpy(cp2,
517 						       buf + matches[1].rm_so,
518 						       rlen);
519 						cp2[rlen] = '\0';
520 					} else {
521 						/*
522 						 * man -w found plain source
523 						 * page, use it.
524 						 */
525 						s = strlen(buf);
526 						cp2 = malloc(s + 1);
527 						if (cp2 == NULL)
528 							abort();
529 						strcpy(cp2, buf);
530 					}
531 
532 					if (man == NULL) {
533 						man = strdup(cp2);
534 					} else {
535 						olen = strlen(man);
536 						nlen = strlen(cp2);
537 						man = realloc(man,
538 							      olen + nlen + 2);
539 						if (man == 0)
540 							abort();
541 						strcat(man, " ");
542 						strcat(man, cp2);
543 					}
544 
545 					free(cp2);
546 
547 					if (!opt_a)
548 						break;
549 				}
550 				pclose(p);
551 				free(cp);
552 			}
553 		}
554 
555 		if (opt_s) {
556 			/*
557 			 * Sources match if a subdir with the exact
558 			 * name is found.
559 			 */
560 			unusual = unusual | NO_SRC_FOUND;
561 			for (dp = sourcedirs; *dp != NULL; dp++) {
562 				cp = malloc(strlen(*dp) + 1 + s + 1);
563 				if (cp == NULL)
564 					abort();
565 				strcpy(cp, *dp);
566 				strcat(cp, "/");
567 				strcat(cp, name);
568 				if (stat(cp, &sb) == 0 &&
569 				    (sb.st_mode & S_IFMT) == S_IFDIR) {
570 					unusual = unusual & ~NO_SRC_FOUND;
571 					if (src == NULL) {
572 						src = strdup(cp);
573 					} else {
574 						olen = strlen(src);
575 						nlen = strlen(cp);
576 						src = realloc(src,
577 							      olen + nlen + 2);
578 						if (src == 0)
579 							abort();
580 						strcat(src, " ");
581 						strcat(src, cp);
582 					}
583 					if (!opt_a) {
584 						free(cp);
585 						break;
586 					}
587 				}
588 				free(cp);
589 			}
590 			/*
591 			 * If still not found, ask locate to search it
592 			 * for us.  This will find sources for things
593 			 * like lpr that are well hidden in the
594 			 * /usr/src tree, but takes a lot longer.
595 			 * Thus, option -x (`expensive') prevents this
596 			 * search.
597 			 *
598 			 * Do only match locate output that starts
599 			 * with one of our source directories, and at
600 			 * least one further level of subdirectories.
601 			 */
602 			if (opt_x || (src && !opt_a))
603 				goto done_sources;
604 
605 			cp = malloc(sizeof LOCATECMD - 2 + s);
606 			if (cp == NULL)
607 				abort();
608 			sprintf(cp, LOCATECMD, name);
609 			if ((p = popen(cp, "r")) == NULL)
610 				goto done_sources;
611 			while ((src == NULL || opt_a) &&
612 			       (fgets(buf, BUFSIZ - 1, p)) != NULL) {
613 				if ((cp2 = strchr(buf, '\n')) != NULL)
614 					*cp2 = '\0';
615 				for (dp = sourcedirs;
616 				     (src == NULL || opt_a) && *dp != NULL;
617 				     dp++) {
618 					cp2 = malloc(strlen(*dp) + 9);
619 					if (cp2 == NULL)
620 						abort();
621 					strcpy(cp2, "^");
622 					strcat(cp2, *dp);
623 					strcat(cp2, "/[^/]+/");
624 					if ((i = regcomp(&re2, cp2,
625 							 REG_EXTENDED|REG_NOSUB))
626 					    != 0) {
627 						regerror(i, &re, buf,
628 							 BUFSIZ - 1);
629 						errx(EX_UNAVAILABLE,
630 						     "regcomp(%s) failed: %s",
631 						     cp2, buf);
632 					}
633 					free(cp2);
634 					if (regexec(&re2, buf, 0,
635 						    (regmatch_t *)NULL, 0)
636 					    == 0) {
637 						unusual = unusual &
638 						          ~NO_SRC_FOUND;
639 						if (src == NULL) {
640 							src = strdup(buf);
641 						} else {
642 							olen = strlen(src);
643 							nlen = strlen(buf);
644 							src = realloc(src,
645 								      olen +
646 								      nlen + 2);
647 							if (src == 0)
648 								abort();
649 							strcat(src, " ");
650 							strcat(src, buf);
651 						}
652 					}
653 					regfree(&re2);
654 				}
655 			}
656 			pclose(p);
657 			free(cp);
658 		}
659 	  done_sources:
660 
661 		if (opt_u && !unusual)
662 			continue;
663 
664 		printed = 0;
665 		if (!opt_q) {
666 			printf("%s:", name);
667 			printed++;
668 		}
669 		if (bin) {
670 			if (printed++)
671 				putchar(' ');
672 			fputs(bin, stdout);
673 		}
674 		if (man) {
675 			if (printed++)
676 				putchar(' ');
677 			fputs(man, stdout);
678 		}
679 		if (src) {
680 			if (printed++)
681 				putchar(' ');
682 			fputs(src, stdout);
683 		}
684 		if (printed)
685 			putchar('\n');
686 	}
687 
688 	if (opt_m)
689 		regfree(&re);
690 
691 	return (0);
692 }
693