xref: /illumos-gate/usr/src/tools/findunref/findunref.c (revision a38ee58261c5aa81028a4329e73da4016006aa99)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
22  * Use is subject to license terms.
23  */
24 
25 /*
26  * Finds all unreferenced files in a source tree that do not match a list of
27  * permitted pathnames.
28  */
29 
30 #include <ctype.h>
31 #include <errno.h>
32 #include <fnmatch.h>
33 #include <ftw.h>
34 #include <stdarg.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <time.h>
39 #include <unistd.h>
40 #include <sys/param.h>
41 #include <sys/stat.h>
42 #include <sys/types.h>
43 
/*
 * Pathname set: a growable array of pathname glob patterns.  The pnset_*()
 * routines below add patterns to a set and test whether a given pathname
 * is matched by any pattern in it.
 */
typedef struct {
	char		**paths;	/* array of heap-allocated patterns */
	unsigned int	npath;		/* number of entries in use */
	unsigned int	maxpaths;	/* number of entries allocated */
} pnset_t;
54 
55 /*
56  * Data associated with the current SCM manifest.
57  */
58 typedef struct scmdata {
59 	pnset_t		*manifest;
60 	char		metapath[MAXPATHLEN];
61 	char		root[MAXPATHLEN];
62 	unsigned int	rootlen;
63 	boolean_t	rootwarn;
64 } scmdata_t;
65 
/*
 * Per-SCM hooks (currently Git, Mercurial and TeamWare).  The check hook
 * reports whether an unreferenced file is known to the SCM; the optional
 * chdir hook is told about each directory the tree walk enters.
 */
typedef int checkscm_func_t(const char *path, const struct FTW *ftwp);
typedef void chdirscm_func_t(const char *path);

typedef struct {
	const char	*name;		/* name accepted by -S */
	checkscm_func_t	*checkfunc;	/* is this file under SCM control? */
	chdirscm_func_t	*chdirfunc;	/* directory-change hook, or NULL */
} scm_t;
78 
79 static checkscm_func_t check_tw, check_scmdata;
80 static chdirscm_func_t chdir_hg, chdir_git;
81 static int	pnset_add(pnset_t *, const char *);
82 static int	pnset_check(const pnset_t *, const char *);
83 static void	pnset_empty(pnset_t *);
84 static void	pnset_free(pnset_t *);
85 static int	checkpath(const char *, const struct stat *, int, struct FTW *);
86 static pnset_t	*make_exset(const char *);
87 static void	warn(const char *, ...);
88 static void	die(const char *, ...);
89 
90 static const scm_t scms[] = {
91 	{ "tw",		check_tw,	NULL		},
92 	{ "teamware",	check_tw,	NULL		},
93 	{ "hg",		check_scmdata,	chdir_hg 	},
94 	{ "mercurial",	check_scmdata,	chdir_hg	},
95 	{ "git",	check_scmdata,	chdir_git	},
96 	{ NULL,		NULL, 		NULL		}
97 };
98 
99 static const scm_t	*scm;
100 static scmdata_t	scmdata;
101 static time_t		tstamp;		/* timestamp to compare files to */
102 static pnset_t		*exsetp;	/* pathname globs to ignore */
103 static const char	*progname;
104 
105 int
106 main(int argc, char *argv[])
107 {
108 	int c;
109 	char path[MAXPATHLEN];
110 	char subtree[MAXPATHLEN] = "./";
111 	char *tstampfile = ".build.tstamp";
112 	struct stat tsstat;
113 
114 	progname = strrchr(argv[0], '/');
115 	if (progname == NULL)
116 		progname = argv[0];
117 	else
118 		progname++;
119 
120 	while ((c = getopt(argc, argv, "as:t:S:")) != EOF) {
121 		switch (c) {
122 		case 'a':
123 			/* for compatibility; now the default */
124 			break;
125 
126 		case 's':
127 			(void) strlcat(subtree, optarg, MAXPATHLEN);
128 			break;
129 
130 		case 't':
131 			tstampfile = optarg;
132 			break;
133 
134 		case 'S':
135 			for (scm = scms; scm->name != NULL; scm++) {
136 				if (strcmp(scm->name, optarg) == 0)
137 					break;
138 			}
139 			if (scm->name == NULL)
140 				die("unsupported SCM `%s'\n", optarg);
141 			break;
142 
143 		default:
144 		case '?':
145 			goto usage;
146 		}
147 	}
148 
149 	argc -= optind;
150 	argv += optind;
151 
152 	if (argc != 2) {
153 usage:		(void) fprintf(stderr, "usage: %s [-s <subtree>] "
154 		    "[-t <tstampfile>] [-S hg|tw|git] <srcroot> <exceptfile>\n",
155 		    progname);
156 		return (EXIT_FAILURE);
157 	}
158 
159 	/*
160 	 * Interpret a relative timestamp path as relative to srcroot.
161 	 */
162 	if (tstampfile[0] == '/')
163 		(void) strlcpy(path, tstampfile, MAXPATHLEN);
164 	else
165 		(void) snprintf(path, MAXPATHLEN, "%s/%s", argv[0], tstampfile);
166 
167 	if (stat(path, &tsstat) == -1)
168 		die("cannot stat timestamp file \"%s\"", path);
169 	tstamp = tsstat.st_mtime;
170 
171 	/*
172 	 * Create the exception pathname set.
173 	 */
174 	exsetp = make_exset(argv[1]);
175 	if (exsetp == NULL)
176 		die("cannot make exception pathname set\n");
177 
178 	/*
179 	 * Walk the specified subtree of the tree rooted at argv[0].
180 	 */
181 	if (chdir(argv[0]) == -1)
182 		die("cannot change directory to \"%s\"", argv[0]);
183 
184 	if (nftw(subtree, checkpath, 100, FTW_PHYS) != 0)
185 		die("cannot walk tree rooted at \"%s\"\n", argv[0]);
186 
187 	pnset_empty(exsetp);
188 	return (EXIT_SUCCESS);
189 }
190 
191 /*
192  * Load and return a pnset for the manifest for the Mercurial repo at `hgroot'.
193  */
194 static pnset_t *
195 hg_manifest(const char *hgroot)
196 {
197 	FILE	*fp = NULL;
198 	char	*hgcmd = NULL;
199 	char	*newline;
200 	pnset_t	*pnsetp;
201 	char	path[MAXPATHLEN];
202 
203 	pnsetp = calloc(sizeof (pnset_t), 1);
204 	if (pnsetp == NULL ||
205 	    asprintf(&hgcmd, "hg manifest -R %s", hgroot) == -1)
206 		goto fail;
207 
208 	fp = popen(hgcmd, "r");
209 	if (fp == NULL)
210 		goto fail;
211 
212 	while (fgets(path, sizeof (path), fp) != NULL) {
213 		newline = strrchr(path, '\n');
214 		if (newline != NULL)
215 			*newline = '\0';
216 
217 		if (pnset_add(pnsetp, path) == 0)
218 			goto fail;
219 	}
220 
221 	(void) pclose(fp);
222 	free(hgcmd);
223 	return (pnsetp);
224 fail:
225 	warn("cannot load hg manifest at %s", hgroot);
226 	if (fp != NULL)
227 		(void) pclose(fp);
228 	free(hgcmd);
229 	pnset_free(pnsetp);
230 	return (NULL);
231 }
232 
233 /*
234  * Load and return a pnset for the manifest for the Git repo at `gitroot'.
235  */
236 static pnset_t *
237 git_manifest(const char *gitroot)
238 {
239 	FILE	*fp = NULL;
240 	char	*gitcmd = NULL;
241 	char	*newline;
242 	pnset_t	*pnsetp;
243 	char	path[MAXPATHLEN];
244 
245 	pnsetp = calloc(sizeof (pnset_t), 1);
246 	if (pnsetp == NULL ||
247 	    asprintf(&gitcmd, "git --git-dir=%s/.git ls-files", gitroot) == -1)
248 		goto fail;
249 
250 	fp = popen(gitcmd, "r");
251 	if (fp == NULL)
252 		goto fail;
253 
254 	while (fgets(path, sizeof (path), fp) != NULL) {
255 		newline = strrchr(path, '\n');
256 		if (newline != NULL)
257 			*newline = '\0';
258 
259 		if (pnset_add(pnsetp, path) == 0)
260 			goto fail;
261 	}
262 
263 	(void) pclose(fp);
264 	free(gitcmd);
265 	return (pnsetp);
266 fail:
267 	warn("cannot load git manifest at %s", gitroot);
268 	if (fp != NULL)
269 		(void) pclose(fp);
270 	free(gitcmd);
271 	pnset_free(pnsetp);
272 	return (NULL);
273 }
274 
275 /*
276  * If necessary, change our active manifest to be appropriate for `path'.
277  */
278 static void
279 chdir_scmdata(const char *path, const char *meta,
280     pnset_t *(*manifest_func)(const char *path))
281 {
282 	char scmpath[MAXPATHLEN];
283 	char basepath[MAXPATHLEN];
284 	char *slash;
285 
286 	(void) snprintf(scmpath, MAXPATHLEN, "%s/%s", path, meta);
287 
288 	/*
289 	 * Change our active manifest if any one of the following is true:
290 	 *
291 	 *   1. No manifest is loaded.  Find the nearest SCM root to load from.
292 	 *
293 	 *   2. A manifest is loaded, but we've moved into a directory with
294 	 *	its own metadata directory (e.g., usr/closed).  Load from its
295 	 *	root.
296 	 *
297 	 *   3. A manifest is loaded, but no longer applies (e.g., the manifest
298 	 *	under usr/closed is loaded, but we've moved to usr/src).
299 	 */
300 	if (scmdata.manifest == NULL ||
301 	    (strcmp(scmpath, scmdata.metapath) != 0 &&
302 	    access(scmpath, X_OK) == 0) ||
303 	    strncmp(path, scmdata.root, scmdata.rootlen - 1) != 0) {
304 		pnset_free(scmdata.manifest);
305 		scmdata.manifest = NULL;
306 
307 		(void) strlcpy(basepath, path, MAXPATHLEN);
308 
309 		/*
310 		 * Walk up the directory tree looking for metadata
311 		 * subdirectories.
312 		 */
313 		while (access(scmpath, X_OK) == -1) {
314 			slash = strrchr(basepath, '/');
315 			if (slash == NULL) {
316 				if (!scmdata.rootwarn) {
317 					warn("no metadata directory "
318 					    "for \"%s\"\n", path);
319 					scmdata.rootwarn = B_TRUE;
320 				}
321 				return;
322 			}
323 			*slash = '\0';
324 			(void) snprintf(scmpath, MAXPATHLEN, "%s/%s", basepath,
325 			    meta);
326 		}
327 
328 		/*
329 		 * We found a directory with an SCM metadata directory; record
330 		 * it and load its manifest.
331 		 */
332 		(void) strlcpy(scmdata.metapath, scmpath, MAXPATHLEN);
333 		(void) strlcpy(scmdata.root, basepath, MAXPATHLEN);
334 		scmdata.manifest = manifest_func(scmdata.root);
335 
336 		/*
337 		 * The logic in check_scmdata() depends on scmdata.root having
338 		 * a single trailing slash, so only add it if it's missing.
339 		 */
340 		if (scmdata.root[strlen(scmdata.root) - 1] != '/')
341 			(void) strlcat(scmdata.root, "/", MAXPATHLEN);
342 		scmdata.rootlen = strlen(scmdata.root);
343 	}
344 }
345 
346 /*
347  * If necessary, change our active manifest to be appropriate for `path'.
348  */
349 static void
350 chdir_git(const char *path)
351 {
352 	chdir_scmdata(path, ".git", git_manifest);
353 }
354 
355 static void
356 chdir_hg(const char *path)
357 {
358 	chdir_scmdata(path, ".hg", hg_manifest);
359 }
360 
361 /* ARGSUSED */
362 static int
363 check_scmdata(const char *path, const struct FTW *ftwp)
364 {
365 	/*
366 	 * The manifest paths are relative to the manifest root; skip past it.
367 	 */
368 	path += scmdata.rootlen;
369 
370 	return (scmdata.manifest != NULL && pnset_check(scmdata.manifest,
371 	    path));
372 }
373 
374 /*
375  * Check if a file is under TeamWare control by checking for its corresponding
376  * SCCS "s-dot" file.
377  */
378 static int
379 check_tw(const char *path, const struct FTW *ftwp)
380 {
381 	char sccspath[MAXPATHLEN];
382 
383 	(void) snprintf(sccspath, MAXPATHLEN, "%.*s/SCCS/s.%s", ftwp->base,
384 	    path, path + ftwp->base);
385 
386 	return (access(sccspath, F_OK) == 0);
387 }
388 
389 /*
390  * Using `exceptfile' and a built-in list of exceptions, build and return a
391  * pnset_t consisting of all of the pathnames globs which are allowed to be
392  * unreferenced in the source tree.
393  */
394 static pnset_t *
395 make_exset(const char *exceptfile)
396 {
397 	FILE		*fp;
398 	char		line[MAXPATHLEN];
399 	char		*newline;
400 	pnset_t		*pnsetp;
401 	unsigned int	i;
402 
403 	pnsetp = calloc(sizeof (pnset_t), 1);
404 	if (pnsetp == NULL)
405 		return (NULL);
406 
407 	/*
408 	 * Add any exceptions from the file.
409 	 */
410 	fp = fopen(exceptfile, "r");
411 	if (fp == NULL) {
412 		warn("cannot open exception file \"%s\"", exceptfile);
413 		goto fail;
414 	}
415 
416 	while (fgets(line, sizeof (line), fp) != NULL) {
417 		newline = strrchr(line, '\n');
418 		if (newline != NULL)
419 			*newline = '\0';
420 
421 		for (i = 0; isspace(line[i]); i++)
422 			;
423 
424 		if (line[i] == '#' || line[i] == '\0')
425 			continue;
426 
427 		if (pnset_add(pnsetp, line) == 0) {
428 			(void) fclose(fp);
429 			goto fail;
430 		}
431 	}
432 
433 	(void) fclose(fp);
434 	return (pnsetp);
435 fail:
436 	pnset_free(pnsetp);
437 	return (NULL);
438 }
439 
440 /*
441  * FTW callback: print `path' if it's older than `tstamp' and not in `exsetp'.
442  */
443 static int
444 checkpath(const char *path, const struct stat *statp, int type,
445     struct FTW *ftwp)
446 {
447 	switch (type) {
448 	case FTW_F:
449 		/*
450 		 * Skip if the file is referenced or in the exception list.
451 		 */
452 		if (statp->st_atime >= tstamp || pnset_check(exsetp, path))
453 			return (0);
454 
455 		/*
456 		 * If requested, restrict ourselves to unreferenced files
457 		 * under SCM control.
458 		 */
459 		if (scm == NULL || scm->checkfunc(path, ftwp))
460 			(void) puts(path);
461 		return (0);
462 
463 	case FTW_D:
464 		/*
465 		 * Prune any directories in the exception list.
466 		 */
467 		if (pnset_check(exsetp, path)) {
468 			ftwp->quit = FTW_PRUNE;
469 			return (0);
470 		}
471 
472 		/*
473 		 * If necessary, advise the SCM logic of our new directory.
474 		 */
475 		if (scm != NULL && scm->chdirfunc != NULL)
476 			scm->chdirfunc(path);
477 
478 		return (0);
479 
480 	case FTW_DNR:
481 		warn("cannot read \"%s\"", path);
482 		return (0);
483 
484 	case FTW_NS:
485 		warn("cannot stat \"%s\"", path);
486 		return (0);
487 
488 	default:
489 		break;
490 	}
491 
492 	return (0);
493 }
494 
495 /*
496  * Add `path' to the pnset_t pointed to by `pnsetp'.
497  */
498 static int
499 pnset_add(pnset_t *pnsetp, const char *path)
500 {
501 	char **newpaths;
502 	unsigned int maxpaths;
503 
504 	if (pnsetp->npath == pnsetp->maxpaths) {
505 		maxpaths = (pnsetp->maxpaths == 0) ? 512 : pnsetp->maxpaths * 2;
506 		newpaths = realloc(pnsetp->paths, sizeof (char *) * maxpaths);
507 		if (newpaths == NULL)
508 			return (0);
509 		pnsetp->paths = newpaths;
510 		pnsetp->maxpaths = maxpaths;
511 	}
512 
513 	pnsetp->paths[pnsetp->npath] = strdup(path);
514 	if (pnsetp->paths[pnsetp->npath] == NULL)
515 		return (0);
516 
517 	pnsetp->npath++;
518 	return (1);
519 }
520 
521 /*
522  * Check `path' against the pnset_t pointed to by `pnsetp'.
523  */
524 static int
525 pnset_check(const pnset_t *pnsetp, const char *path)
526 {
527 	unsigned int i;
528 
529 	for (i = 0; i < pnsetp->npath; i++) {
530 		if (fnmatch(pnsetp->paths[i], path, 0) == 0)
531 			return (1);
532 	}
533 	return (0);
534 }
535 
536 /*
537  * Empty the pnset_t pointed to by `pnsetp'.
538  */
539 static void
540 pnset_empty(pnset_t *pnsetp)
541 {
542 	while (pnsetp->npath-- != 0)
543 		free(pnsetp->paths[pnsetp->npath]);
544 
545 	free(pnsetp->paths);
546 	pnsetp->maxpaths = 0;
547 }
548 
549 /*
550  * Free the pnset_t pointed to by `pnsetp'.
551  */
552 static void
553 pnset_free(pnset_t *pnsetp)
554 {
555 	if (pnsetp != NULL) {
556 		pnset_empty(pnsetp);
557 		free(pnsetp);
558 	}
559 }
560 
561 /* PRINTFLIKE1 */
562 static void
563 warn(const char *format, ...)
564 {
565 	va_list alist;
566 	char *errstr = strerror(errno);
567 
568 	if (errstr == NULL)
569 		errstr = "<unknown error>";
570 
571 	(void) fprintf(stderr, "%s: ", progname);
572 
573 	va_start(alist, format);
574 	(void) vfprintf(stderr, format, alist);
575 	va_end(alist);
576 
577 	if (strrchr(format, '\n') == NULL)
578 		(void) fprintf(stderr, ": %s\n", errstr);
579 }
580 
581 /* PRINTFLIKE1 */
582 static void
583 die(const char *format, ...)
584 {
585 	va_list alist;
586 	char *errstr = strerror(errno);
587 
588 	if (errstr == NULL)
589 		errstr = "<unknown error>";
590 
591 	(void) fprintf(stderr, "%s: fatal: ", progname);
592 
593 	va_start(alist, format);
594 	(void) vfprintf(stderr, format, alist);
595 	va_end(alist);
596 
597 	if (strrchr(format, '\n') == NULL)
598 		(void) fprintf(stderr, ": %s\n", errstr);
599 
600 	exit(EXIT_FAILURE);
601 }
602