xref: /illumos-gate/usr/src/tools/findunref/findunref.c (revision 69a119caa6570c7077699161b7c28b6ee9f8b0f4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
22  * Use is subject to license terms.
23  */
24 
25 /*
26  * Finds all unreferenced files in a source tree that do not match a list of
27  * permitted pathnames.
28  */
29 
30 #include <ctype.h>
31 #include <errno.h>
32 #include <fnmatch.h>
33 #include <ftw.h>
34 #include <stdarg.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <time.h>
39 #include <unistd.h>
40 #include <sys/param.h>
41 #include <sys/stat.h>
42 #include <sys/types.h>
43 
/*
 * Pathname set: a growable array of pathname glob patterns, together with
 * the bookkeeping needed to append to it and to test whether a pathname is
 * matched by any pattern it holds.
 */
typedef struct {
	char		**paths;	/* pattern strings, one per entry */
	unsigned int	npath;		/* number of entries in use */
	unsigned int	maxpaths;	/* number of entries allocated */
} pnset_t;
54 
55 /*
56  * Data associated with the current Mercurial manifest.
57  */
58 typedef struct hgdata {
59 	pnset_t		*manifest;
60 	char		hgpath[MAXPATHLEN];
61 	char		root[MAXPATHLEN];
62 	unsigned int	rootlen;
63 	boolean_t	rootwarn;
64 } hgdata_t;
65 
/*
 * Per-SCM hooks.  The check hook reports whether a given unreferenced file
 * is known to the SCM; the chdir hook, when present, is told about each
 * directory the tree walk enters.
 */
typedef int checkscm_func_t(const char *path, const struct FTW *ftwp);
typedef void chdirscm_func_t(const char *path);

typedef struct {
	const char	*name;		/* SCM name accepted by -S */
	checkscm_func_t	*checkfunc;	/* is this file under SCM control? */
	chdirscm_func_t	*chdirfunc;	/* new-directory hook; may be NULL */
} scm_t;
78 
79 static checkscm_func_t check_tw, check_hg, check_git;
80 static chdirscm_func_t chdir_hg, chdir_git;
81 static int	pnset_add(pnset_t *, const char *);
82 static int	pnset_check(const pnset_t *, const char *);
83 static void	pnset_empty(pnset_t *);
84 static void	pnset_free(pnset_t *);
85 static int	checkpath(const char *, const struct stat *, int, struct FTW *);
86 static pnset_t	*make_exset(const char *);
87 static void	warn(const char *, ...);
88 static void	die(const char *, ...);
89 
90 static const scm_t scms[] = {
91 	{ "tw",		check_tw,	NULL		},
92 	{ "teamware",	check_tw,	NULL		},
93 	{ "hg",		check_hg,	chdir_hg 	},
94 	{ "mercurial",	check_hg,	chdir_hg	},
95 	{ "git",	check_git,	chdir_git	},
96 	{ NULL,		NULL, 		NULL		}
97 };
98 
99 static const scm_t	*scm;
100 static hgdata_t		hgdata;
101 static pnset_t		*gitmanifest = NULL;
102 static time_t		tstamp;		/* timestamp to compare files to */
103 static pnset_t		*exsetp;	/* pathname globs to ignore */
104 static const char	*progname;
105 
106 int
107 main(int argc, char *argv[])
108 {
109 	int c;
110 	char path[MAXPATHLEN];
111 	char subtree[MAXPATHLEN] = "./";
112 	char *tstampfile = ".build.tstamp";
113 	struct stat tsstat;
114 
115 	progname = strrchr(argv[0], '/');
116 	if (progname == NULL)
117 		progname = argv[0];
118 	else
119 		progname++;
120 
121 	while ((c = getopt(argc, argv, "as:t:S:")) != EOF) {
122 		switch (c) {
123 		case 'a':
124 			/* for compatibility; now the default */
125 			break;
126 
127 		case 's':
128 			(void) strlcat(subtree, optarg, MAXPATHLEN);
129 			break;
130 
131 		case 't':
132 			tstampfile = optarg;
133 			break;
134 
135 		case 'S':
136 			for (scm = scms; scm->name != NULL; scm++) {
137 				if (strcmp(scm->name, optarg) == 0)
138 					break;
139 			}
140 			if (scm->name == NULL)
141 				die("unsupported SCM `%s'\n", optarg);
142 			break;
143 
144 		default:
145 		case '?':
146 			goto usage;
147 		}
148 	}
149 
150 	argc -= optind;
151 	argv += optind;
152 
153 	if (argc != 2) {
154 usage:		(void) fprintf(stderr, "usage: %s [-s <subtree>] "
155 		    "[-t <tstampfile>] [-S hg|tw|git] <srcroot> <exceptfile>\n",
156 		    progname);
157 		return (EXIT_FAILURE);
158 	}
159 
160 	/*
161 	 * Interpret a relative timestamp path as relative to srcroot.
162 	 */
163 	if (tstampfile[0] == '/')
164 		(void) strlcpy(path, tstampfile, MAXPATHLEN);
165 	else
166 		(void) snprintf(path, MAXPATHLEN, "%s/%s", argv[0], tstampfile);
167 
168 	if (stat(path, &tsstat) == -1)
169 		die("cannot stat timestamp file \"%s\"", path);
170 	tstamp = tsstat.st_mtime;
171 
172 	/*
173 	 * Create the exception pathname set.
174 	 */
175 	exsetp = make_exset(argv[1]);
176 	if (exsetp == NULL)
177 		die("cannot make exception pathname set\n");
178 
179 	/*
180 	 * Walk the specified subtree of the tree rooted at argv[0].
181 	 */
182 	if (chdir(argv[0]) == -1)
183 		die("cannot change directory to \"%s\"", argv[0]);
184 
185 	if (nftw(subtree, checkpath, 100, FTW_PHYS) != 0)
186 		die("cannot walk tree rooted at \"%s\"\n", argv[0]);
187 
188 	pnset_empty(exsetp);
189 	return (EXIT_SUCCESS);
190 }
191 
192 /*
193  * Load and return a pnset for the manifest for the Mercurial repo at `hgroot'.
194  */
195 static pnset_t *
196 load_manifest(const char *hgroot)
197 {
198 	FILE	*fp = NULL;
199 	char	*hgcmd = NULL;
200 	char	*newline;
201 	pnset_t	*pnsetp;
202 	char	path[MAXPATHLEN];
203 
204 	pnsetp = calloc(sizeof (pnset_t), 1);
205 	if (pnsetp == NULL ||
206 	    asprintf(&hgcmd, "hg manifest -R %s", hgroot) == -1)
207 		goto fail;
208 
209 	fp = popen(hgcmd, "r");
210 	if (fp == NULL)
211 		goto fail;
212 
213 	while (fgets(path, sizeof (path), fp) != NULL) {
214 		newline = strrchr(path, '\n');
215 		if (newline != NULL)
216 			*newline = '\0';
217 
218 		if (pnset_add(pnsetp, path) == 0)
219 			goto fail;
220 	}
221 
222 	(void) pclose(fp);
223 	free(hgcmd);
224 	return (pnsetp);
225 fail:
226 	warn("cannot load hg manifest at %s", hgroot);
227 	if (fp != NULL)
228 		(void) pclose(fp);
229 	free(hgcmd);
230 	pnset_free(pnsetp);
231 	return (NULL);
232 }
233 
234 static void
235 chdir_git(const char *path)
236 {
237 	FILE *fp = NULL;
238 	char *gitcmd = NULL;
239 	char *newline;
240 	char fn[MAXPATHLEN];
241 	pnset_t *pnsetp;
242 
243 	pnsetp = calloc(sizeof (pnset_t), 1);
244 	if ((pnsetp == NULL) ||
245 	    (asprintf(&gitcmd, "git ls-files %s", path) == -1))
246 		goto fail;
247 
248 	if ((fp = popen(gitcmd, "r")) == NULL)
249 		goto fail;
250 
251 	while (fgets(fn, sizeof (fn), fp) != NULL) {
252 		if ((newline = strrchr(fn, '\n')) != NULL)
253 			*newline = '\0';
254 
255 		if (pnset_add(pnsetp, fn) == 0)
256 			goto fail;
257 	}
258 
259 	(void) pclose(fp);
260 	free(gitcmd);
261 	gitmanifest = pnsetp;
262 	return;
263 fail:
264 	warn("cannot load git manifest");
265 	if (fp != NULL)
266 		(void) pclose(fp);
267 	if (pnsetp != NULL)
268 		free(pnsetp);
269 	if (gitcmd != NULL)
270 		free(gitcmd);
271 }
272 
273 /*
274  * If necessary, change our active manifest to be appropriate for `path'.
275  */
276 static void
277 chdir_hg(const char *path)
278 {
279 	char hgpath[MAXPATHLEN];
280 	char basepath[MAXPATHLEN];
281 	char *slash;
282 
283 	(void) snprintf(hgpath, MAXPATHLEN, "%s/.hg", path);
284 
285 	/*
286 	 * Change our active manifest if any one of the following is true:
287 	 *
288 	 *   1. No manifest is loaded.  Find the nearest hgroot to load from.
289 	 *
290 	 *   2. A manifest is loaded, but we've moved into a directory with
291 	 *	its own hgroot (e.g., usr/closed).  Load from its hgroot.
292 	 *
293 	 *   3. A manifest is loaded, but no longer applies (e.g., the manifest
294 	 *	under usr/closed is loaded, but we've moved to usr/src).
295 	 */
296 	if (hgdata.manifest == NULL ||
297 	    strcmp(hgpath, hgdata.hgpath) != 0 && access(hgpath, X_OK) == 0 ||
298 	    strncmp(path, hgdata.root, hgdata.rootlen - 1) != 0) {
299 		pnset_free(hgdata.manifest);
300 		hgdata.manifest = NULL;
301 
302 		(void) strlcpy(basepath, path, MAXPATHLEN);
303 
304 		/*
305 		 * Walk up the directory tree looking for .hg subdirectories.
306 		 */
307 		while (access(hgpath, X_OK) == -1) {
308 			slash = strrchr(basepath, '/');
309 			if (slash == NULL) {
310 				if (!hgdata.rootwarn) {
311 					warn("no hg root for \"%s\"\n", path);
312 					hgdata.rootwarn = B_TRUE;
313 				}
314 				return;
315 			}
316 			*slash = '\0';
317 			(void) snprintf(hgpath, MAXPATHLEN, "%s/.hg", basepath);
318 		}
319 
320 		/*
321 		 * We found a directory with an .hg subdirectory; record it
322 		 * and load its manifest.
323 		 */
324 		(void) strlcpy(hgdata.hgpath, hgpath, MAXPATHLEN);
325 		(void) strlcpy(hgdata.root, basepath, MAXPATHLEN);
326 		hgdata.manifest = load_manifest(hgdata.root);
327 
328 		/*
329 		 * The logic in check_hg() depends on hgdata.root having a
330 		 * single trailing slash, so only add it if it's missing.
331 		 */
332 		if (hgdata.root[strlen(hgdata.root) - 1] != '/')
333 			(void) strlcat(hgdata.root, "/", MAXPATHLEN);
334 		hgdata.rootlen = strlen(hgdata.root);
335 	}
336 }
337 
338 /*
339  * Check if a file is under Mercurial control by checking against the manifest.
340  */
341 /* ARGSUSED */
342 static int
343 check_hg(const char *path, const struct FTW *ftwp)
344 {
345 	/*
346 	 * The manifest paths are relative to the manifest root; skip past it.
347 	 */
348 	path += hgdata.rootlen;
349 
350 	return (hgdata.manifest != NULL && pnset_check(hgdata.manifest, path));
351 }
352 /* ARGSUSED */
353 static int
354 check_git(const char *path, const struct FTW *ftwp)
355 {
356 	path += 2;		/* Skip "./" */
357 	return (gitmanifest != NULL && pnset_check(gitmanifest, path));
358 }
359 
360 /*
361  * Check if a file is under TeamWare control by checking for its corresponding
362  * SCCS "s-dot" file.
363  */
364 static int
365 check_tw(const char *path, const struct FTW *ftwp)
366 {
367 	char sccspath[MAXPATHLEN];
368 
369 	(void) snprintf(sccspath, MAXPATHLEN, "%.*s/SCCS/s.%s", ftwp->base,
370 	    path, path + ftwp->base);
371 
372 	return (access(sccspath, F_OK) == 0);
373 }
374 
375 /*
376  * Using `exceptfile' and a built-in list of exceptions, build and return a
377  * pnset_t consisting of all of the pathnames globs which are allowed to be
378  * unreferenced in the source tree.
379  */
380 static pnset_t *
381 make_exset(const char *exceptfile)
382 {
383 	FILE		*fp;
384 	char		line[MAXPATHLEN];
385 	char		*newline;
386 	pnset_t		*pnsetp;
387 	unsigned int	i;
388 
389 	pnsetp = calloc(sizeof (pnset_t), 1);
390 	if (pnsetp == NULL)
391 		return (NULL);
392 
393 	/*
394 	 * Add any exceptions from the file.
395 	 */
396 	fp = fopen(exceptfile, "r");
397 	if (fp == NULL) {
398 		warn("cannot open exception file \"%s\"", exceptfile);
399 		goto fail;
400 	}
401 
402 	while (fgets(line, sizeof (line), fp) != NULL) {
403 		newline = strrchr(line, '\n');
404 		if (newline != NULL)
405 			*newline = '\0';
406 
407 		for (i = 0; isspace(line[i]); i++)
408 			;
409 
410 		if (line[i] == '#' || line[i] == '\0')
411 			continue;
412 
413 		if (pnset_add(pnsetp, line) == 0) {
414 			(void) fclose(fp);
415 			goto fail;
416 		}
417 	}
418 
419 	(void) fclose(fp);
420 	return (pnsetp);
421 fail:
422 	pnset_free(pnsetp);
423 	return (NULL);
424 }
425 
426 /*
427  * FTW callback: print `path' if it's older than `tstamp' and not in `exsetp'.
428  */
429 static int
430 checkpath(const char *path, const struct stat *statp, int type,
431     struct FTW *ftwp)
432 {
433 	switch (type) {
434 	case FTW_F:
435 		/*
436 		 * Skip if the file is referenced or in the exception list.
437 		 */
438 		if (statp->st_atime >= tstamp || pnset_check(exsetp, path))
439 			return (0);
440 
441 		/*
442 		 * If requested, restrict ourselves to unreferenced files
443 		 * under SCM control.
444 		 */
445 		if (scm == NULL || scm->checkfunc(path, ftwp))
446 			(void) puts(path);
447 		return (0);
448 
449 	case FTW_D:
450 		/*
451 		 * Prune any directories in the exception list.
452 		 */
453 		if (pnset_check(exsetp, path)) {
454 			ftwp->quit = FTW_PRUNE;
455 			return (0);
456 		}
457 
458 		/*
459 		 * If necessary, advise the SCM logic of our new directory.
460 		 */
461 		if (scm != NULL && scm->chdirfunc != NULL)
462 			scm->chdirfunc(path);
463 
464 		return (0);
465 
466 	case FTW_DNR:
467 		warn("cannot read \"%s\"", path);
468 		return (0);
469 
470 	case FTW_NS:
471 		warn("cannot stat \"%s\"", path);
472 		return (0);
473 
474 	default:
475 		break;
476 	}
477 
478 	return (0);
479 }
480 
481 /*
482  * Add `path' to the pnset_t pointed to by `pnsetp'.
483  */
484 static int
485 pnset_add(pnset_t *pnsetp, const char *path)
486 {
487 	char **newpaths;
488 	unsigned int maxpaths;
489 
490 	if (pnsetp->npath == pnsetp->maxpaths) {
491 		maxpaths = (pnsetp->maxpaths == 0) ? 512 : pnsetp->maxpaths * 2;
492 		newpaths = realloc(pnsetp->paths, sizeof (char *) * maxpaths);
493 		if (newpaths == NULL)
494 			return (0);
495 		pnsetp->paths = newpaths;
496 		pnsetp->maxpaths = maxpaths;
497 	}
498 
499 	pnsetp->paths[pnsetp->npath] = strdup(path);
500 	if (pnsetp->paths[pnsetp->npath] == NULL)
501 		return (0);
502 
503 	pnsetp->npath++;
504 	return (1);
505 }
506 
507 /*
508  * Check `path' against the pnset_t pointed to by `pnsetp'.
509  */
510 static int
511 pnset_check(const pnset_t *pnsetp, const char *path)
512 {
513 	unsigned int i;
514 
515 	for (i = 0; i < pnsetp->npath; i++) {
516 		if (fnmatch(pnsetp->paths[i], path, 0) == 0)
517 			return (1);
518 	}
519 	return (0);
520 }
521 
522 /*
523  * Empty the pnset_t pointed to by `pnsetp'.
524  */
525 static void
526 pnset_empty(pnset_t *pnsetp)
527 {
528 	while (pnsetp->npath-- != 0)
529 		free(pnsetp->paths[pnsetp->npath]);
530 
531 	free(pnsetp->paths);
532 	pnsetp->maxpaths = 0;
533 }
534 
535 /*
536  * Free the pnset_t pointed to by `pnsetp'.
537  */
538 static void
539 pnset_free(pnset_t *pnsetp)
540 {
541 	if (pnsetp != NULL) {
542 		pnset_empty(pnsetp);
543 		free(pnsetp);
544 	}
545 }
546 
547 /* PRINTFLIKE1 */
548 static void
549 warn(const char *format, ...)
550 {
551 	va_list alist;
552 	char *errstr = strerror(errno);
553 
554 	if (errstr == NULL)
555 		errstr = "<unknown error>";
556 
557 	(void) fprintf(stderr, "%s: ", progname);
558 
559 	va_start(alist, format);
560 	(void) vfprintf(stderr, format, alist);
561 	va_end(alist);
562 
563 	if (strrchr(format, '\n') == NULL)
564 		(void) fprintf(stderr, ": %s\n", errstr);
565 }
566 
567 /* PRINTFLIKE1 */
568 static void
569 die(const char *format, ...)
570 {
571 	va_list alist;
572 	char *errstr = strerror(errno);
573 
574 	if (errstr == NULL)
575 		errstr = "<unknown error>";
576 
577 	(void) fprintf(stderr, "%s: fatal: ", progname);
578 
579 	va_start(alist, format);
580 	(void) vfprintf(stderr, format, alist);
581 	va_end(alist);
582 
583 	if (strrchr(format, '\n') == NULL)
584 		(void) fprintf(stderr, ": %s\n", errstr);
585 
586 	exit(EXIT_FAILURE);
587 }
588