xref: /illumos-gate/usr/src/tools/findunref/findunref.c (revision e9af4bc0b1cc30cea75d6ad4aa2fde97d985e9be)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  *
21  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
22  * Use is subject to license terms.
23  */
24 
25 /*
26  * Finds all unreferenced files in a source tree that do not match a list of
27  * permitted pathnames.
28  */
29 
30 #include <ctype.h>
31 #include <errno.h>
32 #include <fnmatch.h>
33 #include <ftw.h>
34 #include <stdarg.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <time.h>
39 #include <unistd.h>
40 #include <sys/param.h>
41 #include <sys/stat.h>
42 #include <sys/types.h>
43 
44 /*
45  * Pathname set: a simple datatype for storing pathname pattern globs and
46  * for checking whether a given pathname is matched by a pattern glob in
47  * the set.
48  */
49 typedef struct {
50 	char		**paths;	/* array of strdup()ed glob pattern strings */
51 	unsigned int	npath;		/* number of entries of paths[] in use */
52 	unsigned int	maxpaths;	/* allocated capacity of paths[] */
53 } pnset_t;
54 
55 /*
56  * Data associated with the current Mercurial manifest.
57  */
58 typedef struct hgdata {
59 	pnset_t		*manifest;	/* loaded manifest paths; NULL if none loaded */
60 	char		hgpath[MAXPATHLEN];	/* ".hg" directory the manifest came from */
61 	char		root[MAXPATHLEN];	/* repo root, stored with a trailing '/' */
62 	unsigned int	rootlen;	/* strlen(root), trailing '/' included */
63 	boolean_t	rootwarn;	/* B_TRUE once "no hg root" has been warned */
64 } hgdata_t;
65 
66 /*
67  * Hooks used to check if a given unreferenced file is known to an SCM
68  * (currently Mercurial and TeamWare).
69  */
70 typedef int checkscm_func_t(const char *, const struct FTW *);	/* nonzero: file is known to the SCM */
71 typedef void chdirscm_func_t(const char *);	/* told each non-pruned directory the walk visits */
72 
73 typedef struct {
74 	const char	*name;		/* SCM name accepted by the -S option */
75 	checkscm_func_t	*checkfunc;	/* per-file "is it under SCM control?" hook */
76 	chdirscm_func_t	*chdirfunc;	/* per-directory hook; may be NULL */
77 } scm_t;
78 
79 static checkscm_func_t check_tw, check_hg;
80 static chdirscm_func_t chdir_hg;
81 static int	pnset_add(pnset_t *, const char *);
82 static int	pnset_check(const pnset_t *, const char *);
83 static void	pnset_empty(pnset_t *);
84 static void	pnset_free(pnset_t *);
85 static int	checkpath(const char *, const struct stat *, int, struct FTW *);
86 static pnset_t	*make_exset(const char *);
87 static void	warn(const char *, ...);
88 static void	die(const char *, ...);
89 
90 static const scm_t scms[] = {	/* SCMs selectable with -S */
91 	{ "tw",		check_tw,	NULL		},
92 	{ "teamware",	check_tw,	NULL		},	/* alias for "tw" */
93 	{ "hg",		check_hg,	chdir_hg 	},
94 	{ "mercurial",	check_hg,	chdir_hg	},	/* alias for "hg" */
95 	{ NULL,		NULL, 		NULL		}	/* end-of-table sentinel */
96 };
97 
97 static const scm_t	*scm;		/* SCM chosen with -S; NULL if none given */
98 static hgdata_t		hgdata;		/* state of the currently loaded hg manifest */
99 static time_t		tstamp;		/* timestamp to compare files to */
100 static pnset_t		*exsetp;	/* pathname globs to ignore */
101 static const char	*progname;	/* basename of argv[0], for messages */
103 
104 int
105 main(int argc, char *argv[])
106 {
107 	int c;
108 	char path[MAXPATHLEN];
109 	char subtree[MAXPATHLEN] = "./";
110 	char *tstampfile = ".build.tstamp";
111 	struct stat tsstat;
112 
113 	progname = strrchr(argv[0], '/');
114 	if (progname == NULL)
115 		progname = argv[0];
116 	else
117 		progname++;
118 
119 	while ((c = getopt(argc, argv, "as:t:S:")) != EOF) {
120 		switch (c) {
121 		case 'a':
122 			/* for compatibility; now the default */
123 			break;
124 
125 		case 's':
126 			(void) strlcat(subtree, optarg, MAXPATHLEN);
127 			break;
128 
129 		case 't':
130 			tstampfile = optarg;
131 			break;
132 
133 		case 'S':
134 			for (scm = scms; scm->name != NULL; scm++) {
135 				if (strcmp(scm->name, optarg) == 0)
136 					break;
137 			}
138 			if (scm->name == NULL)
139 				die("unsupported SCM `%s'\n", optarg);
140 			break;
141 
142 		default:
143 		case '?':
144 			goto usage;
145 		}
146 	}
147 
148 	argc -= optind;
149 	argv += optind;
150 
151 	if (argc != 2) {
152 usage:		(void) fprintf(stderr, "usage: %s [-s <subtree>] "
153 		    "[-t <tstampfile>] [-S hg|tw] <srcroot> <exceptfile>\n",
154 		    progname);
155 		return (EXIT_FAILURE);
156 	}
157 
158 	/*
159 	 * Interpret a relative timestamp path as relative to srcroot.
160 	 */
161 	if (tstampfile[0] == '/')
162 		(void) strlcpy(path, tstampfile, MAXPATHLEN);
163 	else
164 		(void) snprintf(path, MAXPATHLEN, "%s/%s", argv[0], tstampfile);
165 
166 	if (stat(path, &tsstat) == -1)
167 		die("cannot stat timestamp file \"%s\"", path);
168 	tstamp = tsstat.st_mtime;
169 
170 	/*
171 	 * Create the exception pathname set.
172 	 */
173 	exsetp = make_exset(argv[1]);
174 	if (exsetp == NULL)
175 		die("cannot make exception pathname set\n");
176 
177 	/*
178 	 * Walk the specified subtree of the tree rooted at argv[0].
179 	 */
180 	if (chdir(argv[0]) == -1)
181 		die("cannot change directory to \"%s\"", argv[0]);
182 
183 	if (nftw(subtree, checkpath, 100, FTW_PHYS) != 0)
184 		die("cannot walk tree rooted at \"%s\"\n", argv[0]);
185 
186 	pnset_empty(exsetp);
187 	return (EXIT_SUCCESS);
188 }
189 
190 /*
191  * Load and return a pnset for the manifest for the Mercurial repo at `hgroot'.
192  */
193 static pnset_t *
194 load_manifest(const char *hgroot)
195 {
196 	FILE	*fp = NULL;
197 	char	*hgcmd = NULL;
198 	char	*newline;
199 	pnset_t	*pnsetp;
200 	char	path[MAXPATHLEN];
201 
202 	pnsetp = calloc(sizeof (pnset_t), 1);
203 	if (pnsetp == NULL ||
204 	    asprintf(&hgcmd, "/usr/bin/hg manifest -R %s", hgroot) == -1)
205 		goto fail;
206 
207 	fp = popen(hgcmd, "r");
208 	if (fp == NULL)
209 		goto fail;
210 
211 	while (fgets(path, sizeof (path), fp) != NULL) {
212 		newline = strrchr(path, '\n');
213 		if (newline != NULL)
214 			*newline = '\0';
215 
216 		if (pnset_add(pnsetp, path) == 0)
217 			goto fail;
218 	}
219 
220 	(void) pclose(fp);
221 	free(hgcmd);
222 	return (pnsetp);
223 fail:
224 	warn("cannot load hg manifest at %s", hgroot);
225 	if (fp != NULL)
226 		(void) pclose(fp);
227 	free(hgcmd);
228 	pnset_free(pnsetp);
229 	return (NULL);
230 }
231 
232 /*
233  * If necessary, change our active manifest to be appropriate for `path'.
234  */
235 static void
236 chdir_hg(const char *path)
237 {
238 	char hgpath[MAXPATHLEN];
239 	char basepath[MAXPATHLEN];
240 	char *slash;
241 
242 	(void) snprintf(hgpath, MAXPATHLEN, "%s/.hg", path);
243 
244 	/*
245 	 * Change our active manifest if any one of the following is true:
246 	 *
247 	 *   1. No manifest is loaded.  Find the nearest hgroot to load from.
248 	 *
249 	 *   2. A manifest is loaded, but we've moved into a directory with
250 	 *	its own hgroot (e.g., usr/closed).  Load from its hgroot.
251 	 *
252 	 *   3. A manifest is loaded, but no longer applies (e.g., the manifest
253 	 *	under usr/closed is loaded, but we've moved to usr/src).
254 	 */
255 	if (hgdata.manifest == NULL ||
256 	    strcmp(hgpath, hgdata.hgpath) != 0 && access(hgpath, X_OK) == 0 ||
257 	    strncmp(path, hgdata.root, hgdata.rootlen - 1) != 0) {
258 		pnset_free(hgdata.manifest);
259 		hgdata.manifest = NULL;
260 
261 		(void) strlcpy(basepath, path, MAXPATHLEN);
262 
263 		/*
264 		 * Walk up the directory tree looking for .hg subdirectories.
265 		 */
266 		while (access(hgpath, X_OK) == -1) {
267 			slash = strrchr(basepath, '/');
268 			if (slash == NULL) {
269 				if (!hgdata.rootwarn) {
270 					warn("no hg root for \"%s\"\n", path);
271 					hgdata.rootwarn = B_TRUE;
272 				}
273 				return;
274 			}
275 			*slash = '\0';
276 			(void) snprintf(hgpath, MAXPATHLEN, "%s/.hg", basepath);
277 		}
278 
279 		/*
280 		 * We found a directory with an .hg subdirectory; record it
281 		 * and load its manifest.
282 		 */
283 		(void) strlcpy(hgdata.hgpath, hgpath, MAXPATHLEN);
284 		(void) strlcpy(hgdata.root, basepath, MAXPATHLEN);
285 		hgdata.manifest = load_manifest(hgdata.root);
286 
287 		/*
288 		 * The logic in check_hg() depends on hgdata.root having a
289 		 * single trailing slash, so only add it if it's missing.
290 		 */
291 		if (hgdata.root[strlen(hgdata.root) - 1] != '/')
292 			(void) strlcat(hgdata.root, "/", MAXPATHLEN);
293 		hgdata.rootlen = strlen(hgdata.root);
294 	}
295 }
296 
297 /*
298  * Check if a file is under Mercurial control by checking against the manifest.
299  */
300 /* ARGSUSED */
301 static int
302 check_hg(const char *path, const struct FTW *ftwp)
303 {
304 	/*
305 	 * The manifest paths are relative to the manifest root; skip past it.
306 	 */
307 	path += hgdata.rootlen;
308 
309 	return (hgdata.manifest != NULL && pnset_check(hgdata.manifest, path));
310 }
311 
/*
 * Decide whether a file is under TeamWare control.  TeamWare keeps the
 * history of <dir>/<file> in the SCCS "s-dot" file <dir>/SCCS/s.<file>, so
 * the file is considered controlled exactly when that s-dot file exists.
 *
 * `path' is the file's pathname and `ftwp->base' is the offset of its final
 * component.  Returns 1 if the s-dot file exists and 0 otherwise.
 */
static int
check_tw(const char *path, const struct FTW *ftwp)
{
	char		sdot[MAXPATHLEN];
	const char	*leaf = path + ftwp->base;
	int		found;

	/* The first ftwp->base bytes of path name the containing directory. */
	(void) snprintf(sdot, sizeof (sdot), "%.*s/SCCS/s.%s", ftwp->base,
	    path, leaf);

	found = (access(sdot, F_OK) == 0);
	return (found);
}
326 
327 /*
328  * Using `exceptfile' and a built-in list of exceptions, build and return a
329  * pnset_t consisting of all of the pathnames globs which are allowed to be
330  * unreferenced in the source tree.
331  */
332 static pnset_t *
333 make_exset(const char *exceptfile)
334 {
335 	FILE		*fp;
336 	char		line[MAXPATHLEN];
337 	char		*newline;
338 	pnset_t		*pnsetp;
339 	unsigned int	i;
340 
341 	pnsetp = calloc(sizeof (pnset_t), 1);
342 	if (pnsetp == NULL)
343 		return (NULL);
344 
345 	/*
346 	 * Add any exceptions from the file.
347 	 */
348 	fp = fopen(exceptfile, "r");
349 	if (fp == NULL) {
350 		warn("cannot open exception file \"%s\"", exceptfile);
351 		goto fail;
352 	}
353 
354 	while (fgets(line, sizeof (line), fp) != NULL) {
355 		newline = strrchr(line, '\n');
356 		if (newline != NULL)
357 			*newline = '\0';
358 
359 		for (i = 0; isspace(line[i]); i++)
360 			;
361 
362 		if (line[i] == '#' || line[i] == '\0')
363 			continue;
364 
365 		if (pnset_add(pnsetp, line) == 0) {
366 			(void) fclose(fp);
367 			goto fail;
368 		}
369 	}
370 
371 	(void) fclose(fp);
372 	return (pnsetp);
373 fail:
374 	pnset_free(pnsetp);
375 	return (NULL);
376 }
377 
378 /*
379  * FTW callback: print `path' if it's older than `tstamp' and not in `exsetp'.
380  */
381 static int
382 checkpath(const char *path, const struct stat *statp, int type,
383     struct FTW *ftwp)
384 {
385 	switch (type) {
386 	case FTW_F:
387 		/*
388 		 * Skip if the file is referenced or in the exception list.
389 		 */
390 		if (statp->st_atime >= tstamp || pnset_check(exsetp, path))
391 			return (0);
392 
393 		/*
394 		 * If requested, restrict ourselves to unreferenced files
395 		 * under SCM control.
396 		 */
397 		if (scm == NULL || scm->checkfunc(path, ftwp))
398 			(void) puts(path);
399 		return (0);
400 
401 	case FTW_D:
402 		/*
403 		 * Prune any directories in the exception list.
404 		 */
405 		if (pnset_check(exsetp, path)) {
406 			ftwp->quit = FTW_PRUNE;
407 			return (0);
408 		}
409 
410 		/*
411 		 * If necessary, advise the SCM logic of our new directory.
412 		 */
413 		if (scm != NULL && scm->chdirfunc != NULL)
414 			scm->chdirfunc(path);
415 
416 		return (0);
417 
418 	case FTW_DNR:
419 		warn("cannot read \"%s\"", path);
420 		return (0);
421 
422 	case FTW_NS:
423 		warn("cannot stat \"%s\"", path);
424 		return (0);
425 
426 	default:
427 		break;
428 	}
429 
430 	return (0);
431 }
432 
433 /*
434  * Add `path' to the pnset_t pointed to by `pnsetp'.
435  */
436 static int
437 pnset_add(pnset_t *pnsetp, const char *path)
438 {
439 	char **newpaths;
440 	unsigned int maxpaths;
441 
442 	if (pnsetp->npath == pnsetp->maxpaths) {
443 		maxpaths = (pnsetp->maxpaths == 0) ? 512 : pnsetp->maxpaths * 2;
444 		newpaths = realloc(pnsetp->paths, sizeof (char *) * maxpaths);
445 		if (newpaths == NULL)
446 			return (0);
447 		pnsetp->paths = newpaths;
448 		pnsetp->maxpaths = maxpaths;
449 	}
450 
451 	pnsetp->paths[pnsetp->npath] = strdup(path);
452 	if (pnsetp->paths[pnsetp->npath] == NULL)
453 		return (0);
454 
455 	pnsetp->npath++;
456 	return (1);
457 }
458 
459 /*
460  * Check `path' against the pnset_t pointed to by `pnsetp'.
461  */
462 static int
463 pnset_check(const pnset_t *pnsetp, const char *path)
464 {
465 	unsigned int i;
466 
467 	for (i = 0; i < pnsetp->npath; i++) {
468 		if (fnmatch(pnsetp->paths[i], path, 0) == 0)
469 			return (1);
470 	}
471 	return (0);
472 }
473 
474 /*
475  * Empty the pnset_t pointed to by `pnsetp'.
476  */
477 static void
478 pnset_empty(pnset_t *pnsetp)
479 {
480 	while (pnsetp->npath-- != 0)
481 		free(pnsetp->paths[pnsetp->npath]);
482 
483 	free(pnsetp->paths);
484 	pnsetp->maxpaths = 0;
485 }
486 
487 /*
488  * Free the pnset_t pointed to by `pnsetp'.
489  */
490 static void
491 pnset_free(pnset_t *pnsetp)
492 {
493 	if (pnsetp != NULL) {
494 		pnset_empty(pnsetp);
495 		free(pnsetp);
496 	}
497 }
498 
499 /* PRINTFLIKE1 */
500 static void
501 warn(const char *format, ...)
502 {
503 	va_list alist;
504 	char *errstr = strerror(errno);
505 
506 	if (errstr == NULL)
507 		errstr = "<unknown error>";
508 
509 	(void) fprintf(stderr, "%s: ", progname);
510 
511 	va_start(alist, format);
512 	(void) vfprintf(stderr, format, alist);
513 	va_end(alist);
514 
515 	if (strrchr(format, '\n') == NULL)
516 		(void) fprintf(stderr, ": %s\n", errstr);
517 }
518 
519 /* PRINTFLIKE1 */
520 static void
521 die(const char *format, ...)
522 {
523 	va_list alist;
524 	char *errstr = strerror(errno);
525 
526 	if (errstr == NULL)
527 		errstr = "<unknown error>";
528 
529 	(void) fprintf(stderr, "%s: fatal: ", progname);
530 
531 	va_start(alist, format);
532 	(void) vfprintf(stderr, format, alist);
533 	va_end(alist);
534 
535 	if (strrchr(format, '\n') == NULL)
536 		(void) fprintf(stderr, ": %s\n", errstr);
537 
538 	exit(EXIT_FAILURE);
539 }
540