/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Finds all unreferenced files in a source tree whose pathnames do not
 * match any entry in a list of permitted pathname globs.
 */

#include <ctype.h>
#include <errno.h>
#include <fnmatch.h>
#include <ftw.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/types.h>

/*
 * Pathname set: a growable array of pathname glob patterns, together with
 * the bookkeeping needed to test whether a pathname is matched by any
 * pattern in the set.
 */
typedef struct {
	char		**paths;	/* array of strdup'd patterns */
	unsigned int	npath;		/* number of entries in use */
	unsigned int	maxpaths;	/* allocated capacity of `paths' */
} pnset_t;

55 /*
56 * Data associated with the current SCM manifest.
57 */
58 typedef struct scmdata {
59 pnset_t *manifest;
60 char metapath[MAXPATHLEN];
61 char root[MAXPATHLEN];
62 unsigned int rootlen;
63 boolean_t rootwarn;
64 } scmdata_t;
65
/*
 * SCM hooks (currently Git, Mercurial and TeamWare).  A check hook reports
 * whether a given unreferenced file is known to the SCM; a chdir hook is
 * told about each directory visited by the tree walk.
 */
typedef int checkscm_func_t(const char *path, const struct FTW *ftwp);
typedef void chdirscm_func_t(const char *path);

typedef struct {
	const char	*name;		/* name accepted by the -S option */
	checkscm_func_t	*checkfunc;	/* is this file known to the SCM? */
	chdirscm_func_t	*chdirfunc;	/* directory-change hook, or NULL */
} scm_t;

79 static checkscm_func_t check_tw, check_scmdata;
80 static chdirscm_func_t chdir_hg, chdir_git;
81 static int pnset_add(pnset_t *, const char *);
82 static int pnset_check(const pnset_t *, const char *);
83 static void pnset_empty(pnset_t *);
84 static void pnset_free(pnset_t *);
85 static int checkpath(const char *, const struct stat *, int, struct FTW *);
86 static pnset_t *make_exset(const char *);
87 static void warn(const char *, ...);
88 static void die(const char *, ...);
89
90 static const scm_t scms[] = {
91 { "tw", check_tw, NULL },
92 { "teamware", check_tw, NULL },
93 { "hg", check_scmdata, chdir_hg },
94 { "mercurial", check_scmdata, chdir_hg },
95 { "git", check_scmdata, chdir_git },
96 { NULL, NULL, NULL }
97 };
98
99 static const scm_t *scm;
100 static scmdata_t scmdata;
101 static time_t tstamp; /* timestamp to compare files to */
102 static pnset_t *exsetp; /* pathname globs to ignore */
103 static const char *progname;
104
105 int
main(int argc,char * argv[])106 main(int argc, char *argv[])
107 {
108 int c;
109 char path[MAXPATHLEN];
110 char subtree[MAXPATHLEN] = "./";
111 char *tstampfile = ".build.tstamp";
112 struct stat tsstat;
113
114 progname = strrchr(argv[0], '/');
115 if (progname == NULL)
116 progname = argv[0];
117 else
118 progname++;
119
120 while ((c = getopt(argc, argv, "as:t:S:")) != EOF) {
121 switch (c) {
122 case 'a':
123 /* for compatibility; now the default */
124 break;
125
126 case 's':
127 (void) strlcat(subtree, optarg, MAXPATHLEN);
128 break;
129
130 case 't':
131 tstampfile = optarg;
132 break;
133
134 case 'S':
135 for (scm = scms; scm->name != NULL; scm++) {
136 if (strcmp(scm->name, optarg) == 0)
137 break;
138 }
139 if (scm->name == NULL)
140 die("unsupported SCM `%s'\n", optarg);
141 break;
142
143 default:
144 case '?':
145 goto usage;
146 }
147 }
148
149 argc -= optind;
150 argv += optind;
151
152 if (argc != 2) {
153 usage: (void) fprintf(stderr, "usage: %s [-s <subtree>] "
154 "[-t <tstampfile>] [-S hg|tw|git] <srcroot> <exceptfile>\n",
155 progname);
156 return (EXIT_FAILURE);
157 }
158
159 /*
160 * Interpret a relative timestamp path as relative to srcroot.
161 */
162 if (tstampfile[0] == '/')
163 (void) strlcpy(path, tstampfile, MAXPATHLEN);
164 else
165 (void) snprintf(path, MAXPATHLEN, "%s/%s", argv[0], tstampfile);
166
167 if (stat(path, &tsstat) == -1)
168 die("cannot stat timestamp file \"%s\"", path);
169 tstamp = tsstat.st_mtime;
170
171 /*
172 * Create the exception pathname set.
173 */
174 exsetp = make_exset(argv[1]);
175 if (exsetp == NULL)
176 die("cannot make exception pathname set\n");
177
178 /*
179 * Walk the specified subtree of the tree rooted at argv[0].
180 */
181 if (chdir(argv[0]) == -1)
182 die("cannot change directory to \"%s\"", argv[0]);
183
184 if (nftw(subtree, checkpath, 100, FTW_PHYS) != 0)
185 die("cannot walk tree rooted at \"%s\"\n", argv[0]);
186
187 pnset_empty(exsetp);
188 return (EXIT_SUCCESS);
189 }
190
191 /*
192 * Load and return a pnset for the manifest for the Mercurial repo at `hgroot'.
193 */
194 static pnset_t *
hg_manifest(const char * hgroot)195 hg_manifest(const char *hgroot)
196 {
197 FILE *fp = NULL;
198 char *hgcmd = NULL;
199 char *newline;
200 pnset_t *pnsetp;
201 char path[MAXPATHLEN];
202
203 pnsetp = calloc(sizeof (pnset_t), 1);
204 if (pnsetp == NULL ||
205 asprintf(&hgcmd, "hg manifest -R %s", hgroot) == -1)
206 goto fail;
207
208 fp = popen(hgcmd, "r");
209 if (fp == NULL)
210 goto fail;
211
212 while (fgets(path, sizeof (path), fp) != NULL) {
213 newline = strrchr(path, '\n');
214 if (newline != NULL)
215 *newline = '\0';
216
217 if (pnset_add(pnsetp, path) == 0)
218 goto fail;
219 }
220
221 (void) pclose(fp);
222 free(hgcmd);
223 return (pnsetp);
224 fail:
225 warn("cannot load hg manifest at %s", hgroot);
226 if (fp != NULL)
227 (void) pclose(fp);
228 free(hgcmd);
229 pnset_free(pnsetp);
230 return (NULL);
231 }
232
233 /*
234 * Load and return a pnset for the manifest for the Git repo at `gitroot'.
235 */
236 static pnset_t *
git_manifest(const char * gitroot)237 git_manifest(const char *gitroot)
238 {
239 FILE *fp = NULL;
240 char *gitcmd = NULL;
241 char *newline;
242 pnset_t *pnsetp;
243 char path[MAXPATHLEN];
244
245 pnsetp = calloc(sizeof (pnset_t), 1);
246 if (pnsetp == NULL ||
247 asprintf(&gitcmd, "git --git-dir=%s/.git ls-files", gitroot) == -1)
248 goto fail;
249
250 fp = popen(gitcmd, "r");
251 if (fp == NULL)
252 goto fail;
253
254 while (fgets(path, sizeof (path), fp) != NULL) {
255 newline = strrchr(path, '\n');
256 if (newline != NULL)
257 *newline = '\0';
258
259 if (pnset_add(pnsetp, path) == 0)
260 goto fail;
261 }
262
263 (void) pclose(fp);
264 free(gitcmd);
265 return (pnsetp);
266 fail:
267 warn("cannot load git manifest at %s", gitroot);
268 if (fp != NULL)
269 (void) pclose(fp);
270 free(gitcmd);
271 pnset_free(pnsetp);
272 return (NULL);
273 }
274
275 /*
276 * If necessary, change our active manifest to be appropriate for `path'.
277 */
278 static void
chdir_scmdata(const char * path,const char * meta,pnset_t * (* manifest_func)(const char * path))279 chdir_scmdata(const char *path, const char *meta,
280 pnset_t *(*manifest_func)(const char *path))
281 {
282 char scmpath[MAXPATHLEN];
283 char basepath[MAXPATHLEN];
284 char *slash;
285
286 (void) snprintf(scmpath, MAXPATHLEN, "%s/%s", path, meta);
287
288 /*
289 * Change our active manifest if any one of the following is true:
290 *
291 * 1. No manifest is loaded. Find the nearest SCM root to load from.
292 *
293 * 2. A manifest is loaded, but we've moved into a directory with
294 * its own metadata directory (e.g., usr/closed). Load from its
295 * root.
296 *
297 * 3. A manifest is loaded, but no longer applies (e.g., the manifest
298 * under usr/closed is loaded, but we've moved to usr/src).
299 */
300 if (scmdata.manifest == NULL ||
301 (strcmp(scmpath, scmdata.metapath) != 0 &&
302 access(scmpath, X_OK) == 0) ||
303 strncmp(path, scmdata.root, scmdata.rootlen - 1) != 0) {
304 pnset_free(scmdata.manifest);
305 scmdata.manifest = NULL;
306
307 (void) strlcpy(basepath, path, MAXPATHLEN);
308
309 /*
310 * Walk up the directory tree looking for metadata
311 * subdirectories.
312 */
313 while (access(scmpath, X_OK) == -1) {
314 slash = strrchr(basepath, '/');
315 if (slash == NULL) {
316 if (!scmdata.rootwarn) {
317 warn("no metadata directory "
318 "for \"%s\"\n", path);
319 scmdata.rootwarn = B_TRUE;
320 }
321 return;
322 }
323 *slash = '\0';
324 (void) snprintf(scmpath, MAXPATHLEN, "%s/%s", basepath,
325 meta);
326 }
327
328 /*
329 * We found a directory with an SCM metadata directory; record
330 * it and load its manifest.
331 */
332 (void) strlcpy(scmdata.metapath, scmpath, MAXPATHLEN);
333 (void) strlcpy(scmdata.root, basepath, MAXPATHLEN);
334 scmdata.manifest = manifest_func(scmdata.root);
335
336 /*
337 * The logic in check_scmdata() depends on scmdata.root having
338 * a single trailing slash, so only add it if it's missing.
339 */
340 if (scmdata.root[strlen(scmdata.root) - 1] != '/')
341 (void) strlcat(scmdata.root, "/", MAXPATHLEN);
342 scmdata.rootlen = strlen(scmdata.root);
343 }
344 }
345
346 /*
347 * If necessary, change our active manifest to be appropriate for `path'.
348 */
349 static void
chdir_git(const char * path)350 chdir_git(const char *path)
351 {
352 chdir_scmdata(path, ".git", git_manifest);
353 }
354
355 static void
chdir_hg(const char * path)356 chdir_hg(const char *path)
357 {
358 chdir_scmdata(path, ".hg", hg_manifest);
359 }
360
361 /* ARGSUSED */
362 static int
check_scmdata(const char * path,const struct FTW * ftwp)363 check_scmdata(const char *path, const struct FTW *ftwp)
364 {
365 /*
366 * The manifest paths are relative to the manifest root; skip past it.
367 */
368 path += scmdata.rootlen;
369
370 return (scmdata.manifest != NULL && pnset_check(scmdata.manifest,
371 path));
372 }
373
374 /*
375 * Check if a file is under TeamWare control by checking for its corresponding
376 * SCCS "s-dot" file.
377 */
378 static int
check_tw(const char * path,const struct FTW * ftwp)379 check_tw(const char *path, const struct FTW *ftwp)
380 {
381 char sccspath[MAXPATHLEN];
382
383 (void) snprintf(sccspath, MAXPATHLEN, "%.*s/SCCS/s.%s", ftwp->base,
384 path, path + ftwp->base);
385
386 return (access(sccspath, F_OK) == 0);
387 }
388
389 /*
390 * Using `exceptfile' and a built-in list of exceptions, build and return a
391 * pnset_t consisting of all of the pathnames globs which are allowed to be
392 * unreferenced in the source tree.
393 */
394 static pnset_t *
make_exset(const char * exceptfile)395 make_exset(const char *exceptfile)
396 {
397 FILE *fp;
398 char line[MAXPATHLEN];
399 char *newline;
400 pnset_t *pnsetp;
401 unsigned int i;
402
403 pnsetp = calloc(sizeof (pnset_t), 1);
404 if (pnsetp == NULL)
405 return (NULL);
406
407 /*
408 * Add any exceptions from the file.
409 */
410 fp = fopen(exceptfile, "r");
411 if (fp == NULL) {
412 warn("cannot open exception file \"%s\"", exceptfile);
413 goto fail;
414 }
415
416 while (fgets(line, sizeof (line), fp) != NULL) {
417 newline = strrchr(line, '\n');
418 if (newline != NULL)
419 *newline = '\0';
420
421 for (i = 0; isspace(line[i]); i++)
422 ;
423
424 if (line[i] == '#' || line[i] == '\0')
425 continue;
426
427 if (pnset_add(pnsetp, line) == 0) {
428 (void) fclose(fp);
429 goto fail;
430 }
431 }
432
433 (void) fclose(fp);
434 return (pnsetp);
435 fail:
436 pnset_free(pnsetp);
437 return (NULL);
438 }
439
440 /*
441 * FTW callback: print `path' if it's older than `tstamp' and not in `exsetp'.
442 */
443 static int
checkpath(const char * path,const struct stat * statp,int type,struct FTW * ftwp)444 checkpath(const char *path, const struct stat *statp, int type,
445 struct FTW *ftwp)
446 {
447 switch (type) {
448 case FTW_F:
449 /*
450 * Skip if the file is referenced or in the exception list.
451 */
452 if (statp->st_atime >= tstamp || pnset_check(exsetp, path))
453 return (0);
454
455 /*
456 * If requested, restrict ourselves to unreferenced files
457 * under SCM control.
458 */
459 if (scm == NULL || scm->checkfunc(path, ftwp))
460 (void) puts(path);
461 return (0);
462
463 case FTW_D:
464 /*
465 * Prune any directories in the exception list.
466 */
467 if (pnset_check(exsetp, path)) {
468 ftwp->quit = FTW_PRUNE;
469 return (0);
470 }
471
472 /*
473 * If necessary, advise the SCM logic of our new directory.
474 */
475 if (scm != NULL && scm->chdirfunc != NULL)
476 scm->chdirfunc(path);
477
478 return (0);
479
480 case FTW_DNR:
481 warn("cannot read \"%s\"", path);
482 return (0);
483
484 case FTW_NS:
485 warn("cannot stat \"%s\"", path);
486 return (0);
487
488 default:
489 break;
490 }
491
492 return (0);
493 }
494
495 /*
496 * Add `path' to the pnset_t pointed to by `pnsetp'.
497 */
498 static int
pnset_add(pnset_t * pnsetp,const char * path)499 pnset_add(pnset_t *pnsetp, const char *path)
500 {
501 char **newpaths;
502 unsigned int maxpaths;
503
504 if (pnsetp->npath == pnsetp->maxpaths) {
505 maxpaths = (pnsetp->maxpaths == 0) ? 512 : pnsetp->maxpaths * 2;
506 newpaths = realloc(pnsetp->paths, sizeof (char *) * maxpaths);
507 if (newpaths == NULL)
508 return (0);
509 pnsetp->paths = newpaths;
510 pnsetp->maxpaths = maxpaths;
511 }
512
513 pnsetp->paths[pnsetp->npath] = strdup(path);
514 if (pnsetp->paths[pnsetp->npath] == NULL)
515 return (0);
516
517 pnsetp->npath++;
518 return (1);
519 }
520
521 /*
522 * Check `path' against the pnset_t pointed to by `pnsetp'.
523 */
524 static int
pnset_check(const pnset_t * pnsetp,const char * path)525 pnset_check(const pnset_t *pnsetp, const char *path)
526 {
527 unsigned int i;
528
529 for (i = 0; i < pnsetp->npath; i++) {
530 if (fnmatch(pnsetp->paths[i], path, 0) == 0)
531 return (1);
532 }
533 return (0);
534 }
535
536 /*
537 * Empty the pnset_t pointed to by `pnsetp'.
538 */
539 static void
pnset_empty(pnset_t * pnsetp)540 pnset_empty(pnset_t *pnsetp)
541 {
542 while (pnsetp->npath-- != 0)
543 free(pnsetp->paths[pnsetp->npath]);
544
545 free(pnsetp->paths);
546 pnsetp->maxpaths = 0;
547 }
548
549 /*
550 * Free the pnset_t pointed to by `pnsetp'.
551 */
552 static void
pnset_free(pnset_t * pnsetp)553 pnset_free(pnset_t *pnsetp)
554 {
555 if (pnsetp != NULL) {
556 pnset_empty(pnsetp);
557 free(pnsetp);
558 }
559 }
560
561 /* PRINTFLIKE1 */
562 static void
warn(const char * format,...)563 warn(const char *format, ...)
564 {
565 va_list alist;
566 char *errstr = strerror(errno);
567
568 if (errstr == NULL)
569 errstr = "<unknown error>";
570
571 (void) fprintf(stderr, "%s: ", progname);
572
573 va_start(alist, format);
574 (void) vfprintf(stderr, format, alist);
575 va_end(alist);
576
577 if (strrchr(format, '\n') == NULL)
578 (void) fprintf(stderr, ": %s\n", errstr);
579 }
580
581 /* PRINTFLIKE1 */
582 static void
die(const char * format,...)583 die(const char *format, ...)
584 {
585 va_list alist;
586 char *errstr = strerror(errno);
587
588 if (errstr == NULL)
589 errstr = "<unknown error>";
590
591 (void) fprintf(stderr, "%s: fatal: ", progname);
592
593 va_start(alist, format);
594 (void) vfprintf(stderr, format, alist);
595 va_end(alist);
596
597 if (strrchr(format, '\n') == NULL)
598 (void) fprintf(stderr, ": %s\n", errstr);
599
600 exit(EXIT_FAILURE);
601 }
602