/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2018, Joyent, Inc.
 */

/*
 * Finds all unreferenced files in a source tree that do not match a list of
 * permitted pathnames.
 */

#include <ctype.h>
#include <errno.h>
#include <fnmatch.h>
#include <ftw.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/types.h>

/*
 * Pathname set: a simple datatype for storing pathname pattern globs and
 * for checking whether a given pathname is matched by a pattern glob in
 * the set.
 */
typedef struct {
	char		**paths;	/* array of heap-allocated globs */
	unsigned int	npath;		/* entries of `paths' in use */
	unsigned int	maxpaths;	/* entries of `paths' allocated */
} pnset_t;

59 /*
60 * Data associated with the current SCM manifest.
61 */
62 typedef struct scmdata {
63 pnset_t *manifest;
64 char metapath[MAXPATHLEN];
65 char root[MAXPATHLEN];
66 unsigned int rootlen;
67 boolean_t rootwarn;
68 } scmdata_t;
69
/*
 * Hooks used to check if a given unreferenced file is known to an SCM
 * (currently Git, Mercurial and TeamWare).
 *
 * A check hook is handed the pathname and nftw() state of a candidate file
 * and returns non-zero if the SCM knows about it.  A chdir hook, when
 * present, is handed a directory pathname so that any per-repository state
 * can be refreshed before files are checked.
 */
typedef int checkscm_func_t(const char *, const struct FTW *);
typedef void chdirscm_func_t(const char *);

typedef struct {
	const char	*name;		/* name accepted by the -S option */
	checkscm_func_t	*checkfunc;	/* is this file under SCM control? */
	chdirscm_func_t	*chdirfunc;	/* directory-change hook, or NULL */
} scm_t;

83 static checkscm_func_t check_tw, check_scmdata;
84 static chdirscm_func_t chdir_hg, chdir_git;
85 static int pnset_add(pnset_t *, const char *);
86 static int pnset_check(const pnset_t *, const char *);
87 static void pnset_empty(pnset_t *);
88 static void pnset_free(pnset_t *);
89 static int checkpath(const char *, const struct stat *, int, struct FTW *);
90 static pnset_t *make_exset(const char *);
91 static void warn(const char *, ...);
92 static void die(const char *, ...);
93
94 static const scm_t scms[] = {
95 { "tw", check_tw, NULL },
96 { "teamware", check_tw, NULL },
97 { "hg", check_scmdata, chdir_hg },
98 { "mercurial", check_scmdata, chdir_hg },
99 { "git", check_scmdata, chdir_git },
100 { NULL, NULL, NULL }
101 };
102
103 static const scm_t *scm;
104 static scmdata_t scmdata;
105 static time_t tstamp; /* timestamp to compare files to */
106 static pnset_t *exsetp; /* pathname globs to ignore */
107 static const char *progname;
108
109 int
main(int argc,char * argv[])110 main(int argc, char *argv[])
111 {
112 int c;
113 char path[MAXPATHLEN];
114 char subtree[MAXPATHLEN] = "./";
115 char *tstampfile = ".build.tstamp";
116 struct stat tsstat;
117
118 progname = strrchr(argv[0], '/');
119 if (progname == NULL)
120 progname = argv[0];
121 else
122 progname++;
123
124 while ((c = getopt(argc, argv, "as:t:S:")) != EOF) {
125 switch (c) {
126 case 'a':
127 /* for compatibility; now the default */
128 break;
129
130 case 's':
131 (void) strlcat(subtree, optarg, MAXPATHLEN);
132 break;
133
134 case 't':
135 tstampfile = optarg;
136 break;
137
138 case 'S':
139 for (scm = scms; scm->name != NULL; scm++) {
140 if (strcmp(scm->name, optarg) == 0)
141 break;
142 }
143 if (scm->name == NULL)
144 die("unsupported SCM `%s'\n", optarg);
145 break;
146
147 default:
148 case '?':
149 goto usage;
150 }
151 }
152
153 argc -= optind;
154 argv += optind;
155
156 if (argc != 2) {
157 usage: (void) fprintf(stderr, "usage: %s [-s <subtree>] "
158 "[-t <tstampfile>] [-S hg|tw|git] <srcroot> <exceptfile>\n",
159 progname);
160 return (EXIT_FAILURE);
161 }
162
163 /*
164 * Interpret a relative timestamp path as relative to srcroot.
165 */
166 if (tstampfile[0] == '/')
167 (void) strlcpy(path, tstampfile, MAXPATHLEN);
168 else
169 (void) snprintf(path, MAXPATHLEN, "%s/%s", argv[0], tstampfile);
170
171 if (stat(path, &tsstat) == -1)
172 die("cannot stat timestamp file \"%s\"", path);
173 tstamp = tsstat.st_mtime;
174
175 /*
176 * Create the exception pathname set.
177 */
178 exsetp = make_exset(argv[1]);
179 if (exsetp == NULL)
180 die("cannot make exception pathname set\n");
181
182 /*
183 * Walk the specified subtree of the tree rooted at argv[0].
184 */
185 if (chdir(argv[0]) == -1)
186 die("cannot change directory to \"%s\"", argv[0]);
187
188 if (nftw(subtree, checkpath, 100, FTW_PHYS) != 0)
189 die("cannot walk tree rooted at \"%s\"\n", argv[0]);
190
191 pnset_empty(exsetp);
192 return (EXIT_SUCCESS);
193 }
194
195 /*
196 * Load and return a pnset for the manifest for the Mercurial repo at `hgroot'.
197 */
198 static pnset_t *
hg_manifest(const char * hgroot)199 hg_manifest(const char *hgroot)
200 {
201 FILE *fp = NULL;
202 char *hgcmd = NULL;
203 char *newline;
204 pnset_t *pnsetp;
205 char path[MAXPATHLEN];
206
207 pnsetp = calloc(1, sizeof (pnset_t));
208 if (pnsetp == NULL ||
209 asprintf(&hgcmd, "hg manifest -R %s", hgroot) == -1)
210 goto fail;
211
212 fp = popen(hgcmd, "r");
213 if (fp == NULL)
214 goto fail;
215
216 while (fgets(path, sizeof (path), fp) != NULL) {
217 newline = strrchr(path, '\n');
218 if (newline != NULL)
219 *newline = '\0';
220
221 if (pnset_add(pnsetp, path) == 0)
222 goto fail;
223 }
224
225 (void) pclose(fp);
226 free(hgcmd);
227 return (pnsetp);
228 fail:
229 warn("cannot load hg manifest at %s", hgroot);
230 if (fp != NULL)
231 (void) pclose(fp);
232 free(hgcmd);
233 pnset_free(pnsetp);
234 return (NULL);
235 }
236
237 /*
238 * Load and return a pnset for the manifest for the Git repo at `gitroot'.
239 */
240 static pnset_t *
git_manifest(const char * gitroot)241 git_manifest(const char *gitroot)
242 {
243 FILE *fp = NULL;
244 char *gitcmd = NULL;
245 char *newline;
246 pnset_t *pnsetp;
247 char path[MAXPATHLEN];
248
249 pnsetp = calloc(1, sizeof (pnset_t));
250 if (pnsetp == NULL ||
251 asprintf(&gitcmd, "git --git-dir=%s/.git ls-files", gitroot) == -1)
252 goto fail;
253
254 fp = popen(gitcmd, "r");
255 if (fp == NULL)
256 goto fail;
257
258 while (fgets(path, sizeof (path), fp) != NULL) {
259 newline = strrchr(path, '\n');
260 if (newline != NULL)
261 *newline = '\0';
262
263 if (pnset_add(pnsetp, path) == 0)
264 goto fail;
265 }
266
267 (void) pclose(fp);
268 free(gitcmd);
269 return (pnsetp);
270 fail:
271 warn("cannot load git manifest at %s", gitroot);
272 if (fp != NULL)
273 (void) pclose(fp);
274 free(gitcmd);
275 pnset_free(pnsetp);
276 return (NULL);
277 }
278
279 /*
280 * If necessary, change our active manifest to be appropriate for `path'.
281 */
282 static void
chdir_scmdata(const char * path,const char * meta,pnset_t * (* manifest_func)(const char * path))283 chdir_scmdata(const char *path, const char *meta,
284 pnset_t *(*manifest_func)(const char *path))
285 {
286 char scmpath[MAXPATHLEN];
287 char basepath[MAXPATHLEN];
288 char *slash;
289
290 (void) snprintf(scmpath, MAXPATHLEN, "%s/%s", path, meta);
291
292 /*
293 * Change our active manifest if any one of the following is true:
294 *
295 * 1. No manifest is loaded. Find the nearest SCM root to load from.
296 *
297 * 2. A manifest is loaded, but we've moved into a directory with
298 * its own metadata directory (e.g., usr/closed). Load from its
299 * root.
300 *
301 * 3. A manifest is loaded, but no longer applies (e.g., the manifest
302 * under usr/closed is loaded, but we've moved to usr/src).
303 */
304 if (scmdata.manifest == NULL ||
305 (strcmp(scmpath, scmdata.metapath) != 0 &&
306 access(scmpath, X_OK) == 0) ||
307 strncmp(path, scmdata.root, scmdata.rootlen - 1) != 0) {
308 pnset_free(scmdata.manifest);
309 scmdata.manifest = NULL;
310
311 (void) strlcpy(basepath, path, MAXPATHLEN);
312
313 /*
314 * Walk up the directory tree looking for metadata
315 * subdirectories.
316 */
317 while (access(scmpath, X_OK) == -1) {
318 slash = strrchr(basepath, '/');
319 if (slash == NULL) {
320 if (!scmdata.rootwarn) {
321 warn("no metadata directory "
322 "for \"%s\"\n", path);
323 scmdata.rootwarn = B_TRUE;
324 }
325 return;
326 }
327 *slash = '\0';
328 (void) snprintf(scmpath, MAXPATHLEN, "%s/%s", basepath,
329 meta);
330 }
331
332 /*
333 * We found a directory with an SCM metadata directory; record
334 * it and load its manifest.
335 */
336 (void) strlcpy(scmdata.metapath, scmpath, MAXPATHLEN);
337 (void) strlcpy(scmdata.root, basepath, MAXPATHLEN);
338 scmdata.manifest = manifest_func(scmdata.root);
339
340 /*
341 * The logic in check_scmdata() depends on scmdata.root having
342 * a single trailing slash, so only add it if it's missing.
343 */
344 if (scmdata.root[strlen(scmdata.root) - 1] != '/')
345 (void) strlcat(scmdata.root, "/", MAXPATHLEN);
346 scmdata.rootlen = strlen(scmdata.root);
347 }
348 }
349
350 /*
351 * If necessary, change our active manifest to be appropriate for `path'.
352 */
353 static void
chdir_git(const char * path)354 chdir_git(const char *path)
355 {
356 chdir_scmdata(path, ".git", git_manifest);
357 }
358
359 static void
chdir_hg(const char * path)360 chdir_hg(const char *path)
361 {
362 chdir_scmdata(path, ".hg", hg_manifest);
363 }
364
365 /* ARGSUSED */
366 static int
check_scmdata(const char * path,const struct FTW * ftwp)367 check_scmdata(const char *path, const struct FTW *ftwp)
368 {
369 /*
370 * The manifest paths are relative to the manifest root; skip past it.
371 */
372 path += scmdata.rootlen;
373
374 return (scmdata.manifest != NULL && pnset_check(scmdata.manifest,
375 path));
376 }
377
378 /*
379 * Check if a file is under TeamWare control by checking for its corresponding
380 * SCCS "s-dot" file.
381 */
382 static int
check_tw(const char * path,const struct FTW * ftwp)383 check_tw(const char *path, const struct FTW *ftwp)
384 {
385 char sccspath[MAXPATHLEN];
386
387 (void) snprintf(sccspath, MAXPATHLEN, "%.*s/SCCS/s.%s", ftwp->base,
388 path, path + ftwp->base);
389
390 return (access(sccspath, F_OK) == 0);
391 }
392
393 /*
394 * Using `exceptfile' and a built-in list of exceptions, build and return a
395 * pnset_t consisting of all of the pathnames globs which are allowed to be
396 * unreferenced in the source tree.
397 */
398 static pnset_t *
make_exset(const char * exceptfile)399 make_exset(const char *exceptfile)
400 {
401 FILE *fp;
402 char line[MAXPATHLEN];
403 char *newline;
404 pnset_t *pnsetp;
405 unsigned int i;
406
407 pnsetp = calloc(1, sizeof (pnset_t));
408 if (pnsetp == NULL)
409 return (NULL);
410
411 /*
412 * Add any exceptions from the file.
413 */
414 fp = fopen(exceptfile, "r");
415 if (fp == NULL) {
416 warn("cannot open exception file \"%s\"", exceptfile);
417 goto fail;
418 }
419
420 while (fgets(line, sizeof (line), fp) != NULL) {
421 newline = strrchr(line, '\n');
422 if (newline != NULL)
423 *newline = '\0';
424
425 for (i = 0; isspace(line[i]); i++)
426 ;
427
428 if (line[i] == '#' || line[i] == '\0')
429 continue;
430
431 if (pnset_add(pnsetp, line) == 0) {
432 (void) fclose(fp);
433 goto fail;
434 }
435 }
436
437 (void) fclose(fp);
438 return (pnsetp);
439 fail:
440 pnset_free(pnsetp);
441 return (NULL);
442 }
443
444 /*
445 * FTW callback: print `path' if it's older than `tstamp' and not in `exsetp'.
446 */
447 static int
checkpath(const char * path,const struct stat * statp,int type,struct FTW * ftwp)448 checkpath(const char *path, const struct stat *statp, int type,
449 struct FTW *ftwp)
450 {
451 switch (type) {
452 case FTW_F:
453 /*
454 * Skip if the file is referenced or in the exception list.
455 */
456 if (statp->st_atime >= tstamp || pnset_check(exsetp, path))
457 return (0);
458
459 /*
460 * If requested, restrict ourselves to unreferenced files
461 * under SCM control.
462 */
463 if (scm == NULL || scm->checkfunc(path, ftwp))
464 (void) puts(path);
465 return (0);
466
467 case FTW_D:
468 /*
469 * Prune any directories in the exception list.
470 */
471 if (pnset_check(exsetp, path)) {
472 ftwp->quit = FTW_PRUNE;
473 return (0);
474 }
475
476 /*
477 * If necessary, advise the SCM logic of our new directory.
478 */
479 if (scm != NULL && scm->chdirfunc != NULL)
480 scm->chdirfunc(path);
481
482 return (0);
483
484 case FTW_DNR:
485 warn("cannot read \"%s\"", path);
486 return (0);
487
488 case FTW_NS:
489 warn("cannot stat \"%s\"", path);
490 return (0);
491
492 default:
493 break;
494 }
495
496 return (0);
497 }
498
499 /*
500 * Add `path' to the pnset_t pointed to by `pnsetp'.
501 */
502 static int
pnset_add(pnset_t * pnsetp,const char * path)503 pnset_add(pnset_t *pnsetp, const char *path)
504 {
505 char **newpaths;
506 unsigned int maxpaths;
507
508 if (pnsetp->npath == pnsetp->maxpaths) {
509 maxpaths = (pnsetp->maxpaths == 0) ? 512 : pnsetp->maxpaths * 2;
510 newpaths = realloc(pnsetp->paths, sizeof (char *) * maxpaths);
511 if (newpaths == NULL)
512 return (0);
513 pnsetp->paths = newpaths;
514 pnsetp->maxpaths = maxpaths;
515 }
516
517 pnsetp->paths[pnsetp->npath] = strdup(path);
518 if (pnsetp->paths[pnsetp->npath] == NULL)
519 return (0);
520
521 pnsetp->npath++;
522 return (1);
523 }
524
525 /*
526 * Check `path' against the pnset_t pointed to by `pnsetp'.
527 */
528 static int
pnset_check(const pnset_t * pnsetp,const char * path)529 pnset_check(const pnset_t *pnsetp, const char *path)
530 {
531 unsigned int i;
532
533 for (i = 0; i < pnsetp->npath; i++) {
534 if (fnmatch(pnsetp->paths[i], path, 0) == 0)
535 return (1);
536 }
537 return (0);
538 }
539
540 /*
541 * Empty the pnset_t pointed to by `pnsetp'.
542 */
543 static void
pnset_empty(pnset_t * pnsetp)544 pnset_empty(pnset_t *pnsetp)
545 {
546 while (pnsetp->npath-- != 0)
547 free(pnsetp->paths[pnsetp->npath]);
548
549 free(pnsetp->paths);
550 pnsetp->maxpaths = 0;
551 }
552
553 /*
554 * Free the pnset_t pointed to by `pnsetp'.
555 */
556 static void
pnset_free(pnset_t * pnsetp)557 pnset_free(pnset_t *pnsetp)
558 {
559 if (pnsetp != NULL) {
560 pnset_empty(pnsetp);
561 free(pnsetp);
562 }
563 }
564
565 /* PRINTFLIKE1 */
566 static void
warn(const char * format,...)567 warn(const char *format, ...)
568 {
569 va_list alist;
570 char *errstr = strerror(errno);
571
572 if (errstr == NULL)
573 errstr = "<unknown error>";
574
575 (void) fprintf(stderr, "%s: ", progname);
576
577 va_start(alist, format);
578 (void) vfprintf(stderr, format, alist);
579 va_end(alist);
580
581 if (strrchr(format, '\n') == NULL)
582 (void) fprintf(stderr, ": %s\n", errstr);
583 }
584
585 /* PRINTFLIKE1 */
586 static void
die(const char * format,...)587 die(const char *format, ...)
588 {
589 va_list alist;
590 char *errstr = strerror(errno);
591
592 if (errstr == NULL)
593 errstr = "<unknown error>";
594
595 (void) fprintf(stderr, "%s: fatal: ", progname);
596
597 va_start(alist, format);
598 (void) vfprintf(stderr, format, alist);
599 va_end(alist);
600
601 if (strrchr(format, '\n') == NULL)
602 (void) fprintf(stderr, ": %s\n", errstr);
603
604 exit(EXIT_FAILURE);
605 }
606