xref: /titanic_52/usr/src/cmd/bart/create.c (revision 99389cdeb9c9d0c12fa16c043aa9d1c384f05a05)
1  /*
2   * CDDL HEADER START
3   *
4   * The contents of this file are subject to the terms of the
5   * Common Development and Distribution License (the "License").
6   * You may not use this file except in compliance with the License.
7   *
8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9   * or http://www.opensolaris.org/os/licensing.
10   * See the License for the specific language governing permissions
11   * and limitations under the License.
12   *
13   * When distributing Covered Code, include this CDDL HEADER in each
14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15   * If applicable, add the following below this CDDL HEADER, with the
16   * fields enclosed by brackets "[]" replaced with your own identifying
17   * information: Portions Copyright [yyyy] [name of copyright owner]
18   *
19   * CDDL HEADER END
20   */
21  /*
22   * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23   */
24  
25  #include <signal.h>
26  #include <unistd.h>
27  #include <sys/acl.h>
28  #include <sys/statvfs.h>
29  #include <sys/wait.h>
30  #include "bart.h"
31  #include <aclutils.h>
32  
/* Forward declarations for the file-local helpers defined below. */
static int	sanitize_reloc_root(char *root, size_t bufsize);
static int	create_manifest_filelist(char **argv, char *reloc_root);
static int	create_manifest_rule(char *reloc_root, FILE *rule_fp);
static void	output_manifest(void);
static int	eval_file(const char *fname, const struct stat64 *statb,
	struct FTW *ftwx);
static char	*sanitized_fname(const char *, boolean_t);
static char	*get_acl_string(const char *fname, const struct stat64 *statb,
    int *err_code);
static int	generate_hash(int fdin, char *hash_str);
static int	read_filelist(char *reloc_root, char **argv, char *buf,
    size_t bufsize);
static int	walker(const char *name, const struct stat64 *sp,
    int type, struct FTW *ftwx);

/*
 * The following globals are necessary due to the "walker" function
 * provided by nftw().  Since there is no way to pass them through to the
 * walker function, they must be global.
 */
/*
 * compute_chksum: non-zero when eval_file() should checksum regular files.
 * Cleared by the -n option and re-evaluated per file by walker() when a
 * matching rule is found.
 * eval_err: set to WARNING_EXIT by walker() when a problem is seen during
 * the tree walk; inspected by create_manifest_rule() after each walk.
 */
static int		compute_chksum = 1, eval_err = 0;
/*
 * The subtree currently being walked; set by create_manifest_rule() and
 * consulted by eval_file() for exclusion checks (NULL means no check).
 */
static struct rule	*subtree_root;
/*
 * Absolute path given with -R ("" when unset or when it resolves to "/");
 * stripped from pathnames by sanitized_fname().
 */
static char		reloc_root[PATH_MAX];
/*
 * statvfs64() data for the root of the subtree being walked; walker()
 * compares its f_basetype against each directory it visits.
 */
static struct statvfs64	parent_vfs;
57  
58  int
59  bart_create(int argc, char **argv)
60  {
61  	boolean_t	filelist_input;
62  	int		ret, c, output_pipe[2];
63  	FILE 		*rules_fd = NULL;
64  	pid_t		pid;
65  
66  	filelist_input = B_FALSE;
67  	reloc_root[0] = '\0';
68  
69  	while ((c = getopt(argc, argv, "Inr:R:")) != EOF) {
70  		switch (c) {
71  		case 'I':
72  			if (rules_fd != NULL) {
73  				(void) fprintf(stderr, "%s", INPUT_ERR);
74  				usage();
75  			}
76  			filelist_input = B_TRUE;
77  			break;
78  
79  		case 'n':
80  			compute_chksum = 0;
81  			break;
82  
83  		case 'r':
84  			if (strcmp(optarg, "-") == 0)
85  				rules_fd = stdin;
86  			else
87  				rules_fd = fopen(optarg, "r");
88  			if (rules_fd == NULL) {
89  				perror(optarg);
90  				usage();
91  			}
92  			break;
93  
94  		case 'R':
95  			(void) strlcpy(reloc_root, optarg, sizeof (reloc_root));
96  			ret = sanitize_reloc_root(reloc_root,
97  			    sizeof (reloc_root));
98  			if (ret == 0)
99  				usage();
100  			break;
101  
102  		case '?':
103  		default :
104  			usage();
105  		}
106  	}
107  	argv += optind;
108  
109  	if (pipe(output_pipe) < 0) {
110  		perror("");
111  		exit(FATAL_EXIT);
112  	}
113  
114  	pid = fork();
115  	if (pid < 0) {
116  		perror(NULL);
117  		exit(FATAL_EXIT);
118  	}
119  
120  	/*
121  	 * Break the creation of a manifest into two parts: the parent process
122  	 * generated the data whereas the child process sorts the data.
123  	 *
124  	 * The processes communicate through the pipe.
125  	 */
126  	if (pid > 0) {
127  		/*
128  		 * Redirect the stdout of this process so it goes into
129  		 * output_pipe[0].  The output of this process will be read
130  		 * by the child, which will sort the output.
131  		 */
132  		if (dup2(output_pipe[0], STDOUT_FILENO) != STDOUT_FILENO) {
133  			perror(NULL);
134  			exit(FATAL_EXIT);
135  		}
136  		(void) close(output_pipe[0]);
137  		(void) close(output_pipe[1]);
138  
139  		if (filelist_input == B_TRUE) {
140  			ret = create_manifest_filelist(argv, reloc_root);
141  		} else {
142  			ret = create_manifest_rule(reloc_root, rules_fd);
143  		}
144  
145  		/* Close stdout so the sort in the child proc will complete */
146  		(void) fclose(stdout);
147  	} else {
148  		/*
149  		 * Redirect the stdin of this process so its read in from
150  		 * the pipe, which is the parent process in this case.
151  		 */
152  		if (dup2(output_pipe[1], STDIN_FILENO) != STDIN_FILENO) {
153  			perror(NULL);
154  			exit(FATAL_EXIT);
155  		}
156  		(void) close(output_pipe[0]);
157  
158  		output_manifest();
159  	}
160  
161  	/* Wait for the child proc (the sort) to complete */
162  	(void) wait(0);
163  
164  	return (ret);
165  }
166  
/*
 * Handle the -R option and set 'root' to be the absolute path of the
 * relocatable root.  This is useful when the user specifies '-R ../../foo'.
 *
 * 'root' holds the user-supplied path on entry and receives the resolved
 * absolute path (at most 'bufsize' bytes including the terminator) on
 * success.  A root that resolves to "/" is stored as the empty string so
 * that the result never has a trailing slash.
 *
 * Returns 1 on success.  Returns 0 if the current directory cannot be
 * determined, 'root' cannot be entered, the resolved path does not fit in
 * 'bufsize' bytes, or the original directory cannot be restored.
 */
static int
sanitize_reloc_root(char *root, size_t bufsize)
{
	char	pwd[PATH_MAX];
	int	resolved;

	/*
	 * First, save the current directory and go to the location
	 * specified with the -R option.  Without a saved directory there is
	 * no way back, so give up before moving.
	 */
	if (getcwd(pwd, sizeof (pwd)) == NULL) {
		perror("getcwd");
		return (0);
	}
	if (chdir(root) < 0) {
		/* Failed to change directory, something is wrong.... */
		perror(root);
		return (0);
	}

	/*
	 * Save the absolute path of the relocatable root directory.  On
	 * failure the contents of 'root' are no longer meaningful, so do not
	 * use it in the message.
	 */
	resolved = (getcwd(root, bufsize) != NULL);
	if (!resolved)
		perror("getcwd");

	/*
	 * Now, go back to where we started, necessary for picking up a rules
	 * file.  This is attempted even if the lookup above failed.
	 */
	if (chdir(pwd) < 0) {
		/* Failed to change directory, something is wrong.... */
		perror(pwd);
		return (0);
	}

	if (!resolved)
		return (0);

	/*
	 * Make sure the path returned does not have a trailing /. This
	 * can only happen when the entire pathname is "/".
	 */
	if (strcmp(root, "/") == 0)
		root[0] = '\0';

	return (1);
}
217  
218  /*
219   * This is the worker bee which creates the manifest based upon the command
220   * line options supplied by the user.
221   *
222   * NOTE: create_manifest() eventually outputs data to a pipe, which is read in
223   * by the child process.  The child process is running output_manifest(), which
224   * is responsible for generating sorted output.
225   */
226  static int
227  create_manifest_rule(char *reloc_root, FILE *rule_fp)
228  {
229  	struct rule	*root;
230  	int		ret_status = EXIT;
231  	uint_t		flags;
232  
233  	if (compute_chksum)
234  		flags = ATTR_CONTENTS;
235  	else
236  		flags = 0;
237  	ret_status = read_rules(rule_fp, reloc_root, flags, 1);
238  
239  	/* Loop through every single subtree */
240  	for (root = get_first_subtree(); root != NULL;
241  	    root = get_next_subtree(root)) {
242  
243  		/*
244  		 * Check to see if this subtree should have contents
245  		 * checking turned on or off.
246  		 *
247  		 * NOTE: The 'compute_chksum' and 'parent_vfs'
248  		 * are a necessary hack: the variables are used in
249  		 * walker(), both directly and indirectly.  Since
250  		 * the parameters to walker() are defined by nftw(),
251  		 * the globals are really a backdoor mechanism.
252  		 */
253  		ret_status = statvfs64(root->subtree, &parent_vfs);
254  		if (ret_status < 0) {
255  			perror(root->subtree);
256  			continue;
257  		}
258  
259  		/*
260  		 * Walk the subtree and invoke the callback function walker()
261  		 * Use FTW_ANYERR to get FTW_NS and FTW_DNR entries *and*
262  		 * to continue past those errors.
263  		 */
264  		subtree_root = root;
265  		(void) nftw64(root->subtree, &walker, 20, FTW_PHYS|FTW_ANYERR);
266  
267  		/*
268  		 * Ugly but necessary:
269  		 *
270  		 * walker() must return 0, or the tree walk will stop,
271  		 * so warning flags must be set through a global.
272  		 */
273  		if (eval_err == WARNING_EXIT)
274  			ret_status = WARNING_EXIT;
275  
276  	}
277  	return (ret_status);
278  }
279  
280  static int
281  create_manifest_filelist(char **argv, char *reloc_root)
282  {
283  	int	ret_status = EXIT;
284  	char	input_fname[PATH_MAX];
285  
286  	while (read_filelist(reloc_root, argv,
287  	    input_fname, sizeof (input_fname)) != -1) {
288  
289  		struct stat64	stat_buf;
290  		int		ret;
291  
292  		ret = lstat64(input_fname, &stat_buf);
293  		if (ret < 0) {
294  			ret_status = WARNING_EXIT;
295  			perror(input_fname);
296  		} else {
297  			ret = eval_file(input_fname, &stat_buf, NULL);
298  
299  			if (ret == WARNING_EXIT)
300  				ret_status = WARNING_EXIT;
301  		}
302  	}
303  
304  	return (ret_status);
305  }
306  
307  /*
308   * output_manifest() the child process.  It reads in the output from
309   * create_manifest() and sorts it.
310   */
311  static void
312  output_manifest(void)
313  {
314  	char	*env[] = {"LC_CTYPE=C", "LC_COLLATE=C", "LC_NUMERIC=C", NULL};
315  	time_t		time_val;
316  	struct tm	*tm;
317  	char		time_buf[1024];
318  
319  	(void) printf("%s", MANIFEST_VER);
320  	time_val = time((time_t)0);
321  	tm = localtime(&time_val);
322  	(void) strftime(time_buf, sizeof (time_buf), "%A, %B %d, %Y (%T)", tm);
323  	(void) printf("! %s\n", time_buf);
324  	(void) printf("%s", FORMAT_STR);
325  	(void) fflush(stdout);
326  	/*
327  	 * Simply run sort and read from the the current stdin, which is really
328  	 * the output of create_manifest().
329  	 * Also, make sure the output is unique, since a given file may be
330  	 * included by several stanzas.
331  	 */
332  	if (execle("/usr/bin/sort", "sort", "-u", NULL, env) < 0) {
333  		perror("");
334  		exit(FATAL_EXIT);
335  	}
336  
337  	/*NOTREACHED*/
338  }
339  
340  /*
341   * Callback function for nftw()
342   */
343  static int
344  walker(const char *name, const struct stat64 *sp, int type, struct FTW *ftwx)
345  {
346  	int			ret;
347  	struct statvfs64	path_vfs;
348  	boolean_t		dir_flag = B_FALSE;
349  	struct rule		*rule;
350  
351  	switch (type) {
352  	case FTW_F:	/* file 		*/
353  		rule = check_rules(name, 'F');
354  		if (rule != NULL) {
355  			if (rule->attr_list & ATTR_CONTENTS)
356  				compute_chksum = 1;
357  			else
358  				compute_chksum = 0;
359  		}
360  		break;
361  	case FTW_SL:	/* symbolic link, FTW_PHYS	*/
362  	case FTW_SLN:	/* symbolic link, ~FTW_PHYS	*/
363  		break;
364  	case FTW_DP:	/* end of directory, FTW_DEPTH	*/
365  	case FTW_D:	/* enter directory, ~FTW_DEPTH	*/
366  		dir_flag = B_TRUE;
367  		ret = statvfs64(name, &path_vfs);
368  		if (ret < 0)
369  			eval_err = WARNING_EXIT;
370  		break;
371  	case FTW_NS:	/* unstatable file	*/
372  		(void) fprintf(stderr, UNKNOWN_FILE, name);
373  		eval_err = WARNING_EXIT;
374  		return (0);
375  	case FTW_DNR:	/* unreadable directory	*/
376  		(void) fprintf(stderr, CANTLIST_DIR, name);
377  		eval_err = WARNING_EXIT;
378  		return (0);
379  	default:
380  		(void) fprintf(stderr, INTERNAL_ERR, name);
381  		eval_err = WARNING_EXIT;
382  		return (0);
383  	}
384  
385  	/* This is the function which really processes the file */
386  	ret = eval_file(name, sp, ftwx);
387  
388  	/*
389  	 * Since the parameters to walker() are constrained by nftw(),
390  	 * need to use a global to reflect a WARNING.  Sigh.
391  	 */
392  	if (ret == WARNING_EXIT)
393  		eval_err = WARNING_EXIT;
394  
395  	/*
396  	 * This is a case of a directory which crosses into a mounted
397  	 * filesystem of a different type, e.g., UFS -> NFS.
398  	 * BART should not walk the new filesystem (by specification), so
399  	 * set this consolidation-private flag so the rest of the subtree
400  	 * under this directory is not waled.
401  	 */
402  	if (dir_flag &&
403  	    (strcmp(parent_vfs.f_basetype, path_vfs.f_basetype) != 0))
404  		ftwx->quit = FTW_PRUNE;
405  
406  	return (0);
407  }
408  
409  /*
410   * This file does the per-file evaluation and is run to generate every entry
411   * in the manifest.
412   *
413   * All output is written to a pipe which is read by the child process,
414   * which is running output_manifest().
415   */
416  static int
417  eval_file(const char *fname, const struct stat64 *statb, struct FTW *ftwx)
418  {
419  	int	fd, ret, err_code, i, result;
420  	char	last_field[PATH_MAX], ftype, *acl_str;
421  	char	*quoted_name;
422  
423  	err_code = EXIT;
424  
425  	switch (statb->st_mode & S_IFMT) {
426  	/* Regular file */
427  	case S_IFREG: ftype = 'F'; break;
428  
429  	/* Directory */
430  	case S_IFDIR: ftype = 'D'; break;
431  
432  	/* Block Device */
433  	case S_IFBLK: ftype = 'B'; break;
434  
435  	/* Character Device */
436  	case S_IFCHR: ftype = 'C'; break;
437  
438  	/* Named Pipe */
439  	case S_IFIFO: ftype = 'P'; break;
440  
441  	/* Socket */
442  	case S_IFSOCK: ftype = 'S'; break;
443  
444  	/* Door */
445  	case S_IFDOOR: ftype = 'O'; break;
446  
447  	/* Symbolic link */
448  	case S_IFLNK: ftype = 'L'; break;
449  
450  	default: ftype = '-'; break;
451  	}
452  
453  	/* First, make sure this file should be cataloged */
454  
455  	if ((subtree_root != NULL) &&
456  	    ((result = exclude_fname(fname, ftype, subtree_root)) !=
457  	    NO_EXCLUDE)) {
458  		if ((result == EXCLUDE_PRUNE) && (ftwx != (struct FTW *)NULL))
459  			ftwx->quit = FTW_PRUNE;
460  		return (err_code);
461  	}
462  	for (i = 0; i < PATH_MAX; i++)
463  		last_field[i] = '\0';
464  
465  	/*
466  	 * Regular files, compute the MD5 checksum and put it into 'last_field'
467  	 * UNLESS instructed to ignore the checksums.
468  	 */
469  	if (ftype == 'F') {
470  		if (compute_chksum) {
471  			fd = open(fname, O_RDONLY|O_LARGEFILE);
472  			if (fd < 0) {
473  				err_code = WARNING_EXIT;
474  				perror(fname);
475  
476  				/* default value since the computution failed */
477  				(void) strcpy(last_field, "-");
478  			} else {
479  				if (generate_hash(fd, last_field) != 0) {
480  					err_code = WARNING_EXIT;
481  					(void) fprintf(stderr, CONTENTS_WARN,
482  					    fname);
483  					(void) strcpy(last_field, "-");
484  				}
485  			}
486  			(void) close(fd);
487  		}
488  		/* Instructed to ignore checksums, just put in a '-' */
489  		else
490  			(void) strcpy(last_field, "-");
491  	}
492  
493  	/*
494  	 * For symbolic links, put the destination of the symbolic link into
495  	 * 'last_field'
496  	 */
497  	if (ftype == 'L') {
498  		ret = readlink(fname, last_field, sizeof (last_field));
499  		if (ret < 0) {
500  			err_code = WARNING_EXIT;
501  			perror(fname);
502  
503  			/* default value since the computation failed */
504  			(void) strcpy(last_field, "-");
505  		}
506  		else
507  			(void) strlcpy(last_field,
508  			    sanitized_fname(last_field, B_FALSE),
509  			    sizeof (last_field));
510  
511  		/*
512  		 * Boundary condition: possible for a symlink to point to
513  		 * nothing [ ln -s '' link_name ].  For this case, set the
514  		 * destination to "\000".
515  		 */
516  		if (strlen(last_field) == 0)
517  			(void) strcpy(last_field, "\\000");
518  	}
519  
520  	acl_str = get_acl_string(fname, statb, &err_code);
521  
522  	/* Sanitize 'fname', so its in the proper format for the manifest */
523  	quoted_name = sanitized_fname(fname, B_TRUE);
524  
525  	/* Start to build the entry.... */
526  	(void) printf("%s %c %d %o %s %x %d %d", quoted_name, ftype,
527  	    (int)statb->st_size, (int)statb->st_mode, acl_str,
528  	    (int)statb->st_mtime, (int)statb->st_uid, (int)statb->st_gid);
529  
530  	/* Finish it off based upon whether or not it's a device node */
531  	if ((ftype == 'B') || (ftype == 'C'))
532  		(void) printf(" %x\n", (int)statb->st_rdev);
533  	else if (strlen(last_field) > 0)
534  		(void) printf(" %s\n", last_field);
535  	else
536  		(void) printf("\n");
537  
538  	/* free the memory consumed */
539  	free(acl_str);
540  	free(quoted_name);
541  
542  	return (err_code);
543  }
544  
545  /*
546   * When creating a manifest, make sure all '?', tabs, space, newline, '/'
547   * and '[' are all properly quoted.  Convert them to a "\ooo" where the 'ooo'
548   * represents their octal value. For filesystem objects, as opposed to symlink
549   * targets, also canonicalize the pathname.
550   */
551  static char *
552  sanitized_fname(const char *fname, boolean_t canon_path)
553  {
554  	const char *ip;
555  	unsigned char ch;
556  	char *op, *quoted_name;
557  
558  	/* Initialize everything */
559  	quoted_name = safe_calloc((4 * PATH_MAX) + 1);
560  	ip = fname;
561  	op = quoted_name;
562  
563  	if (canon_path) {
564  		/*
565  		 * In the case when a relocatable root was used, the relocatable
566  		 * root should *not* be part of the manifest.
567  		 */
568  		ip += strlen(reloc_root);
569  
570  		/*
571  		 * In the case when the '-I' option was used, make sure
572  		 * the quoted_name starts with a '/'.
573  		 */
574  		if (*ip != '/')
575  			*op++ = '/';
576  	}
577  
578  	/* Now walk through 'fname' and build the quoted string */
579  	while ((ch = *ip++) != 0) {
580  		switch (ch) {
581  		/* Quote the following characters */
582  		case ' ':
583  		case '*':
584  		case '\n':
585  		case '?':
586  		case '[':
587  		case '\\':
588  		case '\t':
589  			op += sprintf(op, "\\%.3o", (unsigned char)ch);
590  			break;
591  
592  		/* Otherwise, simply append them */
593  		default:
594  			*op++ = ch;
595  			break;
596  		}
597  	}
598  
599  	*op = 0;
600  
601  	return (quoted_name);
602  }
603  
604  /*
605   * Function responsible for generating the ACL information for a given
606   * file.  Note, the string is put into buffer malloc'd by this function.
607   * It's the responsibility of the caller to free the buffer.  This function
608   * should never return a NULL pointer.
609   */
610  static char *
611  get_acl_string(const char *fname, const struct stat64 *statb, int *err_code)
612  {
613  	acl_t		*aclp;
614  	char		*acltext;
615  	int		error;
616  
617  	if (S_ISLNK(statb->st_mode)) {
618  		return (safe_strdup("-"));
619  	}
620  
621  	/*
622  	 *  Include trivial acl's
623  	 */
624  	error = acl_get(fname, 0, &aclp);
625  
626  	if (error != 0) {
627  		*err_code = WARNING_EXIT;
628  		(void) fprintf(stderr, "%s: %s\n", fname, acl_strerror(error));
629  		return (safe_strdup("-"));
630  	} else {
631  		acltext = acl_totext(aclp, 0);
632  		acl_free(aclp);
633  		if (acltext == NULL)
634  			return (safe_strdup("-"));
635  		else
636  			return (acltext);
637  	}
638  }
639  
640  
641  /*
642   *
643   * description:	This routine reads stdin in BUF_SIZE chunks, uses the bits
644   *		to update the md5 hash buffer, and outputs the chunks
645   *		to stdout.  When stdin is exhausted, the hash is computed,
646   *		converted to a hexadecimal string, and returned.
647   *
648   * returns:	The md5 hash of stdin, or NULL if unsuccessful for any reason.
649   */
650  static int
651  generate_hash(int fdin, char *hash_str)
652  {
653  	unsigned char buf[BUF_SIZE];
654  	unsigned char hash[MD5_DIGEST_LENGTH];
655  	int i, amtread;
656  	MD5_CTX ctx;
657  
658  	MD5Init(&ctx);
659  
660  	for (;;) {
661  		amtread = read(fdin, buf, sizeof (buf));
662  		if (amtread == 0)
663  			break;
664  		if (amtread <  0)
665  			return (1);
666  
667  		/* got some data.  Now update hash */
668  		MD5Update(&ctx, buf, amtread);
669  	}
670  
671  	/* done passing through data, calculate hash */
672  	MD5Final(hash, &ctx);
673  
674  	for (i = 0; i < MD5_DIGEST_LENGTH; i++)
675  		(void) sprintf(hash_str + (i*2), "%2.2x", hash[i]);
676  
677  	return (0);
678  }
679  
680  /*
681   * Used by 'bart create' with the '-I' option.  Return each entry into a 'buf'
682   * with the appropriate exit code: '0' for success and '-1' for failure.
683   */
684  static int
685  read_filelist(char *reloc_root, char **argv, char *buf, size_t bufsize)
686  {
687  	static int		argv_index = -1;
688  	static boolean_t	read_stdinput = B_FALSE;
689  	char			temp_buf[PATH_MAX];
690  	char 			*cp;
691  
692  	/*
693  	 * INITIALIZATION:
694  	 * Setup this code so it knows whether or not to read sdtin.
695  	 * Also, if reading from argv, setup the index, "argv_index"
696  	 */
697  	if (argv_index == -1) {
698  		argv_index = 0;
699  
700  		/* In this case, no args after '-I', so read stdin */
701  		if (argv[0] == NULL)
702  			read_stdinput = B_TRUE;
703  	}
704  
705  	buf[0] = '\0';
706  
707  	if (read_stdinput) {
708  		if (fgets(temp_buf, PATH_MAX, stdin) == NULL)
709  			return (-1);
710  		cp = strtok(temp_buf, "\n");
711  	} else {
712  		cp = argv[argv_index++];
713  	}
714  
715  	if (cp == NULL)
716  		return (-1);
717  
718  	/*
719  	 * Unlike similar code elsewhere, avoid adding a leading
720  	 * slash for relative pathnames.
721  	 */
722  	(void) snprintf(buf, bufsize,
723  	    (reloc_root[0] == '\0' || cp[0] == '/') ? "%s%s" : "%s/%s",
724  	    reloc_root, cp);
725  
726  	return (0);
727  }
728