xref: /illumos-gate/usr/src/cmd/bart/create.c (revision aba1133a5077b2daf9217c517f6aa15731135d8e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <signal.h>
29 #include <unistd.h>
30 #include <sys/acl.h>
31 #include <sys/statvfs.h>
32 #include <sys/wait.h>
33 #include "bart.h"
34 #include <aclutils.h>
35 
36 static int	sanitize_reloc_root(char *root, size_t bufsize);
37 static int	create_manifest_filelist(char **argv, char *reloc_root);
38 static int	create_manifest_rule(char *reloc_root, FILE *rule_fp);
39 static void	output_manifest(void);
40 static int	eval_file(const char *fname, const struct stat64 *statb);
41 static char	*sanitized_fname(const char *, boolean_t);
42 static char	*get_acl_string(const char *fname, const struct stat64 *statb,
43     int *err_code);
44 static int	generate_hash(int fdin, char *hash_str);
45 static int	read_filelist(char *reloc_root, char **argv, char *buf,
46     size_t bufsize);
47 static int	walker(const char *name, const struct stat64 *sp,
48     int type, struct FTW *ftwx);
49 
50 /*
51  * The following globals are necessary due to the "walker" function
52  * provided by nftw().  Since there is no way to pass them through to the
53  * walker function, they must be global.
54  */
55 static int		compute_chksum = 1, eval_err = 0;
56 static struct rule	*subtree_root;
57 static char		reloc_root[PATH_MAX];
58 static struct statvfs	parent_vfs;
59 
60 int
61 bart_create(int argc, char **argv)
62 {
63 	boolean_t	filelist_input;
64 	int		ret, c, output_pipe[2];
65 	FILE 		*rules_fd = NULL;
66 	pid_t		pid;
67 
68 	filelist_input = B_FALSE;
69 	reloc_root[0] = '\0';
70 
71 	while ((c = getopt(argc, argv, "Inr:R:")) != EOF) {
72 		switch (c) {
73 		case 'I':
74 			if (rules_fd != NULL) {
75 				(void) fprintf(stderr, "%s", INPUT_ERR);
76 				usage();
77 			}
78 			filelist_input = B_TRUE;
79 			break;
80 
81 		case 'n':
82 			compute_chksum = 0;
83 			break;
84 
85 		case 'r':
86 			if (strcmp(optarg, "-") == 0)
87 				rules_fd = stdin;
88 			else
89 				rules_fd = fopen(optarg, "r");
90 			if (rules_fd == NULL) {
91 				perror(optarg);
92 				usage();
93 			}
94 			break;
95 
96 		case 'R':
97 			(void) strlcpy(reloc_root, optarg, sizeof (reloc_root));
98 			ret = sanitize_reloc_root(reloc_root,
99 			    sizeof (reloc_root));
100 			if (ret == 0)
101 				usage();
102 			break;
103 
104 		case '?':
105 		default :
106 			usage();
107 		}
108 	}
109 	argv += optind;
110 
111 	if (pipe(output_pipe) < 0) {
112 		perror("");
113 		exit(FATAL_EXIT);
114 	}
115 
116 	pid = fork();
117 	if (pid < 0) {
118 		perror(NULL);
119 		exit(FATAL_EXIT);
120 	}
121 
122 	/*
123 	 * Break the creation of a manifest into two parts: the parent process
124 	 * generated the data whereas the child process sorts the data.
125 	 *
126 	 * The processes communicate through the pipe.
127 	 */
128 	if (pid > 0) {
129 		/*
130 		 * Redirect the stdout of this process so it goes into
131 		 * output_pipe[0].  The output of this process will be read
132 		 * by the child, which will sort the output.
133 		 */
134 		if (dup2(output_pipe[0], STDOUT_FILENO) != STDOUT_FILENO) {
135 			perror(NULL);
136 			exit(FATAL_EXIT);
137 		}
138 		(void) close(output_pipe[0]);
139 		(void) close(output_pipe[1]);
140 
141 		if (filelist_input == B_TRUE) {
142 			ret = create_manifest_filelist(argv, reloc_root);
143 		} else {
144 			ret = create_manifest_rule(reloc_root, rules_fd);
145 		}
146 
147 		/* Close stdout so the sort in the child proc will complete */
148 		(void) fclose(stdout);
149 	} else {
150 		/*
151 		 * Redirect the stdin of this process so its read in from
152 		 * the pipe, which is the parent process in this case.
153 		 */
154 		if (dup2(output_pipe[1], STDIN_FILENO) != STDIN_FILENO) {
155 			perror(NULL);
156 			exit(FATAL_EXIT);
157 		}
158 		(void) close(output_pipe[0]);
159 
160 		output_manifest();
161 	}
162 
163 	/* Wait for the child proc (the sort) to complete */
164 	(void) wait(0);
165 
166 	return (ret);
167 }
168 
/*
 * Handle the -R option: replace the contents of 'root' with the absolute
 * path of the relocatable root.  This is useful when the user specifies
 * '-R ../../foo'.
 *
 * root		in: path given with -R; out: its absolute form, or the
 *		empty string when the relocatable root is "/"
 * bufsize	size of the buffer 'root' points at
 *
 * The working directory is changed temporarily and restored before
 * returning.
 *
 * Returns 1 on success.  Returns 0 if the current directory cannot be
 * determined, if 'root' cannot be entered, or if the original directory
 * cannot be restored; a message is written to stderr in each case.
 */
static int
sanitize_reloc_root(char *root, size_t bufsize)
{
	char		pwd[PATH_MAX];

	/*
	 * First, save the current directory.  Without it there is no way
	 * to come back, so give up before moving anywhere.
	 */
	if (getcwd(pwd, sizeof (pwd)) == NULL) {
		perror("getcwd");
		return (0);
	}

	/* Go to the location specified with the -R option. */
	if (chdir(root) < 0) {
		/* Failed to change directory, something is wrong.... */
		perror(root);
		return (0);
	}

	/*
	 * Save the absolute path of the relocatable root directory.  On
	 * failure the contents of 'root' are not usable, so do not print
	 * them; still try to get back to where we started.
	 */
	if (getcwd(root, bufsize) == NULL) {
		perror("getcwd");
		(void) chdir(pwd);
		return (0);
	}

	/*
	 * Now, go back to where we started, necessary for picking up a rules
	 * file.
	 */
	if (chdir(pwd) < 0) {
		/* Failed to change directory, something is wrong.... */
		perror(pwd);
		return (0);
	}

	/*
	 * Make sure the path returned does not have a trailing /. This
	 * can only happen when the entire pathname is "/".
	 */
	if (strcmp(root, "/") == 0)
		root[0] = '\0';

	return (1);
}
219 
220 /*
221  * This is the worker bee which creates the manifest based upon the command
222  * line options supplied by the user.
223  *
224  * NOTE: create_manifest() eventually outputs data to a pipe, which is read in
225  * by the child process.  The child process is running output_manifest(), which
226  * is responsible for generating sorted output.
227  */
228 static int
229 create_manifest_rule(char *reloc_root, FILE *rule_fp)
230 {
231 	struct rule	*root;
232 	int		ret_status = EXIT;
233 	uint_t		flags;
234 
235 	if (compute_chksum)
236 		flags = ATTR_CONTENTS;
237 	else
238 		flags = 0;
239 	ret_status = read_rules(rule_fp, reloc_root, flags, 1);
240 
241 	/* Loop through every single subtree */
242 	for (root = get_first_subtree(); root != NULL;
243 	    root = get_next_subtree(root)) {
244 
245 		/*
246 		 * This subtree has already been traversed by a
247 		 * previous stanza, i.e. this rule is a subset of a
248 		 * previous rule.
249 		 *
250 		 * Subtree has already been handled so move on!
251 		 */
252 		if (root->traversed)
253 			continue;
254 
255 		/*
256 		 * Check to see if this subtree should have contents
257 		 * checking turned on or off.
258 		 *
259 		 * NOTE: The 'compute_chksum' and 'parent_vfs'
260 		 * are a necessary hack: the variables are used in
261 		 * walker(), both directly and indirectly.  Since
262 		 * the parameters to walker() are defined by nftw(),
263 		 * the globals are really a backdoor mechanism.
264 		 */
265 		ret_status = statvfs(root->subtree, &parent_vfs);
266 		if (ret_status < 0) {
267 			perror(root->subtree);
268 			continue;
269 		}
270 
271 		/*
272 		 * Walk the subtree and invoke the callback function
273 		 * walker()
274 		 */
275 		subtree_root = root;
276 		(void) nftw64(root->subtree, &walker, 20, FTW_PHYS);
277 		root->traversed = B_TRUE;
278 
279 		/*
280 		 * Ugly but necessary:
281 		 *
282 		 * walker() must return 0, or the tree walk will stop,
283 		 * so warning flags must be set through a global.
284 		 */
285 		if (eval_err == WARNING_EXIT)
286 			ret_status = WARNING_EXIT;
287 
288 	}
289 	return (ret_status);
290 }
291 
292 static int
293 create_manifest_filelist(char **argv, char *reloc_root)
294 {
295 	int	ret_status = EXIT;
296 	char	input_fname[PATH_MAX];
297 
298 	while (read_filelist(reloc_root, argv,
299 	    input_fname, sizeof (input_fname)) != -1) {
300 
301 		struct stat64	stat_buf;
302 		int		ret;
303 
304 		ret = lstat64(input_fname, &stat_buf);
305 		if (ret < 0) {
306 			ret_status = WARNING_EXIT;
307 			perror(input_fname);
308 		} else {
309 			ret = eval_file(input_fname, &stat_buf);
310 
311 			if (ret == WARNING_EXIT)
312 				ret_status = WARNING_EXIT;
313 		}
314 	}
315 
316 	return (ret_status);
317 }
318 
319 /*
320  * output_manifest() the child process.  It reads in the output from
321  * create_manifest() and sorts it.
322  */
323 static void
324 output_manifest(void)
325 {
326 	char	*env[] = {"LC_CTYPE=C", "LC_COLLATE=C", "LC_NUMERIC=C", NULL};
327 	time_t		time_val;
328 	struct tm	*tm;
329 	char		time_buf[1024];
330 
331 	(void) printf("%s", MANIFEST_VER);
332 	time_val = time((time_t)0);
333 	tm = localtime(&time_val);
334 	(void) strftime(time_buf, sizeof (time_buf), "%A, %B %d, %Y (%T)", tm);
335 	(void) printf("! %s\n", time_buf);
336 	(void) printf("%s", FORMAT_STR);
337 	(void) fflush(stdout);
338 	/*
339 	 * Simply run sort and read from the the current stdin, which is really
340 	 * the output of create_manifest().
341 	 * Also, make sure the output is unique, since a given file may be
342 	 * included by several stanzas.
343 	 */
344 	if (execle("/usr/bin/sort", "sort", NULL, env) < 0) {
345 		perror("");
346 		exit(FATAL_EXIT);
347 	}
348 
349 	/*NOTREACHED*/
350 }
351 
352 /*
353  * Callback function for nftw()
354  */
355 static int
356 walker(const char *name, const struct stat64 *sp, int type, struct FTW *ftwx)
357 {
358 	int		ret;
359 	struct statvfs	path_vfs;
360 	boolean_t	dir_flag = B_FALSE;
361 	struct rule	*rule;
362 
363 	switch (type) {
364 	case FTW_F:	/* file 		*/
365 		rule = check_rules(name, 'F');
366 		if (rule != NULL) {
367 			if (rule->attr_list & ATTR_CONTENTS)
368 				compute_chksum = 1;
369 			else
370 				compute_chksum = 0;
371 		}
372 		break;
373 	case FTW_SL:	/* symbolic link	*/
374 	case FTW_DP:	/* end of directory	*/
375 	case FTW_DNR:	/* unreadable directory	*/
376 	case FTW_NS:	/* unstatable file	*/
377 		break;
378 	case FTW_D:	/* enter directory 		*/
379 
380 		/*
381 		 * Check to see if any subsequent rules are a subset
382 		 * of this rule; if they are, then mark them as
383 		 * "traversed".
384 		 */
385 		rule = subtree_root->next;
386 		while (rule != NULL) {
387 			if (strcmp(name, rule->subtree) == 0)
388 				rule->traversed = B_TRUE;
389 
390 			rule = rule->next;
391 		}
392 		dir_flag = B_TRUE;
393 		ret = statvfs(name, &path_vfs);
394 		if (ret < 0)
395 			eval_err = WARNING_EXIT;
396 		break;
397 	default:
398 		(void) fprintf(stderr, INVALID_FILE, name);
399 		eval_err = WARNING_EXIT;
400 		break;
401 	}
402 
403 	/* This is the function which really processes the file */
404 	ret = eval_file(name, sp);
405 
406 	/*
407 	 * Since the parameters to walker() are constrained by nftw(),
408 	 * need to use a global to reflect a WARNING.  Sigh.
409 	 */
410 	if (ret == WARNING_EXIT)
411 		eval_err = WARNING_EXIT;
412 
413 	/*
414 	 * This is a case of a directory which crosses into a mounted
415 	 * filesystem of a different type, e.g., UFS -> NFS.
416 	 * BART should not walk the new filesystem (by specification), so
417 	 * set this consolidation-private flag so the rest of the subtree
418 	 * under this directory is not waled.
419 	 */
420 	if (dir_flag &&
421 	    (strcmp(parent_vfs.f_basetype, path_vfs.f_basetype) != 0))
422 		ftwx->quit = FTW_PRUNE;
423 
424 	return (0);
425 }
426 
427 /*
428  * This file does the per-file evaluation and is run to generate every entry
429  * in the manifest.
430  *
431  * All output is written to a pipe which is read by the child process,
432  * which is running output_manifest().
433  */
434 static int
435 eval_file(const char *fname, const struct stat64 *statb)
436 {
437 	int	fd, ret, err_code, i;
438 	char	last_field[PATH_MAX], ftype, *acl_str,
439 		*quoted_name;
440 
441 	err_code = EXIT;
442 
443 	switch (statb->st_mode & S_IFMT) {
444 	/* Regular file */
445 	case S_IFREG: ftype = 'F'; break;
446 
447 	/* Directory */
448 	case S_IFDIR: ftype = 'D'; break;
449 
450 	/* Block Device */
451 	case S_IFBLK: ftype = 'B'; break;
452 
453 	/* Character Device */
454 	case S_IFCHR: ftype = 'C'; break;
455 
456 	/* Named Pipe */
457 	case S_IFIFO: ftype = 'P'; break;
458 
459 	/* Socket */
460 	case S_IFSOCK: ftype = 'S'; break;
461 
462 	/* Door */
463 	case S_IFDOOR: ftype = 'O'; break;
464 
465 	/* Symbolic link */
466 	case S_IFLNK: ftype = 'L'; break;
467 
468 	default: ftype = '-'; break;
469 	}
470 
471 	/* First, make sure this file should be cataloged */
472 
473 	if ((subtree_root != NULL) &&
474 	    (exclude_fname(fname, ftype, subtree_root)))
475 		return (err_code);
476 
477 	for (i = 0; i < PATH_MAX; i++)
478 		last_field[i] = '\0';
479 
480 	/*
481 	 * Regular files, compute the MD5 checksum and put it into 'last_field'
482 	 * UNLESS instructed to ignore the checksums.
483 	 */
484 	if (ftype == 'F') {
485 		if (compute_chksum) {
486 			fd = open(fname, O_RDONLY|O_LARGEFILE);
487 			if (fd < 0) {
488 				err_code = WARNING_EXIT;
489 				perror(fname);
490 
491 				/* default value since the computution failed */
492 				(void) strcpy(last_field, "-");
493 			} else {
494 				if (generate_hash(fd, last_field) != 0) {
495 					err_code = WARNING_EXIT;
496 					(void) fprintf(stderr, CONTENTS_WARN,
497 					    fname);
498 					(void) strcpy(last_field, "-");
499 				}
500 			}
501 			(void) close(fd);
502 		}
503 		/* Instructed to ignore checksums, just put in a '-' */
504 		else
505 			(void) strcpy(last_field, "-");
506 	}
507 
508 	/*
509 	 * For symbolic links, put the destination of the symbolic link into
510 	 * 'last_field'
511 	 */
512 	if (ftype == 'L') {
513 		ret = readlink(fname, last_field, sizeof (last_field));
514 		if (ret < 0) {
515 			err_code = WARNING_EXIT;
516 			perror(fname);
517 
518 			/* default value since the computation failed */
519 			(void) strcpy(last_field, "-");
520 		}
521 		else
522 			(void) strlcpy(last_field,
523 			    sanitized_fname(last_field, B_FALSE),
524 			    sizeof (last_field));
525 
526 		/*
527 		 * Boundary condition: possible for a symlink to point to
528 		 * nothing [ ln -s '' link_name ].  For this case, set the
529 		 * destination to "\000".
530 		 */
531 		if (strlen(last_field) == 0)
532 			(void) strcpy(last_field, "\\000");
533 	}
534 
535 	acl_str = get_acl_string(fname, statb, &err_code);
536 
537 	/* Sanitize 'fname', so its in the proper format for the manifest */
538 	quoted_name = sanitized_fname(fname, B_TRUE);
539 
540 	/* Start to build the entry.... */
541 	(void) printf("%s %c %d %o %s %x %d %d", quoted_name, ftype,
542 	    (int)statb->st_size, (int)statb->st_mode, acl_str,
543 	    (int)statb->st_mtime, (int)statb->st_uid, (int)statb->st_gid);
544 
545 	/* Finish it off based upon whether or not it's a device node */
546 	if ((ftype == 'B') && (ftype == 'C'))
547 		(void) printf(" %x\n", (int)statb->st_rdev);
548 	else if (strlen(last_field) > 0)
549 		(void) printf(" %s\n", last_field);
550 	else
551 		(void) printf("\n");
552 
553 	/* free the memory consumed */
554 	free(acl_str);
555 	free(quoted_name);
556 
557 	return (err_code);
558 }
559 
560 /*
561  * When creating a manifest, make sure all '?', tabs, space, newline, '/'
562  * and '[' are all properly quoted.  Convert them to a "\ooo" where the 'ooo'
563  * represents their octal value. For filesystem objects, as opposed to symlink
564  * targets, also canonicalize the pathname.
565  */
566 static char *
567 sanitized_fname(const char *fname, boolean_t canon_path)
568 {
569 	const char *ip;
570 	unsigned char ch;
571 	char *op, *quoted_name;
572 
573 	/* Initialize everything */
574 	quoted_name = safe_calloc((4 * PATH_MAX) + 1);
575 	ip = fname;
576 	op = quoted_name;
577 
578 	if (canon_path) {
579 		/*
580 		 * In the case when a relocatable root was used, the relocatable
581 		 * root should *not* be part of the manifest.
582 		 */
583 		ip += strlen(reloc_root);
584 
585 		/*
586 		 * In the case when the '-I' option was used, make sure
587 		 * the quoted_name starts with a '/'.
588 		 */
589 		if (*ip != '/')
590 			*op++ = '/';
591 	}
592 
593 	/* Now walk through 'fname' and build the quoted string */
594 	while ((ch = *ip++) != 0) {
595 		switch (ch) {
596 		/* Quote the following characters */
597 		case ' ':
598 		case '*':
599 		case '\n':
600 		case '?':
601 		case '[':
602 		case '\\':
603 		case '\t':
604 			op += sprintf(op, "\\%.3o", (unsigned char)ch);
605 			break;
606 
607 		/* Otherwise, simply append them */
608 		default:
609 			*op++ = ch;
610 			break;
611 		}
612 	}
613 
614 	*op = 0;
615 
616 	return (quoted_name);
617 }
618 
619 /*
620  * Function responsible for generating the ACL information for a given
621  * file.  Note, the string is put into buffer malloc'd by this function.
622  * Its the responsibility of the caller to free the buffer.
623  */
624 static char *
625 get_acl_string(const char *fname, const struct stat64 *statb, int *err_code)
626 {
627 	acl_t		*aclp;
628 	char		*acltext;
629 	int		error;
630 
631 	if (S_ISLNK(statb->st_mode)) {
632 		return (safe_strdup("-"));
633 	}
634 
635 	/*
636 	 *  Include trivial acl's
637 	 */
638 	error = acl_get(fname, 0, &aclp);
639 
640 	if (error != 0) {
641 		*err_code = WARNING_EXIT;
642 		(void) fprintf(stderr, "%s: %s\n", fname, acl_strerror(error));
643 		return (safe_strdup("-"));
644 	} else {
645 		acltext = acl_totext(aclp);
646 		acl_free(aclp);
647 		return (acltext);
648 	}
649 }
650 
651 
652 /*
653  *
654  * description:	This routine reads stdin in BUF_SIZE chunks, uses the bits
655  *		to update the md5 hash buffer, and outputs the chunks
656  *		to stdout.  When stdin is exhausted, the hash is computed,
657  *		converted to a hexadecimal string, and returned.
658  *
659  * returns:	The md5 hash of stdin, or NULL if unsuccessful for any reason.
660  */
661 static int
662 generate_hash(int fdin, char *hash_str)
663 {
664 	unsigned char buf[BUF_SIZE];
665 	unsigned char hash[MD5_DIGEST_LENGTH];
666 	int i, amtread;
667 	MD5_CTX ctx;
668 
669 	MD5Init(&ctx);
670 
671 	for (;;) {
672 		amtread = read(fdin, buf, sizeof (buf));
673 		if (amtread == 0)
674 			break;
675 		if (amtread <  0)
676 			return (1);
677 
678 		/* got some data.  Now update hash */
679 		MD5Update(&ctx, buf, amtread);
680 	}
681 
682 	/* done passing through data, calculate hash */
683 	MD5Final(hash, &ctx);
684 
685 	for (i = 0; i < MD5_DIGEST_LENGTH; i++)
686 		(void) sprintf(hash_str + (i*2), "%2.2x", hash[i]);
687 
688 	return (0);
689 }
690 
691 /*
692  * Used by 'bart create' with the '-I' option.  Return each entry into a 'buf'
693  * with the appropriate exit code: '0' for success and '-1' for failure.
694  */
695 static int
696 read_filelist(char *reloc_root, char **argv, char *buf, size_t bufsize)
697 {
698 	static int		argv_index = -1;
699 	static boolean_t	read_stdinput = B_FALSE;
700 	char			temp_buf[PATH_MAX];
701 	char 			*cp;
702 
703 	/*
704 	 * INITIALIZATION:
705 	 * Setup this code so it knows whether or not to read sdtin.
706 	 * Also, if reading from argv, setup the index, "argv_index"
707 	 */
708 	if (argv_index == -1) {
709 		argv_index = 0;
710 
711 		/* In this case, no args after '-I', so read stdin */
712 		if (argv[0] == NULL)
713 			read_stdinput = B_TRUE;
714 	}
715 
716 	buf[0] = '\0';
717 
718 	if (read_stdinput) {
719 		if (fgets(temp_buf, PATH_MAX, stdin) == NULL)
720 			return (-1);
721 		cp = strtok(temp_buf, "\n");
722 	} else {
723 		cp = argv[argv_index++];
724 	}
725 
726 	if (cp == NULL)
727 		return (-1);
728 
729 	/*
730 	 * Unlike similar code elsewhere, avoid adding a leading
731 	 * slash for relative pathnames.
732 	 */
733 	(void) snprintf(buf, bufsize,
734 	    (reloc_root[0] == '\0' || cp[0] == '/') ? "%s%s" : "%s/%s",
735 	    reloc_root, cp);
736 
737 	return (0);
738 }
739