xref: /illumos-gate/usr/src/cmd/bart/create.c (revision 2a12f85ad140e332791b4bad1208a734c3f26bf3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <signal.h>
26 #include <unistd.h>
27 #include <sys/acl.h>
28 #include <sys/statvfs.h>
29 #include <sys/wait.h>
30 #include "bart.h"
31 #include <aclutils.h>
32 
33 static int	sanitize_reloc_root(char *root, size_t bufsize);
34 static int	create_manifest_filelist(char **argv, char *reloc_root);
35 static int	create_manifest_rule(char *reloc_root, FILE *rule_fp);
36 static void	output_manifest(void);
37 static int	eval_file(const char *fname, const struct stat64 *statb,
38 	struct FTW *ftwx);
39 static char	*sanitized_fname(const char *, boolean_t);
40 static char	*get_acl_string(const char *fname, const struct stat64 *statb,
41     int *err_code);
42 static int	generate_hash(int fdin, char *hash_str);
43 static int	read_filelist(char *reloc_root, char **argv, char *buf,
44     size_t bufsize);
45 static int	walker(const char *name, const struct stat64 *sp,
46     int type, struct FTW *ftwx);
47 
48 /*
49  * The following globals are necessary due to the "walker" function
50  * provided by nftw().  Since there is no way to pass them through to the
51  * walker function, they must be global.
52  */
53 static int		compute_chksum = 1, eval_err = 0;
54 static struct rule	*subtree_root;
55 static char		reloc_root[PATH_MAX];
56 static struct statvfs64	parent_vfs;
57 
58 int
59 bart_create(int argc, char **argv)
60 {
61 	boolean_t	filelist_input;
62 	int		ret, c, output_pipe[2];
63 	FILE 		*rules_fd = NULL;
64 	pid_t		pid;
65 
66 	filelist_input = B_FALSE;
67 	reloc_root[0] = '\0';
68 
69 	while ((c = getopt(argc, argv, "Inr:R:")) != EOF) {
70 		switch (c) {
71 		case 'I':
72 			if (rules_fd != NULL) {
73 				(void) fprintf(stderr, "%s", INPUT_ERR);
74 				usage();
75 			}
76 			filelist_input = B_TRUE;
77 			break;
78 
79 		case 'n':
80 			compute_chksum = 0;
81 			break;
82 
83 		case 'r':
84 			if (strcmp(optarg, "-") == 0)
85 				rules_fd = stdin;
86 			else
87 				rules_fd = fopen(optarg, "r");
88 			if (rules_fd == NULL) {
89 				perror(optarg);
90 				usage();
91 			}
92 			break;
93 
94 		case 'R':
95 			(void) strlcpy(reloc_root, optarg, sizeof (reloc_root));
96 			ret = sanitize_reloc_root(reloc_root,
97 			    sizeof (reloc_root));
98 			if (ret == 0)
99 				usage();
100 			break;
101 
102 		case '?':
103 		default :
104 			usage();
105 		}
106 	}
107 	argv += optind;
108 
109 	if (pipe(output_pipe) < 0) {
110 		perror("");
111 		exit(FATAL_EXIT);
112 	}
113 
114 	pid = fork();
115 	if (pid < 0) {
116 		perror(NULL);
117 		exit(FATAL_EXIT);
118 	}
119 
120 	/*
121 	 * Break the creation of a manifest into two parts: the parent process
122 	 * generated the data whereas the child process sorts the data.
123 	 *
124 	 * The processes communicate through the pipe.
125 	 */
126 	if (pid > 0) {
127 		/*
128 		 * Redirect the stdout of this process so it goes into
129 		 * output_pipe[0].  The output of this process will be read
130 		 * by the child, which will sort the output.
131 		 */
132 		if (dup2(output_pipe[0], STDOUT_FILENO) != STDOUT_FILENO) {
133 			perror(NULL);
134 			exit(FATAL_EXIT);
135 		}
136 		(void) close(output_pipe[0]);
137 		(void) close(output_pipe[1]);
138 
139 		if (filelist_input == B_TRUE) {
140 			ret = create_manifest_filelist(argv, reloc_root);
141 		} else {
142 			ret = create_manifest_rule(reloc_root, rules_fd);
143 		}
144 
145 		/* Close stdout so the sort in the child proc will complete */
146 		(void) fclose(stdout);
147 	} else {
148 		/*
149 		 * Redirect the stdin of this process so its read in from
150 		 * the pipe, which is the parent process in this case.
151 		 */
152 		if (dup2(output_pipe[1], STDIN_FILENO) != STDIN_FILENO) {
153 			perror(NULL);
154 			exit(FATAL_EXIT);
155 		}
156 		(void) close(output_pipe[0]);
157 
158 		output_manifest();
159 	}
160 
161 	/* Wait for the child proc (the sort) to complete */
162 	(void) wait(0);
163 
164 	return (ret);
165 }
166 
/*
 * Handle the -R option: replace 'root' (a buffer of 'bufsize' bytes holding
 * the path given by the user) with the absolute path of that directory.
 * This is useful when the user specifies '-R ../../foo'.
 *
 * The absolute path is obtained by temporarily changing into the directory
 * and asking for the working directory; the original working directory is
 * restored before returning, which is necessary for picking up a rules file
 * given as a relative path.
 *
 * A root of "/" is returned as the empty string so that the result never
 * carries a trailing slash.
 *
 * Returns 1 on success.  Returns 0, after reporting the failure on stderr,
 * when the current directory cannot be determined, when 'root' cannot be
 * entered, or when the original directory cannot be restored.
 */
static int
sanitize_reloc_root(char *root, size_t bufsize)
{
	char	pwd[PATH_MAX];
	int	resolved = 1;

	/*
	 * First, save the current directory.  Without it there is no way
	 * back, so do not go anywhere if this fails.
	 */
	if (getcwd(pwd, sizeof (pwd)) == NULL) {
		perror("getcwd");
		return (0);
	}

	/* Go to the location specified with the -R option. */
	if (chdir(root) < 0) {
		/* Failed to change directory, something is wrong.... */
		perror(root);
		return (0);
	}

	/*
	 * Save the absolute path of the relocatable root directory.  On
	 * failure the buffer contents are unspecified, so clear it, but
	 * still fall through to restore the working directory.
	 */
	if (getcwd(root, bufsize) == NULL) {
		perror("getcwd");
		if (bufsize > 0)
			root[0] = '\0';
		resolved = 0;
	}

	/* Now, go back to where we started. */
	if (chdir(pwd) < 0) {
		/* Report the directory that could not be re-entered. */
		perror(pwd);
		return (0);
	}

	if (!resolved)
		return (0);

	/*
	 * Make sure the path returned does not have a trailing /. This
	 * can only happen when the entire pathname is "/".
	 */
	if (strcmp(root, "/") == 0)
		root[0] = '\0';

	return (1);
}
217 
218 /*
219  * This is the worker bee which creates the manifest based upon the command
220  * line options supplied by the user.
221  *
222  * NOTE: create_manifest() eventually outputs data to a pipe, which is read in
223  * by the child process.  The child process is running output_manifest(), which
224  * is responsible for generating sorted output.
225  */
226 static int
227 create_manifest_rule(char *reloc_root, FILE *rule_fp)
228 {
229 	struct rule	*root;
230 	int		ret_status = EXIT;
231 	uint_t		flags;
232 
233 	if (compute_chksum)
234 		flags = ATTR_CONTENTS;
235 	else
236 		flags = 0;
237 	ret_status = read_rules(rule_fp, reloc_root, flags, 1);
238 
239 	/* Loop through every single subtree */
240 	for (root = get_first_subtree(); root != NULL;
241 	    root = get_next_subtree(root)) {
242 
243 		/*
244 		 * Check to see if this subtree should have contents
245 		 * checking turned on or off.
246 		 *
247 		 * NOTE: The 'compute_chksum' and 'parent_vfs'
248 		 * are a necessary hack: the variables are used in
249 		 * walker(), both directly and indirectly.  Since
250 		 * the parameters to walker() are defined by nftw(),
251 		 * the globals are really a backdoor mechanism.
252 		 */
253 		ret_status = statvfs64(root->subtree, &parent_vfs);
254 		if (ret_status < 0) {
255 			perror(root->subtree);
256 			continue;
257 		}
258 
259 		/*
260 		 * Walk the subtree and invoke the callback function walker()
261 		 * Use FTW_ANYERR to get FTW_NS and FTW_DNR entries *and*
262 		 * to continue past those errors.
263 		 */
264 		subtree_root = root;
265 		(void) nftw64(root->subtree, &walker, 20, FTW_PHYS|FTW_ANYERR);
266 
267 		/*
268 		 * Ugly but necessary:
269 		 *
270 		 * walker() must return 0, or the tree walk will stop,
271 		 * so warning flags must be set through a global.
272 		 */
273 		if (eval_err == WARNING_EXIT)
274 			ret_status = WARNING_EXIT;
275 
276 	}
277 	return (ret_status);
278 }
279 
280 static int
281 create_manifest_filelist(char **argv, char *reloc_root)
282 {
283 	int	ret_status = EXIT;
284 	char	input_fname[PATH_MAX];
285 
286 	while (read_filelist(reloc_root, argv,
287 	    input_fname, sizeof (input_fname)) != -1) {
288 
289 		struct stat64	stat_buf;
290 		int		ret;
291 
292 		ret = lstat64(input_fname, &stat_buf);
293 		if (ret < 0) {
294 			ret_status = WARNING_EXIT;
295 			perror(input_fname);
296 		} else {
297 			ret = eval_file(input_fname, &stat_buf, NULL);
298 
299 			if (ret == WARNING_EXIT)
300 				ret_status = WARNING_EXIT;
301 		}
302 	}
303 
304 	return (ret_status);
305 }
306 
307 /*
308  * output_manifest() the child process.  It reads in the output from
309  * create_manifest() and sorts it.
310  */
311 static void
312 output_manifest(void)
313 {
314 	char	*env[] = {"LC_CTYPE=C", "LC_COLLATE=C", "LC_NUMERIC=C", NULL};
315 	time_t		time_val;
316 	struct tm	*tm;
317 	char		time_buf[1024];
318 
319 	(void) printf("%s", MANIFEST_VER);
320 	time_val = time((time_t)0);
321 	tm = localtime(&time_val);
322 	(void) strftime(time_buf, sizeof (time_buf), "%A, %B %d, %Y (%T)", tm);
323 	(void) printf("! %s\n", time_buf);
324 	(void) printf("%s", FORMAT_STR);
325 	(void) fflush(stdout);
326 	/*
327 	 * Simply run sort and read from the the current stdin, which is really
328 	 * the output of create_manifest().
329 	 * Also, make sure the output is unique, since a given file may be
330 	 * included by several stanzas.
331 	 */
332 	if (execle("/usr/bin/sort", "sort", "-u", NULL, env) < 0) {
333 		perror("");
334 		exit(FATAL_EXIT);
335 	}
336 
337 	/*NOTREACHED*/
338 }
339 
340 /*
341  * Callback function for nftw()
342  */
343 static int
344 walker(const char *name, const struct stat64 *sp, int type, struct FTW *ftwx)
345 {
346 	int			ret;
347 	struct statvfs64	path_vfs;
348 	boolean_t		dir_flag = B_FALSE;
349 	struct rule		*rule;
350 
351 	switch (type) {
352 	case FTW_F:	/* file 		*/
353 		rule = check_rules(name, 'F');
354 		if (rule != NULL) {
355 			if (rule->attr_list & ATTR_CONTENTS)
356 				compute_chksum = 1;
357 			else
358 				compute_chksum = 0;
359 		}
360 		break;
361 	case FTW_SL:	/* symbolic link, FTW_PHYS	*/
362 	case FTW_SLN:	/* symbolic link, ~FTW_PHYS	*/
363 		break;
364 	case FTW_DP:	/* end of directory, FTW_DEPTH	*/
365 	case FTW_D:	/* enter directory, ~FTW_DEPTH	*/
366 		dir_flag = B_TRUE;
367 		ret = statvfs64(name, &path_vfs);
368 		if (ret < 0)
369 			eval_err = WARNING_EXIT;
370 		break;
371 	case FTW_NS:	/* unstatable file	*/
372 		(void) fprintf(stderr, UNKNOWN_FILE, name);
373 		eval_err = WARNING_EXIT;
374 		return (0);
375 	case FTW_DNR:	/* unreadable directory	*/
376 		(void) fprintf(stderr, CANTLIST_DIR, name);
377 		eval_err = WARNING_EXIT;
378 		return (0);
379 	default:
380 		(void) fprintf(stderr, INTERNAL_ERR, name);
381 		eval_err = WARNING_EXIT;
382 		return (0);
383 	}
384 
385 	/* This is the function which really processes the file */
386 	ret = eval_file(name, sp, ftwx);
387 
388 	/*
389 	 * Since the parameters to walker() are constrained by nftw(),
390 	 * need to use a global to reflect a WARNING.  Sigh.
391 	 */
392 	if (ret == WARNING_EXIT)
393 		eval_err = WARNING_EXIT;
394 
395 	/*
396 	 * This is a case of a directory which crosses into a mounted
397 	 * filesystem of a different type, e.g., UFS -> NFS.
398 	 * BART should not walk the new filesystem (by specification), so
399 	 * set this consolidation-private flag so the rest of the subtree
400 	 * under this directory is not waled.
401 	 */
402 	if (dir_flag &&
403 	    (strcmp(parent_vfs.f_basetype, path_vfs.f_basetype) != 0))
404 		ftwx->quit = FTW_PRUNE;
405 
406 	return (0);
407 }
408 
409 /*
410  * This file does the per-file evaluation and is run to generate every entry
411  * in the manifest.
412  *
413  * All output is written to a pipe which is read by the child process,
414  * which is running output_manifest().
415  */
416 static int
417 eval_file(const char *fname, const struct stat64 *statb, struct FTW *ftwx)
418 {
419 	int	fd, ret, err_code, i, result;
420 	char	last_field[PATH_MAX], ftype, *acl_str;
421 	char	*quoted_name;
422 
423 	err_code = EXIT;
424 
425 	switch (statb->st_mode & S_IFMT) {
426 	/* Regular file */
427 	case S_IFREG: ftype = 'F'; break;
428 
429 	/* Directory */
430 	case S_IFDIR: ftype = 'D'; break;
431 
432 	/* Block Device */
433 	case S_IFBLK: ftype = 'B'; break;
434 
435 	/* Character Device */
436 	case S_IFCHR: ftype = 'C'; break;
437 
438 	/* Named Pipe */
439 	case S_IFIFO: ftype = 'P'; break;
440 
441 	/* Socket */
442 	case S_IFSOCK: ftype = 'S'; break;
443 
444 	/* Door */
445 	case S_IFDOOR: ftype = 'O'; break;
446 
447 	/* Symbolic link */
448 	case S_IFLNK: ftype = 'L'; break;
449 
450 	default: ftype = '-'; break;
451 	}
452 
453 	/* First, make sure this file should be cataloged */
454 
455 	if ((subtree_root != NULL) &&
456 	    ((result = exclude_fname(fname, ftype, subtree_root)) !=
457 	    NO_EXCLUDE)) {
458 		if ((result == EXCLUDE_PRUNE) && (ftwx != (struct FTW *)NULL))
459 			ftwx->quit = FTW_PRUNE;
460 		return (err_code);
461 	}
462 	for (i = 0; i < PATH_MAX; i++)
463 		last_field[i] = '\0';
464 
465 	/*
466 	 * Regular files, compute the MD5 checksum and put it into 'last_field'
467 	 * UNLESS instructed to ignore the checksums.
468 	 */
469 	if (ftype == 'F') {
470 		if (compute_chksum) {
471 			fd = open(fname, O_RDONLY|O_LARGEFILE);
472 			if (fd < 0) {
473 				err_code = WARNING_EXIT;
474 				perror(fname);
475 
476 				/* default value since the computution failed */
477 				(void) strcpy(last_field, "-");
478 			} else {
479 				if (generate_hash(fd, last_field) != 0) {
480 					err_code = WARNING_EXIT;
481 					(void) fprintf(stderr, CONTENTS_WARN,
482 					    fname);
483 					(void) strcpy(last_field, "-");
484 				}
485 			}
486 			(void) close(fd);
487 		}
488 		/* Instructed to ignore checksums, just put in a '-' */
489 		else
490 			(void) strcpy(last_field, "-");
491 	}
492 
493 	/*
494 	 * For symbolic links, put the destination of the symbolic link into
495 	 * 'last_field'
496 	 */
497 	if (ftype == 'L') {
498 		ret = readlink(fname, last_field, sizeof (last_field));
499 		if (ret < 0) {
500 			err_code = WARNING_EXIT;
501 			perror(fname);
502 
503 			/* default value since the computation failed */
504 			(void) strcpy(last_field, "-");
505 		}
506 		else
507 			(void) strlcpy(last_field,
508 			    sanitized_fname(last_field, B_FALSE),
509 			    sizeof (last_field));
510 
511 		/*
512 		 * Boundary condition: possible for a symlink to point to
513 		 * nothing [ ln -s '' link_name ].  For this case, set the
514 		 * destination to "\000".
515 		 */
516 		if (strlen(last_field) == 0)
517 			(void) strcpy(last_field, "\\000");
518 	}
519 
520 	acl_str = get_acl_string(fname, statb, &err_code);
521 
522 	/* Sanitize 'fname', so its in the proper format for the manifest */
523 	quoted_name = sanitized_fname(fname, B_TRUE);
524 
525 	/* Start to build the entry.... */
526 	(void) printf("%s %c %d %o %s %x %d %d", quoted_name, ftype,
527 	    (int)statb->st_size, (int)statb->st_mode, acl_str,
528 	    (int)statb->st_mtime, (int)statb->st_uid, (int)statb->st_gid);
529 
530 	/* Finish it off based upon whether or not it's a device node */
531 	if ((ftype == 'B') || (ftype == 'C'))
532 		(void) printf(" %x\n", (int)statb->st_rdev);
533 	else if (strlen(last_field) > 0)
534 		(void) printf(" %s\n", last_field);
535 	else
536 		(void) printf("\n");
537 
538 	/* free the memory consumed */
539 	free(acl_str);
540 	free(quoted_name);
541 
542 	return (err_code);
543 }
544 
545 /*
546  * When creating a manifest, make sure all '?', tabs, space, newline, '/'
547  * and '[' are all properly quoted.  Convert them to a "\ooo" where the 'ooo'
548  * represents their octal value. For filesystem objects, as opposed to symlink
549  * targets, also canonicalize the pathname.
550  */
551 static char *
552 sanitized_fname(const char *fname, boolean_t canon_path)
553 {
554 	const char *ip;
555 	unsigned char ch;
556 	char *op, *quoted_name;
557 
558 	/* Initialize everything */
559 	quoted_name = safe_calloc((4 * PATH_MAX) + 1);
560 	ip = fname;
561 	op = quoted_name;
562 
563 	if (canon_path) {
564 		/*
565 		 * In the case when a relocatable root was used, the relocatable
566 		 * root should *not* be part of the manifest.
567 		 */
568 		ip += strlen(reloc_root);
569 
570 		/*
571 		 * In the case when the '-I' option was used, make sure
572 		 * the quoted_name starts with a '/'.
573 		 */
574 		if (*ip != '/')
575 			*op++ = '/';
576 	}
577 
578 	/* Now walk through 'fname' and build the quoted string */
579 	while ((ch = *ip++) != 0) {
580 		switch (ch) {
581 		/* Quote the following characters */
582 		case ' ':
583 		case '*':
584 		case '\n':
585 		case '?':
586 		case '[':
587 		case '\\':
588 		case '\t':
589 			op += sprintf(op, "\\%.3o", (unsigned char)ch);
590 			break;
591 
592 		/* Otherwise, simply append them */
593 		default:
594 			*op++ = ch;
595 			break;
596 		}
597 	}
598 
599 	*op = 0;
600 
601 	return (quoted_name);
602 }
603 
604 /*
605  * Function responsible for generating the ACL information for a given
606  * file.  Note, the string is put into buffer malloc'd by this function.
607  * It's the responsibility of the caller to free the buffer.  This function
608  * should never return a NULL pointer.
609  */
610 static char *
611 get_acl_string(const char *fname, const struct stat64 *statb, int *err_code)
612 {
613 	acl_t		*aclp;
614 	char		*acltext;
615 	int		error;
616 
617 	if (S_ISLNK(statb->st_mode)) {
618 		return (safe_strdup("-"));
619 	}
620 
621 	/*
622 	 *  Include trivial acl's
623 	 */
624 	error = acl_get(fname, 0, &aclp);
625 
626 	if (error != 0) {
627 		*err_code = WARNING_EXIT;
628 		(void) fprintf(stderr, "%s: %s\n", fname, acl_strerror(error));
629 		return (safe_strdup("-"));
630 	} else {
631 		acltext = acl_totext(aclp, 0);
632 		acl_free(aclp);
633 		if (acltext == NULL)
634 			return (safe_strdup("-"));
635 		else
636 			return (acltext);
637 	}
638 }
639 
640 
641 /*
642  *
643  * description:	This routine reads stdin in BUF_SIZE chunks, uses the bits
644  *		to update the md5 hash buffer, and outputs the chunks
645  *		to stdout.  When stdin is exhausted, the hash is computed,
646  *		converted to a hexadecimal string, and returned.
647  *
648  * returns:	The md5 hash of stdin, or NULL if unsuccessful for any reason.
649  */
650 static int
651 generate_hash(int fdin, char *hash_str)
652 {
653 	unsigned char buf[BUF_SIZE];
654 	unsigned char hash[MD5_DIGEST_LENGTH];
655 	int i, amtread;
656 	MD5_CTX ctx;
657 
658 	MD5Init(&ctx);
659 
660 	for (;;) {
661 		amtread = read(fdin, buf, sizeof (buf));
662 		if (amtread == 0)
663 			break;
664 		if (amtread <  0)
665 			return (1);
666 
667 		/* got some data.  Now update hash */
668 		MD5Update(&ctx, buf, amtread);
669 	}
670 
671 	/* done passing through data, calculate hash */
672 	MD5Final(hash, &ctx);
673 
674 	for (i = 0; i < MD5_DIGEST_LENGTH; i++)
675 		(void) sprintf(hash_str + (i*2), "%2.2x", hash[i]);
676 
677 	return (0);
678 }
679 
/*
 * Used by 'bart create' with the '-I' option.  Each call stores the next
 * entry of the file list in 'buf' (at most 'bufsize' bytes, including the
 * terminator) and returns 0; -1 is returned when the list is exhausted.
 *
 * The list is taken from 'argv' when it holds at least one name and from
 * stdin, one name per line, otherwise.  That choice is made on the first
 * call and remembered in static state, so the function serves a single
 * list per process.  Empty lines on stdin are skipped.
 *
 * 'reloc_root' is prepended to every entry, with a '/' in between unless
 * 'reloc_root' is empty or the entry already starts with one.
 */
static int
read_filelist(char *reloc_root, char **argv, char *buf, size_t bufsize)
{
	static enum { SRC_UNSET, SRC_ARGV, SRC_STDIN } source = SRC_UNSET;
	static int	argv_index = 0;
	char		line[PATH_MAX];
	char		*cp;

	/*
	 * INITIALIZATION:
	 * Decide, once, whether to read stdin: that is the case when no
	 * args follow '-I'.
	 */
	if (source == SRC_UNSET)
		source = (argv[0] == NULL) ? SRC_STDIN : SRC_ARGV;

	buf[0] = '\0';

	if (source == SRC_STDIN) {
		/*
		 * Skip empty lines instead of treating the first one as
		 * the end of the list.
		 */
		do {
			if (fgets(line, sizeof (line), stdin) == NULL)
				return (-1);
			line[strcspn(line, "\n")] = '\0';
		} while (line[0] == '\0');
		cp = line;
	} else {
		/*
		 * Do not step past the terminating NULL, so that further
		 * calls keep reporting the end of the list.
		 */
		cp = argv[argv_index];
		if (cp == NULL)
			return (-1);
		argv_index++;
	}

	/*
	 * Unlike similar code elsewhere, avoid adding a leading
	 * slash for relative pathnames.
	 */
	if (reloc_root[0] == '\0' || cp[0] == '/')
		(void) snprintf(buf, bufsize, "%s%s", reloc_root, cp);
	else
		(void) snprintf(buf, bufsize, "%s/%s", reloc_root, cp);

	return (0);
}
728