xref: /illumos-gate/usr/src/cmd/bart/create.c (revision f48205be61a214698b763ff550ab9e657525104c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 #pragma ident	"%Z%%M%	%I%	%E% SMI"
26 
27 #include <signal.h>
28 #include <unistd.h>
29 #include <sys/acl.h>
30 #include <sys/statvfs.h>
31 #include <sys/wait.h>
32 #include "bart.h"
33 #include <aclutils.h>
34 
35 static int	sanitize_reloc_root(char *root, size_t bufsize);
36 static int	create_manifest_filelist(char **argv, char *reloc_root);
37 static int	create_manifest_rule(char *reloc_root, FILE *rule_fp);
38 static void	output_manifest(void);
39 static int	eval_file(const char *fname, const struct stat64 *statb);
40 static char	*sanitized_fname(const char *, boolean_t);
41 static char	*get_acl_string(const char *fname, const struct stat64 *statb,
42     int *err_code);
43 static int	generate_hash(int fdin, char *hash_str);
44 static int	read_filelist(char *reloc_root, char **argv, char *buf,
45     size_t bufsize);
46 static int	walker(const char *name, const struct stat64 *sp,
47     int type, struct FTW *ftwx);
48 
49 /*
50  * The following globals are necessary due to the "walker" function
51  * provided by nftw().  Since there is no way to pass them through to the
52  * walker function, they must be global.
53  */
54 static int		compute_chksum = 1, eval_err = 0;
55 static struct rule	*subtree_root;
56 static char		reloc_root[PATH_MAX];
57 static struct statvfs	parent_vfs;
58 
59 int
60 bart_create(int argc, char **argv)
61 {
62 	boolean_t	filelist_input;
63 	int		ret, c, output_pipe[2];
64 	FILE 		*rules_fd = NULL;
65 	pid_t		pid;
66 
67 	filelist_input = B_FALSE;
68 	reloc_root[0] = '\0';
69 
70 	while ((c = getopt(argc, argv, "Inr:R:")) != EOF) {
71 		switch (c) {
72 		case 'I':
73 			if (rules_fd != NULL) {
74 				(void) fprintf(stderr, "%s", INPUT_ERR);
75 				usage();
76 			}
77 			filelist_input = B_TRUE;
78 			break;
79 
80 		case 'n':
81 			compute_chksum = 0;
82 			break;
83 
84 		case 'r':
85 			if (strcmp(optarg, "-") == 0)
86 				rules_fd = stdin;
87 			else
88 				rules_fd = fopen(optarg, "r");
89 			if (rules_fd == NULL) {
90 				perror(optarg);
91 				usage();
92 			}
93 			break;
94 
95 		case 'R':
96 			(void) strlcpy(reloc_root, optarg, sizeof (reloc_root));
97 			ret = sanitize_reloc_root(reloc_root,
98 			    sizeof (reloc_root));
99 			if (ret == 0)
100 				usage();
101 			break;
102 
103 		case '?':
104 		default :
105 			usage();
106 		}
107 	}
108 	argv += optind;
109 
110 	if (pipe(output_pipe) < 0) {
111 		perror("");
112 		exit(FATAL_EXIT);
113 	}
114 
115 	pid = fork();
116 	if (pid < 0) {
117 		perror(NULL);
118 		exit(FATAL_EXIT);
119 	}
120 
121 	/*
122 	 * Break the creation of a manifest into two parts: the parent process
123 	 * generated the data whereas the child process sorts the data.
124 	 *
125 	 * The processes communicate through the pipe.
126 	 */
127 	if (pid > 0) {
128 		/*
129 		 * Redirect the stdout of this process so it goes into
130 		 * output_pipe[0].  The output of this process will be read
131 		 * by the child, which will sort the output.
132 		 */
133 		if (dup2(output_pipe[0], STDOUT_FILENO) != STDOUT_FILENO) {
134 			perror(NULL);
135 			exit(FATAL_EXIT);
136 		}
137 		(void) close(output_pipe[0]);
138 		(void) close(output_pipe[1]);
139 
140 		if (filelist_input == B_TRUE) {
141 			ret = create_manifest_filelist(argv, reloc_root);
142 		} else {
143 			ret = create_manifest_rule(reloc_root, rules_fd);
144 		}
145 
146 		/* Close stdout so the sort in the child proc will complete */
147 		(void) fclose(stdout);
148 	} else {
149 		/*
150 		 * Redirect the stdin of this process so its read in from
151 		 * the pipe, which is the parent process in this case.
152 		 */
153 		if (dup2(output_pipe[1], STDIN_FILENO) != STDIN_FILENO) {
154 			perror(NULL);
155 			exit(FATAL_EXIT);
156 		}
157 		(void) close(output_pipe[0]);
158 
159 		output_manifest();
160 	}
161 
162 	/* Wait for the child proc (the sort) to complete */
163 	(void) wait(0);
164 
165 	return (ret);
166 }
167 
/*
 * Handle the -R option: replace 'root' (a buffer of 'bufsize' bytes holding
 * the user-supplied path) with the absolute path of that directory.  This is
 * what makes '-R ../../foo' work.
 *
 * The absolute path is obtained by changing into the directory and asking
 * for the current working directory; the original working directory is
 * restored afterwards, since a relative rules file path still has to be
 * resolved from there.
 *
 * If the result is "/", 'root' is set to the empty string so that the
 * stored root never ends in a slash.
 *
 * Returns 1 on success.  Returns 0 (after printing a diagnostic) if the
 * current directory cannot be determined, 'root' cannot be entered, or the
 * original directory cannot be restored.
 */
static int
sanitize_reloc_root(char *root, size_t bufsize)
{
	char		pwd[PATH_MAX];

	/*
	 * Remember where we are.  Without this there is no way back, so
	 * give up rather than chdir() to an undefined path later.
	 */
	if (getcwd(pwd, sizeof (pwd)) == NULL) {
		perror("getcwd");
		return (0);
	}

	if (chdir(root) < 0) {
		/* Failed to change directory, something is wrong.... */
		perror(root);
		return (0);
	}

	/*
	 * Save the absolute path of the relocatable root directory.  On
	 * failure the contents of 'root' are undefined, so report it and
	 * make a best-effort attempt to return to the starting directory.
	 */
	if (getcwd(root, bufsize) == NULL) {
		perror("getcwd");
		(void) chdir(pwd);
		return (0);
	}

	/* Go back to where we started. */
	if (chdir(pwd) < 0) {
		/* It is 'pwd', not 'root', that could not be entered. */
		perror(pwd);
		return (0);
	}

	/* The only path with a trailing '/' is "/" itself. */
	if (strcmp(root, "/") == 0)
		root[0] = '\0';

	return (1);
}
218 
219 /*
220  * This is the worker bee which creates the manifest based upon the command
221  * line options supplied by the user.
222  *
223  * NOTE: create_manifest() eventually outputs data to a pipe, which is read in
224  * by the child process.  The child process is running output_manifest(), which
225  * is responsible for generating sorted output.
226  */
227 static int
228 create_manifest_rule(char *reloc_root, FILE *rule_fp)
229 {
230 	struct rule	*root;
231 	int		ret_status = EXIT;
232 	uint_t		flags;
233 
234 	if (compute_chksum)
235 		flags = ATTR_CONTENTS;
236 	else
237 		flags = 0;
238 	ret_status = read_rules(rule_fp, reloc_root, flags, 1);
239 
240 	/* Loop through every single subtree */
241 	for (root = get_first_subtree(); root != NULL;
242 	    root = get_next_subtree(root)) {
243 
244 		/*
245 		 * Check to see if this subtree should have contents
246 		 * checking turned on or off.
247 		 *
248 		 * NOTE: The 'compute_chksum' and 'parent_vfs'
249 		 * are a necessary hack: the variables are used in
250 		 * walker(), both directly and indirectly.  Since
251 		 * the parameters to walker() are defined by nftw(),
252 		 * the globals are really a backdoor mechanism.
253 		 */
254 		ret_status = statvfs(root->subtree, &parent_vfs);
255 		if (ret_status < 0) {
256 			perror(root->subtree);
257 			continue;
258 		}
259 
260 		/*
261 		 * Walk the subtree and invoke the callback function
262 		 * walker()
263 		 */
264 		subtree_root = root;
265 		(void) nftw64(root->subtree, &walker, 20, FTW_PHYS);
266 
267 		/*
268 		 * Ugly but necessary:
269 		 *
270 		 * walker() must return 0, or the tree walk will stop,
271 		 * so warning flags must be set through a global.
272 		 */
273 		if (eval_err == WARNING_EXIT)
274 			ret_status = WARNING_EXIT;
275 
276 	}
277 	return (ret_status);
278 }
279 
280 static int
281 create_manifest_filelist(char **argv, char *reloc_root)
282 {
283 	int	ret_status = EXIT;
284 	char	input_fname[PATH_MAX];
285 
286 	while (read_filelist(reloc_root, argv,
287 	    input_fname, sizeof (input_fname)) != -1) {
288 
289 		struct stat64	stat_buf;
290 		int		ret;
291 
292 		ret = lstat64(input_fname, &stat_buf);
293 		if (ret < 0) {
294 			ret_status = WARNING_EXIT;
295 			perror(input_fname);
296 		} else {
297 			ret = eval_file(input_fname, &stat_buf);
298 
299 			if (ret == WARNING_EXIT)
300 				ret_status = WARNING_EXIT;
301 		}
302 	}
303 
304 	return (ret_status);
305 }
306 
307 /*
308  * output_manifest() the child process.  It reads in the output from
309  * create_manifest() and sorts it.
310  */
311 static void
312 output_manifest(void)
313 {
314 	char	*env[] = {"LC_CTYPE=C", "LC_COLLATE=C", "LC_NUMERIC=C", NULL};
315 	time_t		time_val;
316 	struct tm	*tm;
317 	char		time_buf[1024];
318 
319 	(void) printf("%s", MANIFEST_VER);
320 	time_val = time((time_t)0);
321 	tm = localtime(&time_val);
322 	(void) strftime(time_buf, sizeof (time_buf), "%A, %B %d, %Y (%T)", tm);
323 	(void) printf("! %s\n", time_buf);
324 	(void) printf("%s", FORMAT_STR);
325 	(void) fflush(stdout);
326 	/*
327 	 * Simply run sort and read from the the current stdin, which is really
328 	 * the output of create_manifest().
329 	 * Also, make sure the output is unique, since a given file may be
330 	 * included by several stanzas.
331 	 */
332 	if (execle("/usr/bin/sort", "sort", "-u", NULL, env) < 0) {
333 		perror("");
334 		exit(FATAL_EXIT);
335 	}
336 
337 	/*NOTREACHED*/
338 }
339 
340 /*
341  * Callback function for nftw()
342  */
343 static int
344 walker(const char *name, const struct stat64 *sp, int type, struct FTW *ftwx)
345 {
346 	int		ret;
347 	struct statvfs	path_vfs;
348 	boolean_t	dir_flag = B_FALSE;
349 	struct rule	*rule;
350 
351 	switch (type) {
352 	case FTW_F:	/* file 		*/
353 		rule = check_rules(name, 'F');
354 		if (rule != NULL) {
355 			if (rule->attr_list & ATTR_CONTENTS)
356 				compute_chksum = 1;
357 			else
358 				compute_chksum = 0;
359 		}
360 		break;
361 	case FTW_SL:	/* symbolic link	*/
362 	case FTW_DP:	/* end of directory	*/
363 	case FTW_DNR:	/* unreadable directory	*/
364 	case FTW_NS:	/* unstatable file	*/
365 		break;
366 	case FTW_D:	/* enter directory 		*/
367 		dir_flag = B_TRUE;
368 		ret = statvfs(name, &path_vfs);
369 		if (ret < 0)
370 			eval_err = WARNING_EXIT;
371 		break;
372 	default:
373 		(void) fprintf(stderr, INVALID_FILE, name);
374 		eval_err = WARNING_EXIT;
375 		break;
376 	}
377 
378 	/* This is the function which really processes the file */
379 	ret = eval_file(name, sp);
380 
381 	/*
382 	 * Since the parameters to walker() are constrained by nftw(),
383 	 * need to use a global to reflect a WARNING.  Sigh.
384 	 */
385 	if (ret == WARNING_EXIT)
386 		eval_err = WARNING_EXIT;
387 
388 	/*
389 	 * This is a case of a directory which crosses into a mounted
390 	 * filesystem of a different type, e.g., UFS -> NFS.
391 	 * BART should not walk the new filesystem (by specification), so
392 	 * set this consolidation-private flag so the rest of the subtree
393 	 * under this directory is not waled.
394 	 */
395 	if (dir_flag &&
396 	    (strcmp(parent_vfs.f_basetype, path_vfs.f_basetype) != 0))
397 		ftwx->quit = FTW_PRUNE;
398 
399 	return (0);
400 }
401 
402 /*
403  * This file does the per-file evaluation and is run to generate every entry
404  * in the manifest.
405  *
406  * All output is written to a pipe which is read by the child process,
407  * which is running output_manifest().
408  */
409 static int
410 eval_file(const char *fname, const struct stat64 *statb)
411 {
412 	int	fd, ret, err_code, i;
413 	char	last_field[PATH_MAX], ftype, *acl_str,
414 		*quoted_name;
415 
416 	err_code = EXIT;
417 
418 	switch (statb->st_mode & S_IFMT) {
419 	/* Regular file */
420 	case S_IFREG: ftype = 'F'; break;
421 
422 	/* Directory */
423 	case S_IFDIR: ftype = 'D'; break;
424 
425 	/* Block Device */
426 	case S_IFBLK: ftype = 'B'; break;
427 
428 	/* Character Device */
429 	case S_IFCHR: ftype = 'C'; break;
430 
431 	/* Named Pipe */
432 	case S_IFIFO: ftype = 'P'; break;
433 
434 	/* Socket */
435 	case S_IFSOCK: ftype = 'S'; break;
436 
437 	/* Door */
438 	case S_IFDOOR: ftype = 'O'; break;
439 
440 	/* Symbolic link */
441 	case S_IFLNK: ftype = 'L'; break;
442 
443 	default: ftype = '-'; break;
444 	}
445 
446 	/* First, make sure this file should be cataloged */
447 
448 	if ((subtree_root != NULL) &&
449 	    (exclude_fname(fname, ftype, subtree_root)))
450 		return (err_code);
451 
452 	for (i = 0; i < PATH_MAX; i++)
453 		last_field[i] = '\0';
454 
455 	/*
456 	 * Regular files, compute the MD5 checksum and put it into 'last_field'
457 	 * UNLESS instructed to ignore the checksums.
458 	 */
459 	if (ftype == 'F') {
460 		if (compute_chksum) {
461 			fd = open(fname, O_RDONLY|O_LARGEFILE);
462 			if (fd < 0) {
463 				err_code = WARNING_EXIT;
464 				perror(fname);
465 
466 				/* default value since the computution failed */
467 				(void) strcpy(last_field, "-");
468 			} else {
469 				if (generate_hash(fd, last_field) != 0) {
470 					err_code = WARNING_EXIT;
471 					(void) fprintf(stderr, CONTENTS_WARN,
472 					    fname);
473 					(void) strcpy(last_field, "-");
474 				}
475 			}
476 			(void) close(fd);
477 		}
478 		/* Instructed to ignore checksums, just put in a '-' */
479 		else
480 			(void) strcpy(last_field, "-");
481 	}
482 
483 	/*
484 	 * For symbolic links, put the destination of the symbolic link into
485 	 * 'last_field'
486 	 */
487 	if (ftype == 'L') {
488 		ret = readlink(fname, last_field, sizeof (last_field));
489 		if (ret < 0) {
490 			err_code = WARNING_EXIT;
491 			perror(fname);
492 
493 			/* default value since the computation failed */
494 			(void) strcpy(last_field, "-");
495 		}
496 		else
497 			(void) strlcpy(last_field,
498 			    sanitized_fname(last_field, B_FALSE),
499 			    sizeof (last_field));
500 
501 		/*
502 		 * Boundary condition: possible for a symlink to point to
503 		 * nothing [ ln -s '' link_name ].  For this case, set the
504 		 * destination to "\000".
505 		 */
506 		if (strlen(last_field) == 0)
507 			(void) strcpy(last_field, "\\000");
508 	}
509 
510 	acl_str = get_acl_string(fname, statb, &err_code);
511 
512 	/* Sanitize 'fname', so its in the proper format for the manifest */
513 	quoted_name = sanitized_fname(fname, B_TRUE);
514 
515 	/* Start to build the entry.... */
516 	(void) printf("%s %c %d %o %s %x %d %d", quoted_name, ftype,
517 	    (int)statb->st_size, (int)statb->st_mode, acl_str,
518 	    (int)statb->st_mtime, (int)statb->st_uid, (int)statb->st_gid);
519 
520 	/* Finish it off based upon whether or not it's a device node */
521 	if ((ftype == 'B') || (ftype == 'C'))
522 		(void) printf(" %x\n", (int)statb->st_rdev);
523 	else if (strlen(last_field) > 0)
524 		(void) printf(" %s\n", last_field);
525 	else
526 		(void) printf("\n");
527 
528 	/* free the memory consumed */
529 	free(acl_str);
530 	free(quoted_name);
531 
532 	return (err_code);
533 }
534 
535 /*
536  * When creating a manifest, make sure all '?', tabs, space, newline, '/'
537  * and '[' are all properly quoted.  Convert them to a "\ooo" where the 'ooo'
538  * represents their octal value. For filesystem objects, as opposed to symlink
539  * targets, also canonicalize the pathname.
540  */
541 static char *
542 sanitized_fname(const char *fname, boolean_t canon_path)
543 {
544 	const char *ip;
545 	unsigned char ch;
546 	char *op, *quoted_name;
547 
548 	/* Initialize everything */
549 	quoted_name = safe_calloc((4 * PATH_MAX) + 1);
550 	ip = fname;
551 	op = quoted_name;
552 
553 	if (canon_path) {
554 		/*
555 		 * In the case when a relocatable root was used, the relocatable
556 		 * root should *not* be part of the manifest.
557 		 */
558 		ip += strlen(reloc_root);
559 
560 		/*
561 		 * In the case when the '-I' option was used, make sure
562 		 * the quoted_name starts with a '/'.
563 		 */
564 		if (*ip != '/')
565 			*op++ = '/';
566 	}
567 
568 	/* Now walk through 'fname' and build the quoted string */
569 	while ((ch = *ip++) != 0) {
570 		switch (ch) {
571 		/* Quote the following characters */
572 		case ' ':
573 		case '*':
574 		case '\n':
575 		case '?':
576 		case '[':
577 		case '\\':
578 		case '\t':
579 			op += sprintf(op, "\\%.3o", (unsigned char)ch);
580 			break;
581 
582 		/* Otherwise, simply append them */
583 		default:
584 			*op++ = ch;
585 			break;
586 		}
587 	}
588 
589 	*op = 0;
590 
591 	return (quoted_name);
592 }
593 
594 /*
595  * Function responsible for generating the ACL information for a given
596  * file.  Note, the string is put into buffer malloc'd by this function.
597  * Its the responsibility of the caller to free the buffer.
598  */
599 static char *
600 get_acl_string(const char *fname, const struct stat64 *statb, int *err_code)
601 {
602 	acl_t		*aclp;
603 	char		*acltext;
604 	int		error;
605 
606 	if (S_ISLNK(statb->st_mode)) {
607 		return (safe_strdup("-"));
608 	}
609 
610 	/*
611 	 *  Include trivial acl's
612 	 */
613 	error = acl_get(fname, 0, &aclp);
614 
615 	if (error != 0) {
616 		*err_code = WARNING_EXIT;
617 		(void) fprintf(stderr, "%s: %s\n", fname, acl_strerror(error));
618 		return (safe_strdup("-"));
619 	} else {
620 		acltext = acl_totext(aclp, 0);
621 		acl_free(aclp);
622 		return (acltext);
623 	}
624 }
625 
626 
627 /*
628  *
629  * description:	This routine reads stdin in BUF_SIZE chunks, uses the bits
630  *		to update the md5 hash buffer, and outputs the chunks
631  *		to stdout.  When stdin is exhausted, the hash is computed,
632  *		converted to a hexadecimal string, and returned.
633  *
634  * returns:	The md5 hash of stdin, or NULL if unsuccessful for any reason.
635  */
636 static int
637 generate_hash(int fdin, char *hash_str)
638 {
639 	unsigned char buf[BUF_SIZE];
640 	unsigned char hash[MD5_DIGEST_LENGTH];
641 	int i, amtread;
642 	MD5_CTX ctx;
643 
644 	MD5Init(&ctx);
645 
646 	for (;;) {
647 		amtread = read(fdin, buf, sizeof (buf));
648 		if (amtread == 0)
649 			break;
650 		if (amtread <  0)
651 			return (1);
652 
653 		/* got some data.  Now update hash */
654 		MD5Update(&ctx, buf, amtread);
655 	}
656 
657 	/* done passing through data, calculate hash */
658 	MD5Final(hash, &ctx);
659 
660 	for (i = 0; i < MD5_DIGEST_LENGTH; i++)
661 		(void) sprintf(hash_str + (i*2), "%2.2x", hash[i]);
662 
663 	return (0);
664 }
665 
666 /*
667  * Used by 'bart create' with the '-I' option.  Return each entry into a 'buf'
668  * with the appropriate exit code: '0' for success and '-1' for failure.
669  */
670 static int
671 read_filelist(char *reloc_root, char **argv, char *buf, size_t bufsize)
672 {
673 	static int		argv_index = -1;
674 	static boolean_t	read_stdinput = B_FALSE;
675 	char			temp_buf[PATH_MAX];
676 	char 			*cp;
677 
678 	/*
679 	 * INITIALIZATION:
680 	 * Setup this code so it knows whether or not to read sdtin.
681 	 * Also, if reading from argv, setup the index, "argv_index"
682 	 */
683 	if (argv_index == -1) {
684 		argv_index = 0;
685 
686 		/* In this case, no args after '-I', so read stdin */
687 		if (argv[0] == NULL)
688 			read_stdinput = B_TRUE;
689 	}
690 
691 	buf[0] = '\0';
692 
693 	if (read_stdinput) {
694 		if (fgets(temp_buf, PATH_MAX, stdin) == NULL)
695 			return (-1);
696 		cp = strtok(temp_buf, "\n");
697 	} else {
698 		cp = argv[argv_index++];
699 	}
700 
701 	if (cp == NULL)
702 		return (-1);
703 
704 	/*
705 	 * Unlike similar code elsewhere, avoid adding a leading
706 	 * slash for relative pathnames.
707 	 */
708 	(void) snprintf(buf, bufsize,
709 	    (reloc_root[0] == '\0' || cp[0] == '/') ? "%s%s" : "%s/%s",
710 	    reloc_root, cp);
711 
712 	return (0);
713 }
714